289 lines
No EOL
10 KiB
Kotlin
289 lines
No EOL
10 KiB
Kotlin
package net.dankito.banking.util
|
|
|
|
import net.dankito.utils.favicon.FaviconComparator
|
|
import net.dankito.utils.favicon.FaviconFinder
|
|
import net.dankito.utils.web.client.OkHttpWebClient
|
|
import org.jsoup.Jsoup
|
|
import org.jsoup.nodes.Document
|
|
import org.slf4j.LoggerFactory
|
|
import java.net.URI
|
|
import java.util.regex.Pattern
|
|
|
|
|
|
open class BankIconFinder : IBankIconFinder {
|
|
|
|
companion object {

    /** Base URL of the Qwant Lite search; the search term is appended directly after `q=`. */
    const val SearchBankWebsiteBaseUrlQwant = "https://lite.qwant.com/?l=de&t=mobile&q="

    /** Base URL of the Ecosia search; the search term is appended directly after `q=`. */
    const val SearchBankWebsiteBaseUrlEcosia = "https://www.ecosia.org/search?q="

    /** Base URL of the DuckDuckGo HTML search; the search term is appended directly after `q=`. */
    const val SearchBankWebsiteBaseUrlDuckDuckGo = "https://duckduckgo.com/html/?q="

    /**
     * Matches a fragment of the form " (Gf <word characters>)", including the leading space.
     * Matches are removed from the bank name before it is used as search term.
     */
    val ReplaceGfRegex = Regex(" \\(Gf [\\w]+\\)")

    private val log = LoggerFactory.getLogger(BankIconFinder::class.java)

}
|
|
|
|
|
|
/** HTTP client used for search engine and bank home page requests; also handed to the favicon helpers. */
protected val webClient: OkHttpWebClient = OkHttpWebClient()

/** Extracts the favicon candidates from a parsed bank home page. */
protected val faviconFinder: FaviconFinder = FaviconFinder(webClient)

/** Selects the best fitting favicon out of the extracted candidates. */
protected val faviconComparator: FaviconComparator = FaviconComparator(webClient)
|
|
|
|
|
|
/**
 * Searches the web site of the bank with the given name and returns the url of the best fitting favicon
 * found on that site.
 *
 * An icon selected with [prefSize] as minimum and `prefSize + 32` as maximum size is preferred; if the
 * comparator finds none, the best icon selected with a minimum size of 16 is used instead.
 *
 * @param bankName name of the bank whose icon is wanted
 * @param prefSize preferred icon size
 * @return the icon url, or `null` if no bank web site was found, its page has no body or no icon matched
 */
override fun findIconForBank(bankName: String, prefSize: Int): String? {
    val bankUrl = findBankWebsite(bankName) ?: return null
    val bankHomepageResponse = webClient.get(bankUrl).body ?: return null

    val favicons = faviconFinder.extractFavicons(Jsoup.parse(bankHomepageResponse), bankUrl)

    val preferredFavicon = faviconComparator.getBestIcon(favicons, prefSize, prefSize + 32, true)
    if (preferredFavicon != null) {
        return preferredFavicon.url
    }

    // nothing in the preferred size range, fall back to any icon selected with a minimum size of 16
    return faviconComparator.getBestIcon(favicons, 16)?.url
}
|
|
|
|
|
|
/**
 * Tries to find the url of the bank's web site by asking Qwant, Ecosia and DuckDuckGo, in that order.
 * The first search engine that yields a url wins.
 *
 * Before searching, "-alt-" and everything matched by [ReplaceGfRegex] is removed from the bank name.
 * Any exception is logged and results in `null`.
 *
 * @param bankName name of the bank to search for
 * @return url of the bank's web site or `null` if none of the search engines returned a usable url
 */
override fun findBankWebsite(bankName: String): String? {
    try {
        val adjustedBankName = bankName.replace("-alt-", "").replace(ReplaceGfRegex, "")

        val urlFromQwant = findBankWebsiteWithQwant(adjustedBankName)
        if (urlFromQwant != null) {
            return urlFromQwant
        }

        log.warn("Could not find bank website with Qwant for '$bankName'")

        val urlFromEcosia = findBankWebsiteWithEcosia(adjustedBankName)
        if (urlFromEcosia != null) {
            return urlFromEcosia
        }

        log.warn("Could not find bank website with Ecosia for '$bankName'")

        // last resort; its result (possibly null) is the overall result
        return findBankWebsiteWithDuckDuckGo(adjustedBankName)
    } catch (e: Exception) {
        log.error("Could not find website for bank '$bankName'", e)
    }

    return null
}
|
|
|
|
/**
 * Searches the bank's web site with Qwant.
 *
 * Result urls are read from the text of all elements with class `url` that do not contain a `span` element.
 *
 * @param bankName name of the bank to search for
 * @return the best matching url, or `null` if none was found or an exception occurred (which gets logged)
 */
protected open fun findBankWebsiteWithQwant(bankName: String): String? {
    return try {
        findBankWebsite(bankName, SearchBankWebsiteBaseUrlQwant) { searchResultPage ->
            searchResultPage.select(".url")
                .filter { urlElement -> urlElement.selectFirst("span") == null }
                .map { urlElement -> urlElement.text() }
        }
    } catch (e: Exception) {
        log.error("Could not find website for bank '$bankName' with Qwant", e)
        null
    }
}
|
|
|
|
/**
 * Searches the bank's web site with Ecosia.
 *
 * Result urls are read from the `href` attribute of all elements with class `js-result-url`.
 *
 * @param bankName name of the bank to search for
 * @return the best matching url, or `null` if none was found or an exception occurred (which gets logged)
 */
protected open fun findBankWebsiteWithEcosia(bankName: String): String? {
    try {
        return findBankWebsite(bankName, SearchBankWebsiteBaseUrlEcosia) { searchResponseDoc ->
            searchResponseDoc.select(".js-result-url").map { it.attr("href") }
        }
    } catch (e: Exception) {
        // the message formerly named DuckDuckGo (copy & paste error), which made log analysis misleading
        log.error("Could not find website for bank '$bankName' with Ecosia", e)
    }

    return null
}
|
|
|
|
/**
 * Searches the bank's web site with DuckDuckGo.
 *
 * Result urls are read from the `href` attribute of all elements with class `result__url`.
 *
 * @param bankName name of the bank to search for
 * @return the best matching url, or `null` if none was found or an exception occurred (which gets logged)
 */
protected open fun findBankWebsiteWithDuckDuckGo(bankName: String): String? {
    return try {
        findBankWebsite(bankName, SearchBankWebsiteBaseUrlDuckDuckGo) { searchResultPage ->
            searchResultPage.select(".result__url").map { resultLink -> resultLink.attr("href") }
        }
    } catch (e: Exception) {
        log.error("Could not find website for bank '$bankName' with DuckDuckGo", e)
        null
    }
}
|
|
|
|
/**
 * Searches for the bank's web site with the search engine identified by [searchBaseUrl].
 *
 * First an exact phrase search (bank name enclosed in double quotes) is executed. Only if that yields no
 * url, a second search with the plain bank name is done.
 *
 * The search term is URL encoded (UTF-8), so that characters like `&`, `#`, `+` or `%` in a bank name
 * do not break the query string. Spaces are still encoded as `+`.
 *
 * @param bankName name of the bank to search for
 * @param searchBaseUrl url of the search engine to which the encoded search term gets appended
 * @param extractUrls extracts the result urls from the parsed search result page
 * @return the best matching url or `null` if neither search returned a usable url
 */
protected open fun findBankWebsite(bankName: String, searchBaseUrl: String, extractUrls: (Document) -> List<String>): String? {
    // fully qualified so that this method does not depend on an additional file level import
    val exactSearchUrl = searchBaseUrl + java.net.URLEncoder.encode("\"" + bankName + "\"", "UTF-8")

    val urlFromExactSearch = getSearchResultForBank(exactSearchUrl)?.let { searchResponseDocument ->
        findBestUrlForBank(bankName, extractUrls(searchResponseDocument))
    }

    if (urlFromExactSearch != null) {
        return urlFromExactSearch
    }

    val searchUrl = searchBaseUrl + java.net.URLEncoder.encode(bankName, "UTF-8")

    return getSearchResultForBank(searchUrl)?.let { searchResponseDocument ->
        findBestUrlForBank(bankName, extractUrls(searchResponseDocument))
    }
}
|
|
|
|
/**
 * Requests [searchUrl] and parses the response body as HTML.
 *
 * @param searchUrl complete url of the search request
 * @return the parsed search result page or `null` if the response has no body
 */
protected open fun getSearchResultForBank(searchUrl: String): Document? {
    val searchResponseBody = webClient.get(searchUrl).body

    return searchResponseBody?.let { Jsoup.parse(it) }
}
|
|
|
|
|
|
/**
 * Picks the url out of [unmappedUrls] that most likely is the bank's web site.
 *
 * The urls get normalized, known non-bank sites are removed and from the remaining ones the url whose
 * host matches the bank name best is chosen.
 *
 * @param bankName name of the bank the urls have been searched for
 * @param unmappedUrls urls as extracted from the search result page
 * @return the main page belonging to the best matching url, the best matching url itself if no main page
 * could be determined, or `null` if no url matched
 */
protected open fun findBestUrlForBank(bankName: String, unmappedUrls: List<String>): String? {
    val likelyBankUrls = getUrlCandidates(unmappedUrls)
        .filterNot { candidate -> isUnlikelyBankUrl(bankName, candidate) }

    val bestMatchingUrl = findUrlThatContainsBankName(bankName, likelyBankUrls)

    // sub pages like a branch finder most likely don't contain as many favicons as the main page,
    // therefore prefer the main page of the found site
    val mainPageUrl = getMainPageForBankUrl(bestMatchingUrl, likelyBankUrls)

    return mainPageUrl ?: bestMatchingUrl
}
|
|
|
|
/**
 * Normalizes all given urls with [fixUrl] and drops those for which [fixUrl] returns `null`.
 *
 * @param urls urls as extracted from a search result page, may contain `null` entries
 * @return the normalized urls in their original order
 */
protected open fun getUrlCandidates(urls: List<String?>): List<String> =
    urls.mapNotNull { extractedUrl -> fixUrl(extractedUrl) }
|
|
|
|
/**
 * Normalizes a url extracted from a search result page.
 *
 * Spaces get percent-encoded and urls that do not start with "http" get prefixed with "https://".
 *
 * @param url the extracted url, may be `null` or blank
 * @return the normalized url or `null` if [url] is `null` or blank
 */
protected open fun fixUrl(url: String?): String? {
    if (url.isNullOrBlank()) {
        return null
    }

    // the percent-encoding of a space is "%20"; formerly "%20F" was inserted, which added a stray 'F' to the url
    val urlEncoded = url.replace(" ", "%20")

    return if (urlEncoded.startsWith("http")) urlEncoded else "https://" + urlEncoded
}
|
|
|
|
/**
 * Returns the url candidate whose host contains the most parts of the bank name.
 *
 * The bank name is split into parts at spaces after commas have been removed and hyphens have been
 * replaced by spaces. For each candidate [findBankNameInUrlHost] supplies the count of parts its host
 * contains; candidates for which it returns `null` are ignored. If multiple candidates have the same
 * highest count, the first one in [urlCandidates] wins.
 *
 * Note that a candidate with a count of 0 is still a valid result if no other candidate matches better.
 *
 * @param bankName name of the bank
 * @param urlCandidates normalized urls to choose from
 * @return the best matching url or `null` if there is no candidate with a determinable count
 */
protected open fun findUrlThatContainsBankName(bankName: String, urlCandidates: List<String>): String? {
    val bankNameParts = bankName.replace(",", "")
        .replace("-", " ") // to find 'Sparda-Bank' in 'sparda.de'
        .split(" ")
        .filter { it.isNotBlank() }

    // single pass instead of grouping by count and calling max(): needs no '!!' and does not depend on
    // the nullability of Iterable.max(), which differs between Kotlin versions
    var bestUrl: String? = null
    var bestCount = 0

    for (urlCandidate in urlCandidates) {
        val countMatchingParts = findBankNameInUrlHost(urlCandidate, bankNameParts) ?: continue

        // strictly greater, so that on a tie the earlier candidate is kept
        if (bestUrl == null || countMatchingParts > bestCount) {
            bestUrl = urlCandidate
            bestCount = countMatchingParts
        }
    }

    return bestUrl
}
|
|
|
|
/**
 * Counts how many of the bank name parts are contained (ignoring case) in the host of the given url.
 *
 * Before the url is parsed, all occurrences of "onlinebanking-" are removed from it.
 *
 * @param urlCandidate url whose host gets checked
 * @param bankNameParts parts of the bank name to look for
 * @return count of parts contained in the host (may be 0), or `null` if the url cannot be parsed or has no host
 */
protected open fun findBankNameInUrlHost(urlCandidate: String, bankNameParts: List<String>): Int? {
    try {
        val candidateUri = URI.create(urlCandidate.replace("onlinebanking-", ""))

        // URI.getHost() returns null if the host is undefined; handle that explicitly instead of
        // relying on a NullPointerException being caught below
        val candidateHost: String? = candidateUri.host
        if (candidateHost == null) {
            log.warn("Could not find host of url '$urlCandidate'")
            return null
        }

        return bankNameParts.count { part -> candidateHost.contains(part, true) }
    } catch (e: Exception) {
        log.warn("Could not find host of url '$urlCandidate' in bank name '$bankNameParts'", e)
    }

    return null
}
|
|
|
|
/**
 * Tries to determine the main (home) page of the site [urlForBank] belongs to.
 *
 * Order of attempts:
 * 1. [urlForBank] itself if [isHomePage] accepts it,
 * 2. the first of [urlCandidates] that has the same host and is accepted by [isHomePage],
 * 3. scheme and host of [urlForBank] joined to `scheme://host`.
 *
 * @param urlForBank url found for the bank, may be `null`
 * @param urlCandidates all url candidates of the search result
 * @return the main page url, or `null` if [urlForBank] is `null` or no main page could be determined
 */
protected open fun getMainPageForBankUrl(urlForBank: String?, urlCandidates: List<String>): String? {
    if (urlForBank == null) {
        return null
    }

    try {
        if (isHomePage(urlForBank)) {
            return urlForBank
        }

        val bankUriHost = URI.create(urlForBank).host

        val homePageOfSameHost = urlCandidates.firstOrNull { candidateUrl ->
            URI.create(candidateUrl).host == bankUriHost && isHomePage(candidateUrl)
        }

        if (homePageOfSameHost != null) {
            return homePageOfSameHost
        }
    } catch (e: Exception) {
        log.warn("Could not find main page for bank url '$urlForBank'", e)
    }

    try {
        val bankUri = URI.create(urlForBank)
        val scheme: String? = bankUri.scheme
        val host: String? = bankUri.host

        // scheme and host are null if undefined; without this check a url like "null://null" would be returned
        if (scheme != null && host != null) {
            return "$scheme://$host"
        }
    } catch (e: Exception) {
        log.error("Could not get main page for bank url '$urlForBank'", e)
    }

    return null
}
|
|
|
|
/**
 * Checks whether the given url points to the home page of a domain, that is whether its path is
 * `null` or blank and its host starts with "www.".
 *
 * A path consisting only of "/" is not blank, so such urls are not regarded as home page.
 *
 * @param url the url to check
 * @return `true` if url is regarded as home page; `false` otherwise or if checking the url fails
 * (the failure gets logged)
 */
protected open fun isHomePage(url: String): Boolean {
    return try {
        val parsedUrl = URI.create(url)

        parsedUrl.path.isNullOrBlank() && parsedUrl.host.startsWith("www.")
    } catch (e: Exception) {
        log.warn("Could not check if '$url' is url of domain's home page", e)
        false
    }
}
|
|
|
|
/**
 * Checks whether the url belongs to one of the known sites that show up in search results for bank
 * names but are not the bank's own web site (bank code directories, Wikipedia, ...).
 *
 * The check is a plain, case sensitive substring test on the whole url, so a fragment like "berlin.de/"
 * also matches any url whose host merely ends with "berlin.de".
 *
 * @param bankName name of the bank; not used by this implementation
 * @param urlCandidate the url to check
 * @return `true` if the url contains one of the known fragments
 */
protected open fun isUnlikelyBankUrl(bankName: String, urlCandidate: String): Boolean {
    val unlikelyUrlFragments = listOf(
        "meinprospekt.de/",
        "onlinestreet.de/",
        "iban-blz.de/",
        "bankleitzahlen.ws/",
        "bankleitzahl-finden.de/",
        "bankleitzahl-bic.de/",
        "bankleitzahlensuche.org/",
        "bankleitzahlensuche.com/",
        "bankverzeichnis.com",
        "banksuche.com/",
        "bank-code.net/",
        "thebankcodes.com/",
        "zinsen-berechnen.de/",
        "kredit-anzeiger.com/",
        "kreditbanken.de/",
        "nifox.de/",
        "wikipedia.org/",
        "transferwise.com/",
        "wogibtes.info/",
        "11880.com/",
        "kaufda.de/",
        "boomle.com/",
        "berlin.de/",
        "berliner-zeitung.de"
    )

    return unlikelyUrlFragments.any { fragment -> urlCandidate.contains(fragment) }
}
|
|
|
|
} |