From 3023b49ca333da5943ec1e35af3aebd0cb3fffab Mon Sep 17 00:00:00 2001
From: dankito
Date: Mon, 27 Apr 2020 00:22:14 +0200
Subject: [PATCH] Implemented BankIconFinder

---
 BankingUiCommon/build.gradle                  |   6 +
 .../dankito/banking/util/BankIconFinder.kt    | 342 ++++++++++++++++++
 .../dankito/banking/util/IBankIconFinder.kt   |  26 ++
 .../banking/util/BankIconFinderTest.kt        | 269 ++++++++++++++
 4 files changed, 643 insertions(+)
 create mode 100644 BankingUiCommon/src/main/java/net/dankito/banking/util/BankIconFinder.kt
 create mode 100644 BankingUiCommon/src/main/java/net/dankito/banking/util/IBankIconFinder.kt
 create mode 100644 BankingUiCommon/src/test/kotlin/net/dankito/banking/util/BankIconFinderTest.kt

diff --git a/BankingUiCommon/build.gradle b/BankingUiCommon/build.gradle
index 0370eab8..d8f298af 100644
--- a/BankingUiCommon/build.gradle
+++ b/BankingUiCommon/build.gradle
@@ -18,10 +18,16 @@ dependencies {
 
     api "net.dankito.utils:java-utils:$javaUtilsVersion"
 
+    implementation "net.dankito.utils:favicon-finder:1.0.0-SNAPSHOT"
+
+    implementation "org.jsoup:jsoup:1.13.1"
+
     // TODO: try to get rid of this import
     api project(':fints4javaLib')
 
 
     testImplementation "junit:junit:$junitVersion"
     testImplementation "org.assertj:assertj-core:$assertJVersion"
+
+    testImplementation "org.slf4j:slf4j-simple:$slf4JVersion"
 }
\ No newline at end of file
diff --git a/BankingUiCommon/src/main/java/net/dankito/banking/util/BankIconFinder.kt b/BankingUiCommon/src/main/java/net/dankito/banking/util/BankIconFinder.kt
new file mode 100644
index 00000000..23b213c6
--- /dev/null
+++ b/BankingUiCommon/src/main/java/net/dankito/banking/util/BankIconFinder.kt
@@ -0,0 +1,342 @@
+package net.dankito.banking.util
+
+import net.dankito.fints.model.BankInfo
+import net.dankito.utils.favicon.FaviconComparator
+import net.dankito.utils.favicon.FaviconFinder
+import net.dankito.utils.web.client.OkHttpWebClient
+import org.jsoup.Jsoup
+import org.jsoup.nodes.Document
+import org.slf4j.LoggerFactory
+import java.net.URI
+import java.net.URLEncoder
+import java.util.regex.Pattern
+
+
+/**
+ * Finds the website of a bank with the help of web search engines and determines the
+ * best favicon of that website to be used as the bank's icon.
+ */
+open class BankIconFinder : IBankIconFinder {
+
+    companion object {
+
+        const val SearchBankWebsiteBaseUrlQwant = "https://lite.qwant.com/?l=de&t=mobile&q="
+
+        const val SearchBankWebsiteBaseUrlEcosia = "https://www.ecosia.org/search?q="
+
+        const val SearchBankWebsiteBaseUrlDuckDuckGo = "https://duckduckgo.com/html/?q="
+
+
+        // matches suffixes like ' (Gf P2)' which findBankWebsite() strips from bank names
+        val ReplaceGfRegex = Pattern.compile(" \\(Gf [\\w]+\\)").toRegex()
+
+
+        // search results whose url contains one of these parts are never considered to be a bank's website
+        private val UnlikelyBankUrlParts = listOf(
+            "meinprospekt.de/",
+            "onlinestreet.de/",
+            "iban-blz.de/",
+            "bankleitzahlen.ws/",
+            "bankleitzahl-finden.de/",
+            "bankleitzahl-bic.de/",
+            "bankleitzahlensuche.org/",
+            "bankleitzahlensuche.com/",
+            "bankverzeichnis.com",
+            "banksuche.com/",
+            "bank-code.net/",
+            "thebankcodes.com/",
+            "zinsen-berechnen.de/",
+            "kredit-anzeiger.com/",
+            "kreditbanken.de/",
+            "nifox.de/",
+            "wikipedia.org/",
+            "transferwise.com/",
+            "wogibtes.info/",
+            "11880.com/",
+            "kaufda.de/",
+            "boomle.com/",
+            "berlin.de/",
+            "berliner-zeitung.de"
+        )
+
+
+        private val log = LoggerFactory.getLogger(BankIconFinder::class.java)
+
+    }
+
+
+    protected val webClient = OkHttpWebClient()
+
+    protected val faviconFinder = FaviconFinder(webClient)
+
+    protected val faviconComparator = FaviconComparator(webClient)
+
+
+    /**
+     * Finds the icon for the bank with the name of [bankInfo].
+     */
+    override fun findIconForBank(bankInfo: BankInfo): String? {
+        return findIconForBank(bankInfo.name)
+    }
+
+    /**
+     * Searches the bank's website, loads its home page and selects the best of the favicons found there.
+     *
+     * @return the url of the icon or null if the website, its content or an icon could not be retrieved.
+     */
+    override fun findIconForBank(bankName: String): String? {
+        try {
+            val bankUrl = findBankWebsite(bankName) ?: return null
+            val bankHomepageResponse = webClient.get(bankUrl).body ?: return null
+
+            val favicons = faviconFinder.extractFavicons(Jsoup.parse(bankHomepageResponse), bankUrl)
+
+            return faviconComparator.getBestIcon(favicons, 16)?.url
+        } catch (e: Exception) {
+            log.error("Could not find icon for bank '$bankName'", e)
+        }
+
+        return null
+    }
+
+
+    /**
+     * Searches the bank's website with Qwant, Ecosia and DuckDuckGo - in this order - and returns the first result.
+     *
+     * '-alt-' and parts matching [ReplaceGfRegex] are removed from [bankName] before it is searched for.
+     *
+     * @return the url of the bank's website or null if it could not be found or an error occurred.
+     */
+    override fun findBankWebsite(bankName: String): String? {
+        try {
+            val adjustedBankName = bankName.replace("-alt-", "").replace(ReplaceGfRegex, "")
+
+            findBankWebsiteWithQwant(adjustedBankName)?.let { return it }
+
+            log.warn("Could not find bank website with Qwant for '$bankName'")
+
+            findBankWebsiteWithEcosia(adjustedBankName)?.let { return it }
+
+            log.warn("Could not find bank website with Ecosia for '$bankName'")
+
+            findBankWebsiteWithDuckDuckGo(adjustedBankName)?.let { return it }
+        } catch (e: Exception) {
+            log.error("Could not find website for bank '$bankName'", e)
+        }
+
+        return null
+    }
+
+    /**
+     * Searches with Qwant; result urls are the texts of '.url' elements that do not contain a 'span' element.
+     */
+    protected open fun findBankWebsiteWithQwant(bankName: String): String? {
+        try {
+            return findBankWebsite(bankName, SearchBankWebsiteBaseUrlQwant) { searchResponseDoc ->
+                searchResponseDoc.select(".url")
+                    .filter { it.selectFirst("span") == null }.map { it.text() }
+            }
+        } catch (e: Exception) {
+            log.error("Could not find website for bank '$bankName' with Qwant", e)
+        }
+
+        return null
+    }
+
+    /**
+     * Searches with Ecosia; result urls are taken from the 'href' attribute of '.js-result-url' elements.
+     */
+    protected open fun findBankWebsiteWithEcosia(bankName: String): String? {
+        try {
+            return findBankWebsite(bankName, SearchBankWebsiteBaseUrlEcosia) { searchResponseDoc ->
+                searchResponseDoc.select(".js-result-url").map { it.attr("href") }
+            }
+        } catch (e: Exception) {
+            log.error("Could not find website for bank '$bankName' with Ecosia", e)
+        }
+
+        return null
+    }
+
+    /**
+     * Searches with DuckDuckGo; result urls are taken from the 'href' attribute of '.result__url' elements.
+     */
+    protected open fun findBankWebsiteWithDuckDuckGo(bankName: String): String? {
+        try {
+            return findBankWebsite(bankName, SearchBankWebsiteBaseUrlDuckDuckGo) { searchResponseDoc ->
+                searchResponseDoc.select(".result__url").map { it.attr("href") }
+            }
+        } catch (e: Exception) {
+            log.error("Could not find website for bank '$bankName' with DuckDuckGo", e)
+        }
+
+        return null
+    }
+
+    /**
+     * Queries the search engine at [searchBaseUrl], first for the quoted bank name (phrase search) and,
+     * if that yields no url, for the plain bank name.
+     *
+     * @param extractUrls extracts the urls of the search results from the search engine's response page.
+     */
+    protected open fun findBankWebsite(bankName: String, searchBaseUrl: String, extractUrls: (Document) -> List<String>): String? {
+        // URL-encode the query so that characters like '&', '#' or umlauts do not break the search url
+        val exactSearchUrl = searchBaseUrl + URLEncoder.encode("\"$bankName\"", "UTF-8")
+        getSearchResultForBank(exactSearchUrl)?.let { searchResponseDocument ->
+            findBestUrlForBank(bankName, extractUrls(searchResponseDocument))?.let { bestUrl ->
+                return bestUrl
+            }
+        }
+
+
+        val searchUrl = searchBaseUrl + URLEncoder.encode(bankName, "UTF-8")
+        getSearchResultForBank(searchUrl)?.let { searchResponseDocument ->
+            return findBestUrlForBank(bankName, extractUrls(searchResponseDocument))
+        }
+
+
+        return null
+    }
+
+    /**
+     * Loads [searchUrl] and parses the response body with Jsoup; returns null if the response has no body.
+     */
+    protected open fun getSearchResultForBank(searchUrl: String): Document? {
+        return webClient.get(searchUrl).body?.let { responseBody -> Jsoup.parse(responseBody) }
+    }
+
+
+    /**
+     * Selects from the search result urls the one whose host matches the bank name best and reduces it to
+     * the main page of that site.
+     */
+    protected open fun findBestUrlForBank(bankName: String, unmappedUrls: List<String>): String? {
+        val urlCandidates = getUrlCandidates(unmappedUrls)
+        val urlCandidatesWithoutUnlikely = urlCandidates.filterNot { isUnlikelyBankUrl(bankName, it) }
+
+        val urlForBank = findUrlThatContainsBankName(bankName, urlCandidatesWithoutUnlikely)
+
+        // cut off stuff like 'filialsuche' etc., they most likely don't contain as many favicons as main page
+        return getMainPageForBankUrl(urlForBank, urlCandidatesWithoutUnlikely) ?: urlForBank
+    }
+
+    /**
+     * Normalizes the given urls with [fixUrl] and drops the null or blank ones.
+     */
+    protected open fun getUrlCandidates(urls: List<String?>): List<String> {
+        return urls.mapNotNull { fixUrl(it) }
+    }
+
+    /**
+     * Percent-encodes spaces in [url] and prepends 'https://' if the url does not start with 'http'.
+     *
+     * @return the fixed url or null if [url] is null or blank.
+     */
+    protected open fun fixUrl(url: String?): String? {
+        if (url.isNullOrBlank()) {
+            return null
+        }
+
+        val urlEncoded = url.replace(" ", "%20")
+
+        return if (urlEncoded.startsWith("http")) urlEncoded else "https://$urlEncoded"
+    }
+
+    /**
+     * Returns the url whose host contains the most parts of the bank name. If several urls contain the same
+     * count of parts, the one that comes first in [urlCandidates] is returned.
+     */
+    protected open fun findUrlThatContainsBankName(bankName: String, urlCandidates: List<String>): String? {
+        val bankNameParts = bankName.replace(",", "")
+            .replace("-", " ") // to find 'Sparda-Bank' in 'sparda.de'
+            .split(" ")
+            .filter { it.isNotBlank() }
+
+        return urlCandidates
+            .mapNotNull { urlCandidate ->
+                findBankNameInUrlHost(urlCandidate, bankNameParts)?.let { countMatches -> urlCandidate to countMatches }
+            }
+            .sortedByDescending { it.second } // sort is stable, so urls with equal count keep their order
+            .firstOrNull()
+            ?.first
+    }
+
+    /**
+     * Counts how many of [bankNameParts] are contained (ignoring case) in the host of [urlCandidate].
+     *
+     * @return the count of matching parts or null if the url has no host or cannot be parsed.
+     */
+    protected open fun findBankNameInUrlHost(urlCandidate: String, bankNameParts: List<String>): Int? {
+        try {
+            val candidateUri = URI.create(urlCandidate.replace("onlinebanking-", ""))
+            val candidateHost = candidateUri.host ?: return null
+
+            return bankNameParts.count { part -> candidateHost.contains(part, true) }
+        } catch (e: Exception) {
+            log.warn("Could not find bank name parts $bankNameParts in host of url '$urlCandidate'", e)
+        }
+
+        return null
+    }
+
+    /**
+     * Determines the home page of the site [urlForBank] belongs to: [urlForBank] itself if it is a home page,
+     * otherwise a home page url of the same host from [urlCandidates], otherwise scheme and host of [urlForBank].
+     *
+     * @return the home page url or null if [urlForBank] is null, has no host or cannot be parsed.
+     */
+    protected open fun getMainPageForBankUrl(urlForBank: String?, urlCandidates: List<String>): String? {
+        if (urlForBank == null) {
+            return null
+        }
+
+        try {
+            if (isHomePage(urlForBank)) {
+                return urlForBank
+            }
+
+            val bankUriHost = URI.create(urlForBank).host
+
+            urlCandidates.firstOrNull { candidateUrl ->
+                URI.create(candidateUrl).host == bankUriHost && isHomePage(candidateUrl)
+            }?.let { return it }
+        } catch (e: Exception) {
+            log.warn("Could not find main page for bank url '$urlForBank'", e)
+        }
+
+        try {
+            val bankUri = URI.create(urlForBank)
+
+            // an url without host has no main page, so don't build something like 'https://null'
+            return bankUri.host?.let { host -> bankUri.scheme + "://" + host }
+        } catch (e: Exception) {
+            log.error("Could not get main page for bank url '$urlForBank'", e)
+        }
+
+        return null
+    }
+
+    /**
+     * Checks if [url] has an empty path and a host that starts with 'www.'.
+     */
+    protected open fun isHomePage(url: String): Boolean {
+        try {
+            val uri = URI.create(url)
+
+            return uri.path.isNullOrBlank() && uri.host?.startsWith("www.") == true
+        } catch (e: Exception) {
+            log.warn("Could not check if '$url' is url of domain's home page", e)
+        }
+
+        return false
+    }
+
+    /**
+     * Checks if [urlCandidate] contains one of the url parts of sites that are filtered out of the search
+     * results. [bankName] is not used by this implementation.
+     */
+    protected open fun isUnlikelyBankUrl(bankName: String, urlCandidate: String): Boolean {
+        return UnlikelyBankUrlParts.any { urlCandidate.contains(it) }
+    }
+
+}
\ No newline at end of file
diff --git a/BankingUiCommon/src/main/java/net/dankito/banking/util/IBankIconFinder.kt b/BankingUiCommon/src/main/java/net/dankito/banking/util/IBankIconFinder.kt
new file mode 100644
index 00000000..662f689e
--- /dev/null
+++ b/BankingUiCommon/src/main/java/net/dankito/banking/util/IBankIconFinder.kt
@@ -0,0 +1,26 @@
+package net.dankito.banking.util
+
+import net.dankito.fints.model.BankInfo
+
+
+/**
+ * Finds the website and the icon of a bank.
+ */
+interface IBankIconFinder {
+
+    /**
+     * @return the url of an icon for the given bank or null if none could be found.
+     */
+    fun findIconForBank(bankInfo: BankInfo): String?
+
+    /**
+     * @return the url of an icon for the bank with the given name or null if none could be found.
+     */
+    fun findIconForBank(bankName: String): String?
+
+    /**
+     * @return the url of the website of the bank with the given name or null if none could be found.
+     */
+    fun findBankWebsite(bankName: String): String?
+
+}
\ No newline at end of file
diff --git a/BankingUiCommon/src/test/kotlin/net/dankito/banking/util/BankIconFinderTest.kt b/BankingUiCommon/src/test/kotlin/net/dankito/banking/util/BankIconFinderTest.kt
new file mode 100644
index 00000000..fb860961
--- /dev/null
+++ b/BankingUiCommon/src/test/kotlin/net/dankito/banking/util/BankIconFinderTest.kt
@@ -0,0 +1,269 @@
+package net.dankito.banking.util
+
+import org.assertj.core.api.Assertions.assertThat
+import org.junit.Test
+
+
+class BankIconFinderTest {
+
+    private val underTest = object : BankIconFinder() {
+
+        fun findBestUrlForBankPublic(bankName: String, urlCandidates: List<String>): String? {
+            return super.findBestUrlForBank(bankName, urlCandidates)
+        }
+
+    }
+
+
+    @Test
+    fun findBestUrlForBank_BerlinerSparkasse() {
+
+        // when
+        val result = underTest.findBestUrlForBankPublic("Landesbank Berlin - Berliner Sparkasse", listOf(
+            "https://www.lbb.de",
+            "https://www.berliner-sparkasse.de",
+            "https://www.berliner-sparkasse.de/de/home/toolbar/impressum.html",
+            "https://onlinestreet.de/banken/bank/2745",
+            "https://www.berliner-sparkasse.de/en/home.html",
+            "https://www.berlin.de/ba-charlottenburg-wilmersdorf/ueber-den-bezirk/wirtschaft/banken/...",
+            "https://www.lbb.de/landesbank/de/10_Veroeffentlichungen/10_finanzberichte/015_LBB/LBB...",
+            "https://www.bankleitzahl-bic.de/landesbank-berlin-berliner-sparkasse-berlin-blz-10050000",
+            "https://www.berliner-sparkasse.de/de/home/privatkunden/online-mobile-banking.html",
+            "https://www.berliner-sparkasse.de/de/home/privatkunden/girokonto/kontopfaendung.html"
+        ))
+
+        // then
+        assertThat(result).isEqualTo("https://www.berliner-sparkasse.de")
+    }
+
+    @Test
+    fun findBestUrlForBank_Postbank() {
+
+        // when
+        val result = underTest.findBestUrlForBankPublic("Postbank Ndl der DB Privat- und Firmenkundenbank", listOf(
+            "https://www.postbank.de/privatkunden/kontakt.html",
+            "https://onlinestreet.de/banken/bank/538",
+            "https://www.zinsen-berechnen.de/.../bank/postbank-ndl-der-db-privat-und-firmenkundenbank",
+            "https://www.bankleitzahl-finden.de/Postbank",
+            "https://www.postbank.de/firmenkunden",
+            "https://antworten.postbank.de/frage/wie-lautet-die-adresse-der-pfaendungsabteilung...",
+            "https://www.db.com/ir/de/db-pfk-postbank-finanzpublikationen.htm",
+            "https://www.fb.postbank.de/iisenbart/unternehmen/Impressum.php"
+        ))
+
+
+        // then
+        assertThat(result).isEqualTo("https://www.postbank.de")
+    }
+
+    @Test
+    fun findBestUrlForBank_Commerzbank() {
+
+        // when
+        val result = underTest.findBestUrlForBankPublic("Commerzbank, Filiale Berlin 1", listOf(
+            "https://onlinestreet.de/banken/bank/24463",
+            "https://www.commerzbank.de/filialen/de/filial-uebersicht.html",
+            "https://filialsuche.commerzbank.de/de/city/Berlin",
+            "https://www.meinprospekt.de/berlin/filialen/commerzbank-de",
+            "https://www.commerzbank.de/de/hauptnavigation/presse/mediathek/bilddaten/filialen/...",
+            "https://www.kaufda.de/Filialen/Berlin/Commerzbank/v-r841",
+            "https://www.bankleitzahl-bic.de/commerzbank-filiale-berlin-1-berlin-blz-10040000"
+        ))
+
+
+        // then
+        assertThat(result).isEqualTo("https://www.commerzbank.de")
+    }
+
+    @Test
+    fun findBestUrlForBank_SpardaBankBerlin() {
+
+        // when
+        val result = underTest.findBestUrlForBankPublic("Sparda-Bank Berlin", listOf(
+            "https://www.sparda-b.de",
+            "https://www.meinprospekt.de/berlin/filialen/sparda-bank",
+            "https://www.sparda.de/online-service-banking-app-berlin",
+            "https://www.sparda-n.de/online-banking-jetzt-online-banking-freischalten",
+            "https://www.berlin.de/special/finanzen-und-recht/adressen/bank/spardabank-berliner...",
+            "https://www.berlin.de/special/finanzen-und-recht/adressen/bank/spardabank...",
+            "https://genostore.de/SBB/online-banking",
+            "https://www.sparda-west.de/online-banking-ihr-online-banking",
+            "https://www.sparda.de/genossenschaftsbank-gute-gruende"
+        ))
+
+
+        // then
+        assertThat(result).isEqualTo("https://www.sparda-b.de")
+    }
+
+    @Test
+    fun findBestUrlForBank_Dexia() {
+
+        // when
+        val result = underTest.findBestUrlForBankPublic("Dexia Kommunalbank Deutschland - DPB", listOf(
+            "https://www.boomle.com/dexia-kommunalbank",
+            "https://www.helaba.com/de/informationen-fuer/medien-und-oeffentlichkeit/news/meldungen/...",
+            "https://www.wiwo.de/unternehmen/banken/352-millionen-euro-helaba-kauft-dexia...",
+            "https://www.dexia.com/sites/default/files/2020-01/DSA%20FHalf-yearly%20FReport%20F2019%20FEN.pdf",
+            "https://www.dexia.com/sites/default/files/2019-12/DSA%20FAnnual%20FReport%20F2018%20FEN_0.pdf",
+            "https://www.online-handelsregister.de/.../D/Dexia+Hypothekenbank+Berlin+AG/3102677"
+        ))
+
+
+        // then
+        assertThat(result).isEqualTo("https://www.dexia.com")
+    }
+
+    @Test
+    fun findBestUrlForBank_BhfBank() {
+
+        // when
+        val result = underTest.findBestUrlForBankPublic("BHF-BANK", listOf(
+            "https://www.bhf-bank.com",
+            "https://www.oddo-bhf.com/de",
+            "https://www.oddo-bhf.com/#!identite/de",
+            "https://www.bv-activebanking.de/onlinebanking-bhf/sessionEnded.jsp",
+            "https://www.handelsblatt.com/themen/bhf-bank",
+            "https://www.faz.net/aktuell/finanzen/thema/bhf-bank",
+            "https://www.kununu.com/de/oddo-bhf",
+            "https://www.wallstreet-online.de/thema/bhf-bank"
+        ))
+
+
+        // then
+        assertThat(result).isEqualTo("https://www.bhf-bank.com")
+    }
+
+    @Test
+    fun findBestUrlForBank_BankhausLöbbecke() {
+
+        // when
+        val result = underTest.findBestUrlForBankPublic("Bankhaus Löbbecke", listOf(
+            "https://www.mmwarburg.de",
+            "https://www.berlin.de/special/finanzen-und-recht/adressen/bank/bankhaus-loebbecke-4f...",
+            "https://www.wallstreet-online.de/thema/bankhaus-loebbecke",
+            "https://www.mmwarburg.de/de/bankhaus/historie/ehemalige-tochterbanken",
+            "https://de.kompass.com/c/bankhaus-lobbecke-ag/de665396",
+            "https://www.fuchsbriefe.de/ratings/vermoegensmanagement/bankhaus-loebbecke-ag-vor..."
+        ))
+
+
+        // then
+        assertThat(result).isEqualTo("https://www.mmwarburg.de")
+    }
+
+    @Test
+    fun findBestUrlForBank_EurocityBank() {
+
+        // when
+        val result = underTest.findBestUrlForBankPublic("Eurocity Bank Gf GAA", listOf(
+            "https://www.eurocitybank.de",
+            "https://www.eurocitybank.de/?q=de/Festgeld"
+        ))
+
+
+        // then
+        assertThat(result).isEqualTo("https://www.eurocitybank.de")
+    }
+
+    @Test
+    fun findBestUrlForBank_BankFürKircheUndDiakonie() {
+
+        // when
+        val result = underTest.findBestUrlForBankPublic("Bank für Kirche und Diakonie - KD-Bank Gf Sonder-BLZ", listOf(
+            "https://www.kd-bank.de",
+            "https://www.kd-bank.de/privatkunden.html",
+            "https://www.kd-bank.de/service/impressum.html"
+        ))
+
+
+        // then
+        assertThat(result).isEqualTo("https://www.kd-bank.de")
+    }
+
+    @Test
+    fun findBestUrlForBank_PsdBankKiel() {
+
+        // when
+        val result = underTest.findBestUrlForBankPublic("PSD Bank Kiel (Gf P2)", listOf(
+            "https://www.psd-kiel.de",
+            "https://www.onlinebanking-psd-kiel.de/banking-private/entry",
+            "https://www.kreditbanken.de/21090900.html"
+        ))
+
+        // then
+        assertThat(result).isEqualTo("https://www.psd-kiel.de")
+    }
+
+    @Test
+    fun findBestUrlForBank_VrBankFlensburgSchleswig() {
+
+        // when
+        val result = underTest.findBestUrlForBankPublic("VR Bank Flensburg-Schleswig -alt-", listOf(
+            "https://www.vrbanknord.de/banking-private/entry",
+            "https://www.vrbanknord.de/wir-fuer-sie/filialen-ansprechpartner/filialen/uebersicht...",
+            "https://sh.vr.de/privatkunden/service/kontakt.html",
+            "https://www.vrbanknord-immo.de/kontakt/ihre-ansprechpartner",
+            "https://sh.vr.de",
+            "https://www.unser-flensburg.de/flensburg/bankensparkassen/vrflensburgschleswig",
+            "https://www.kununu.com/de/vr-bank-flensburg-schleswig-eg",
+            "https://www.meine-vrbank.de"
+        ))
+
+        // then
+        assertThat(result).isEqualTo("https://www.vrbanknord.de")
+    }
+
+    // The findBankWebsite_* tests below query real search engines and therefore require a network connection.
+
+    @Test
+    fun findBankWebsite_VrBankLichtenfelsEbern() {
+
+        // when
+        val result = underTest.findBankWebsite("VR-Bank Lichtenfels-Ebern (Gf P2)")
+
+        // then
+        assertThat(result).isEqualTo("https://www.vr-lif-ebn.de")
+    }
+
+    @Test
+    fun findBankWebsite_PsdBankKoblenz() {
+
+        // when
+        val result = underTest.findBankWebsite("PSD Bank Koblenz (Gf P2)")
+
+        // then
+        assertThat(result).isEqualTo("https://www.psd-koblenz.de")
+    }
+
+    @Test
+    fun findBankWebsite_VrBankLandauMengkofen() {
+
+        // when
+        val result = underTest.findBankWebsite("VR-Bank Landau-Mengkofen (Gf P2)")
+
+        // then
+        assertThat(result).isEqualTo("https://www.vrbanklm.de")
+    }
+
+    @Test
+    fun findBankWebsite_InvestitionsbankBerlin() {
+
+        // when
+        val result = underTest.findBankWebsite("Investitionsbank Berlin")
+
+        // then
+        assertThat(result).isEqualTo("https://www.ibb.de")
+    }
+
+    @Test
+    fun findBankWebsite_DexiaKommunalbankDeutschland() {
+
+        // when
+        val result = underTest.findBankWebsite("Dexia Kommunalbank Deutschland - DPB")
+
+        // then
+        assertThat(result).isEqualTo("https://www.dexia.com")
+    }
+
+}
\ No newline at end of file