Implemented BankIconFinder

This commit is contained in:
dankito 2020-04-27 00:22:14 +02:00
parent 16d6656343
commit 3023b49ca3
4 changed files with 579 additions and 0 deletions

View File

@ -18,10 +18,16 @@ dependencies {
api "net.dankito.utils:java-utils:$javaUtilsVersion"
implementation "net.dankito.utils:favicon-finder:1.0.0-SNAPSHOT"
implementation "org.jsoup:jsoup:1.13.1"
// TODO: try to get rid of this dependency
api project(':fints4javaLib')
testImplementation "junit:junit:$junitVersion"
testImplementation "org.assertj:assertj-core:$assertJVersion"
testImplementation "org.slf4j:slf4j-simple:$slf4JVersion"
}

View File

@ -0,0 +1,290 @@
package net.dankito.banking.util
import net.dankito.fints.model.BankInfo
import net.dankito.utils.favicon.FaviconComparator
import net.dankito.utils.favicon.FaviconFinder
import net.dankito.utils.web.client.OkHttpWebClient
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.slf4j.LoggerFactory
import java.net.URI
import java.net.URLEncoder
import java.util.regex.Pattern
open class BankIconFinder : IBankIconFinder {
companion object {

    /** Base URL of the Qwant Lite search; the search term is appended directly to it. */
    const val SearchBankWebsiteBaseUrlQwant = "https://lite.qwant.com/?l=de&t=mobile&q="

    /** Base URL of the Ecosia search; the search term is appended directly to it. */
    const val SearchBankWebsiteBaseUrlEcosia = "https://www.ecosia.org/search?q="

    /** Base URL of the DuckDuckGo HTML search; the search term is appended directly to it. */
    const val SearchBankWebsiteBaseUrlDuckDuckGo = "https://duckduckgo.com/html/?q="

    /**
     * Matches a suffix of the form ` (Gf XY)`, e.g. ` (Gf P2)`.
     * [findBankWebsite] strips such suffixes from the bank name before searching.
     */
    val ReplaceGfRegex = Regex(" \\(Gf [\\w]+\\)")

    private val log = LoggerFactory.getLogger(BankIconFinder::class.java)

}
// HTTP client used for all search requests and for downloading the bank's home page
protected val webClient = OkHttpWebClient()
// extracts the favicons referenced by a bank's home page (see findIconForBank)
protected val faviconFinder = FaviconFinder(webClient)
// selects the best icon out of the extracted favicons (see findIconForBank)
protected val faviconComparator = FaviconComparator(webClient)
/**
 * Convenience overload that looks up the icon by the bank's name.
 *
 * @param bankInfo bank whose `name` is used for the lookup
 * @return URL of the icon, or `null` if none could be found
 */
override fun findIconForBank(bankInfo: BankInfo): String? = findIconForBank(bankInfo.name)
/**
 * Finds an icon for the bank with the given name.
 *
 * Determines the bank's website via [findBankWebsite], downloads that page, extracts the favicons it
 * references and returns the URL of the icon the comparator rates best.
 *
 * @param bankName name of the bank
 * @return URL of the best icon, or `null` if no website, no page body or no suitable icon was found
 */
override fun findIconForBank(bankName: String): String? {
    val websiteUrl = findBankWebsite(bankName) ?: return null
    val homepageHtml = webClient.get(websiteUrl).body ?: return null

    val iconCandidates = faviconFinder.extractFavicons(Jsoup.parse(homepageHtml), websiteUrl)

    // NOTE(review): 16 is presumably the minimum icon size in pixels - confirm against FaviconComparator
    return faviconComparator.getBestIcon(iconCandidates, 16)?.url
}
/**
 * Tries to find the website of the bank with the given name.
 *
 * The name is first cleaned from "-alt-" markers and " (Gf ..)" suffixes, then Qwant, Ecosia and
 * DuckDuckGo are queried in this order until one of them yields a result.
 *
 * @param bankName name of the bank
 * @return URL of the bank's website, or `null` if no search engine returned a usable result
 */
override fun findBankWebsite(bankName: String): String? {
    try {
        val searchName = bankName.replace("-alt-", "").replace(ReplaceGfRegex, "")

        val qwantHit = findBankWebsiteWithQwant(searchName)
        if (qwantHit != null) {
            return qwantHit
        }
        log.warn("Could not find bank website with Qwant for '$bankName'")

        val ecosiaHit = findBankWebsiteWithEcosia(searchName)
        if (ecosiaHit != null) {
            return ecosiaHit
        }
        log.warn("Could not find bank website with Ecosia for '$bankName'")

        // last resort, its result (possibly null) is the final answer
        return findBankWebsiteWithDuckDuckGo(searchName)
    } catch (e: Exception) {
        log.error("Could not find website for bank '$bankName'", e)
    }

    return null
}
/**
 * Searches Qwant for the bank's website.
 *
 * Result URLs are taken from the text of all `.url` elements that do not contain a `span` element.
 *
 * @param bankName name of the bank to search for
 * @return the best matching URL, or `null` if none was found or the search failed
 */
protected open fun findBankWebsiteWithQwant(bankName: String): String? {
    return try {
        findBankWebsite(bankName, SearchBankWebsiteBaseUrlQwant) { resultPage ->
            resultPage.select(".url")
                .filter { urlElement -> urlElement.selectFirst("span") == null }
                .map { urlElement -> urlElement.text() }
        }
    } catch (e: Exception) {
        log.error("Could not find website for bank '$bankName' with Qwant", e)
        null
    }
}
/**
 * Searches Ecosia for the bank's website.
 *
 * Result URLs are taken from the `href` attribute of all elements matching `.js-result-url`.
 *
 * @param bankName name of the bank to search for
 * @return the best matching URL, or `null` if none was found or the search failed
 */
protected open fun findBankWebsiteWithEcosia(bankName: String): String? {
    try {
        return findBankWebsite(bankName, SearchBankWebsiteBaseUrlEcosia) { searchResponseDoc ->
            searchResponseDoc.select(".js-result-url").map { it.attr("href") }
        }
    } catch (e: Exception) {
        // name the search engine that actually failed so that log output can be attributed correctly
        log.error("Could not find website for bank '$bankName' with Ecosia", e)
    }

    return null
}
/**
 * Searches DuckDuckGo (HTML version) for the bank's website.
 *
 * Result URLs are taken from the `href` attribute of all elements matching `.result__url`.
 *
 * @param bankName name of the bank to search for
 * @return the best matching URL, or `null` if none was found or the search failed
 */
protected open fun findBankWebsiteWithDuckDuckGo(bankName: String): String? {
    return try {
        findBankWebsite(bankName, SearchBankWebsiteBaseUrlDuckDuckGo) { resultPage ->
            resultPage.select(".result__url").map { link -> link.attr("href") }
        }
    } catch (e: Exception) {
        log.error("Could not find website for bank '$bankName' with DuckDuckGo", e)
        null
    }
}
/**
 * Queries one search engine for the bank's website.
 *
 * First an exact phrase search (bank name in double quotes) is executed; only if that yields no usable
 * URL, a second search without quotes is done.
 *
 * @param bankName name of the bank to search for
 * @param searchBaseUrl search engine URL to which the encoded search term gets appended
 * @param extractUrls extracts the result URLs from the search engine's parsed response page
 * @return the best matching URL, or `null` if neither search returned a usable result
 */
protected open fun findBankWebsite(bankName: String, searchBaseUrl: String, extractUrls: (Document) -> List<String>): String? {
    // URLEncoder still turns spaces into '+', but also escapes characters like '&', '#', '%' or '+'
    // that would otherwise cut off or alter the query (e.g. for bank names containing ' & ')
    val exactSearchUrl = searchBaseUrl + URLEncoder.encode("\"" + bankName + "\"", "UTF-8")
    val looseSearchUrl = searchBaseUrl + URLEncoder.encode(bankName, "UTF-8")

    getSearchResultForBank(exactSearchUrl)
        ?.let { exactResults -> findBestUrlForBank(bankName, extractUrls(exactResults)) }
        ?.let { bestUrl -> return bestUrl }

    return getSearchResultForBank(looseSearchUrl)
        ?.let { looseResults -> findBestUrlForBank(bankName, extractUrls(looseResults)) }
}
/**
 * Requests the given search URL and parses the response body as HTML.
 *
 * @param searchUrl complete URL of the search request
 * @return the parsed response page, or `null` if the response has no body
 */
protected open fun getSearchResultForBank(searchUrl: String): Document? {
    val responseHtml = webClient.get(searchUrl).body ?: return null

    return Jsoup.parse(responseHtml)
}
/**
 * Selects from the search result URLs the one that most likely is the bank's website.
 *
 * URLs get normalized, known non-bank sites are dropped, then the URL whose host matches the bank name
 * best is chosen and reduced to its site's main page where possible.
 *
 * @param bankName name of the bank
 * @param unmappedUrls URLs as extracted from the search result page
 * @return the bank's (main page) URL, or `null` if no candidate is left
 */
protected open fun findBestUrlForBank(bankName: String, unmappedUrls: List<String>): String? {
    val plausibleUrls = getUrlCandidates(unmappedUrls)
        .filterNot { candidate -> isUnlikelyBankUrl(bankName, candidate) }

    val bestMatch = findUrlThatContainsBankName(bankName, plausibleUrls)

    // cut off sub pages like 'filialsuche' etc., they most likely don't offer as many favicons as the main page
    return getMainPageForBankUrl(bestMatch, plausibleUrls) ?: bestMatch
}
/**
 * Normalizes the given URLs with [fixUrl] and drops those that cannot be used (null or blank).
 *
 * @param urls raw URLs, may contain `null` entries
 * @return the normalized URLs in their original order
 */
protected open fun getUrlCandidates(urls: List<String?>): List<String> =
    urls.mapNotNull { rawUrl -> fixUrl(rawUrl) }
/**
 * Normalizes a URL taken from a search result page.
 *
 * Spaces get percent-encoded and URLs that do not start with "http" get "https://" prepended.
 *
 * @param url raw URL, may be `null`
 * @return the normalized URL, or `null` if [url] is `null` or blank
 */
protected open fun fixUrl(url: String?): String? {
    if (url.isNullOrBlank()) {
        return null
    }

    // a space is percent-encoded as "%20" (a trailing 'F' would corrupt the URL)
    val urlEncoded = url.replace(" ", "%20")

    return if (urlEncoded.startsWith("http")) urlEncoded else "https://" + urlEncoded
}
/**
 * Finds the URL whose host contains the most parts (words) of the bank name.
 *
 * The bank name is split at spaces and hyphens, commas are removed. If several URLs share the highest
 * match count, the one that comes first in [urlCandidates] wins. URLs for which
 * [findBankNameInUrlHost] returns `null` are ignored; URLs with zero matches still take part.
 *
 * @param bankName name of the bank
 * @param urlCandidates URLs to choose from
 * @return the best matching URL, or `null` if no URL could be evaluated
 */
protected open fun findUrlThatContainsBankName(bankName: String, urlCandidates: List<String>): String? {
    val bankNameParts = bankName.replace(",", "")
        .replace("-", " ") // to find 'Sparda-Bank' in 'sparda.de'
        .split(" ")
        .filter { it.isNotBlank() }

    var bestUrl: String? = null
    var bestMatchCount: Int? = null

    for (urlCandidate in urlCandidates) {
        val matchCount = findBankNameInUrlHost(urlCandidate, bankNameParts) ?: continue

        // strictly greater: on equal counts the earlier URL is kept
        if (bestMatchCount == null || matchCount > bestMatchCount) {
            bestMatchCount = matchCount
            bestUrl = urlCandidate
        }
    }

    return bestUrl
}
/**
 * Counts how many parts of the bank name are contained (ignoring case) in the host of the given URL.
 *
 * @param urlCandidate URL whose host gets checked
 * @param bankNameParts the words of the bank name
 * @return count of bank name parts the host contains, or `null` if the URL cannot be parsed or has no host
 */
protected open fun findBankNameInUrlHost(urlCandidate: String, bankNameParts: List<String>): Int? {
    try {
        // ignore an 'onlinebanking-' prefix so that e.g. 'onlinebanking-psd-kiel.de' is compared like 'psd-kiel.de'
        val candidateHost = URI.create(urlCandidate.replace("onlinebanking-", "")).host

        // URI.getHost() returns null if the host is undefined; handle it explicitly instead of via an NPE
        if (candidateHost == null) {
            log.warn("Could not determine host of url '$urlCandidate'")
            return null
        }

        return bankNameParts.count { part -> candidateHost.contains(part, true) }
    } catch (e: Exception) {
        log.warn("Could not find host of url '$urlCandidate' in bank name '$bankNameParts'", e)
    }

    return null
}
/**
 * Tries to reduce the bank's URL to the main page of its site.
 *
 * Order of attempts:
 * 1. [urlForBank] itself if it already is a home page,
 * 2. the first URL in [urlCandidates] that has the same host and is a home page,
 * 3. scheme and host of [urlForBank].
 *
 * @param urlForBank URL found for the bank, may be `null`
 * @param urlCandidates all URL candidates of the search
 * @return URL of the main page, or `null` if [urlForBank] is `null`, has no host or cannot be parsed
 */
protected open fun getMainPageForBankUrl(urlForBank: String?, urlCandidates: List<String>): String? {
    if (urlForBank == null) {
        return null
    }

    try {
        if (isHomePage(urlForBank)) {
            return urlForBank
        }

        val bankUriHost = URI.create(urlForBank).host

        urlCandidates
            .firstOrNull { candidateUrl -> URI.create(candidateUrl).host == bankUriHost && isHomePage(candidateUrl) }
            ?.let { homePageUrl -> return homePageUrl }
    } catch (e: Exception) {
        log.warn("Could not find main page for bank url '$urlForBank'", e)
    }

    try {
        val bankUri = URI.create(urlForBank)

        // without a host, string concatenation would produce a bogus url like 'https://null'
        bankUri.host?.let { host ->
            return bankUri.scheme + "://" + host
        }
    } catch (e: Exception) {
        log.error("Could not get main page for bank url '$urlForBank'", e)
    }

    return null
}
/**
 * Checks whether the URL points to a domain's home page, that is: it has no path and its host starts
 * with "www.".
 *
 * @param url URL to check
 * @return `true` for a home page URL; `false` otherwise or if the URL cannot be evaluated
 */
protected open fun isHomePage(url: String): Boolean {
    return try {
        val parsedUrl = URI.create(url)

        parsedUrl.path.isNullOrBlank() && parsedUrl.host.startsWith("www.")
    } catch (e: Exception) {
        log.warn("Could not check if '$url' is url of domain's home page", e)
        false
    }
}
/**
 * Checks whether the URL belongs to a site that is known not to be a bank's own website (bank code
 * directories, brochure portals, Wikipedia, ...).
 *
 * The check is a plain substring test against a fixed list of URL fragments.
 *
 * @param bankName name of the bank; not evaluated by this implementation
 * @param urlCandidate URL to check
 * @return `true` if the URL contains one of the known fragments
 */
protected open fun isUnlikelyBankUrl(bankName: String, urlCandidate: String): Boolean {
    val nonBankSiteFragments = listOf(
        "meinprospekt.de/",
        "onlinestreet.de/",
        "iban-blz.de/",
        "bankleitzahlen.ws/",
        "bankleitzahl-finden.de/",
        "bankleitzahl-bic.de/",
        "bankleitzahlensuche.org/",
        "bankleitzahlensuche.com/",
        "bankverzeichnis.com",
        "banksuche.com/",
        "bank-code.net/",
        "thebankcodes.com/",
        "zinsen-berechnen.de/",
        "kredit-anzeiger.com/",
        "kreditbanken.de/",
        "nifox.de/",
        "wikipedia.org/",
        "transferwise.com/",
        "wogibtes.info/",
        "11880.com/",
        "kaufda.de/",
        "boomle.com/",
        "berlin.de/",
        "berliner-zeitung.de"
    )

    return nonBankSiteFragments.any { fragment -> urlCandidate.contains(fragment) }
}
}

View File

@ -0,0 +1,14 @@
package net.dankito.banking.util
import net.dankito.fints.model.BankInfo
/**
 * Finds the website of a bank and an icon for it.
 */
interface IBankIconFinder {
    /** Returns the URL of an icon for the given bank, or `null` if none could be found. */
fun findIconForBank(bankInfo: BankInfo): String?
    /** Returns the URL of an icon for the bank with the given name, or `null` if none could be found. */
fun findIconForBank(bankName: String): String?
    /** Returns the URL of the website of the bank with the given name, or `null` if it could not be found. */
fun findBankWebsite(bankName: String): String?
}

View File

@ -0,0 +1,269 @@
package net.dankito.banking.util
import org.assertj.core.api.Assertions.assertThat
import org.junit.Test
/**
 * Tests for [BankIconFinder].
 *
 * The `findBestUrlForBank_*` tests feed fixed URL lists into the URL selection logic and therefore do
 * not access the network. The `findBankWebsite_*` tests call [BankIconFinder.findBankWebsite], which
 * queries search engines through the web client; they need network access and their outcome depends
 * on the search engines' current results.
 */
class BankIconFinderTest {

    // subclass that makes the protected findBestUrlForBank() callable from the tests
    private val underTest = object : BankIconFinder() {

        fun findBestUrlForBankPublic(bankName: String, urlCandidates: List<String>): String? {
            return super.findBestUrlForBank(bankName, urlCandidates)
        }

    }


    @Test
    fun findBestUrlForBank_BerlinerSparkasse() {

        // when
        val result = underTest.findBestUrlForBankPublic("Landesbank Berlin - Berliner Sparkasse", listOf(
            "https://www.lbb.de",
            "https://www.berliner-sparkasse.de",
            "https://www.berliner-sparkasse.de/de/home/toolbar/impressum.html",
            "https://onlinestreet.de/banken/bank/2745",
            "https://www.berliner-sparkasse.de/en/home.html",
            "https://www.berlin.de/ba-charlottenburg-wilmersdorf/ueber-den-bezirk/wirtschaft/banken/...",
            "https://www.lbb.de/landesbank/de/10_Veroeffentlichungen/10_finanzberichte/015_LBB/LBB...",
            "https://www.bankleitzahl-bic.de/landesbank-berlin-berliner-sparkasse-berlin-blz-10050000",
            "https://www.berliner-sparkasse.de/de/home/privatkunden/online-mobile-banking.html",
            "https://www.berliner-sparkasse.de/de/home/privatkunden/girokonto/kontopfaendung.html"
        ))

        // then
        assertThat(result).isEqualTo("https://www.berliner-sparkasse.de")
    }

    @Test
    fun findBestUrlForBank_Postbank() {

        // when
        val result = underTest.findBestUrlForBankPublic("Postbank Ndl der DB Privat- und Firmenkundenbank", listOf(
            "https://www.postbank.de/privatkunden/kontakt.html",
            "https://onlinestreet.de/banken/bank/538",
            "https://www.zinsen-berechnen.de/.../bank/postbank-ndl-der-db-privat-und-firmenkundenbank",
            "https://www.bankleitzahl-finden.de/Postbank",
            "https://www.postbank.de/firmenkunden",
            "https://antworten.postbank.de/frage/wie-lautet-die-adresse-der-pfaendungsabteilung...",
            "https://www.db.com/ir/de/db-pfk-postbank-finanzpublikationen.htm",
            "https://www.fb.postbank.de/iisenbart/unternehmen/Impressum.php"
        ))

        // then
        assertThat(result).isEqualTo("https://www.postbank.de")
    }

    @Test
    fun findBestUrlForBank_Commerzbank() {

        // when
        val result = underTest.findBestUrlForBankPublic("Commerzbank, Filiale Berlin 1", listOf(
            "https://onlinestreet.de/banken/bank/24463",
            "https://www.commerzbank.de/filialen/de/filial-uebersicht.html",
            "https://filialsuche.commerzbank.de/de/city/Berlin",
            "https://www.meinprospekt.de/berlin/filialen/commerzbank-de",
            "https://www.commerzbank.de/de/hauptnavigation/presse/mediathek/bilddaten/filialen/...",
            "https://www.kaufda.de/Filialen/Berlin/Commerzbank/v-r841",
            "https://www.bankleitzahl-bic.de/commerzbank-filiale-berlin-1-berlin-blz-10040000"
        ))

        // then
        assertThat(result).isEqualTo("https://www.commerzbank.de")
    }

    @Test
    fun findBestUrlForBank_SpardaBankBerlin() {

        // when
        val result = underTest.findBestUrlForBankPublic("Sparda-Bank Berlin", listOf(
            "https://www.sparda-b.de",
            "https://www.meinprospekt.de/berlin/filialen/sparda-bank",
            "https://www.sparda.de/online-service-banking-app-berlin",
            "https://www.sparda-n.de/online-banking-jetzt-online-banking-freischalten",
            "https://www.berlin.de/special/finanzen-und-recht/adressen/bank/spardabank-berliner...",
            "https://www.berlin.de/special/finanzen-und-recht/adressen/bank/spardabank...",
            "https://genostore.de/SBB/online-banking",
            "https://www.sparda-west.de/online-banking-ihr-online-banking",
            "https://www.sparda.de/genossenschaftsbank-gute-gruende"
        ))

        // then
        assertThat(result).isEqualTo("https://www.sparda-b.de")
    }

    @Test
    fun findBestUrlForBank_Dexia() {

        // when
        val result = underTest.findBestUrlForBankPublic("Dexia Kommunalbank Deutschland - DPB", listOf(
            "https://www.boomle.com/dexia-kommunalbank",
            "https://www.helaba.com/de/informationen-fuer/medien-und-oeffentlichkeit/news/meldungen/...",
            "https://www.wiwo.de/unternehmen/banken/352-millionen-euro-helaba-kauft-dexia...",
            "https://www.dexia.com/sites/default/files/2020-01/DSA%20FHalf-yearly%20FReport%20F2019%20FEN.pdf",
            "https://www.dexia.com/sites/default/files/2019-12/DSA%20FAnnual%20FReport%20F2018%20FEN_0.pdf",
            "https://www.online-handelsregister.de/.../D/Dexia+Hypothekenbank+Berlin+AG/3102677"
        ))

        // then
        assertThat(result).isEqualTo("https://www.dexia.com")
    }

    @Test
    fun findBestUrlForBank_BhfBank() {

        // when
        val result = underTest.findBestUrlForBankPublic("BHF-BANK", listOf(
            "https://www.bhf-bank.com",
            "https://www.oddo-bhf.com/de",
            "https://www.oddo-bhf.com/#!identite/de",
            "https://www.bv-activebanking.de/onlinebanking-bhf/sessionEnded.jsp",
            "https://www.handelsblatt.com/themen/bhf-bank",
            "https://www.faz.net/aktuell/finanzen/thema/bhf-bank",
            "https://www.kununu.com/de/oddo-bhf",
            "https://www.wallstreet-online.de/thema/bhf-bank"
        ))

        // then
        assertThat(result).isEqualTo("https://www.bhf-bank.com")
    }

    @Test
    fun findBestUrlForBank_BankhausLöbbecke() {

        // when
        val result = underTest.findBestUrlForBankPublic("Bankhaus Löbbecke", listOf(
            "https://www.mmwarburg.de",
            "https://www.berlin.de/special/finanzen-und-recht/adressen/bank/bankhaus-loebbecke-4f...",
            "https://www.wallstreet-online.de/thema/bankhaus-loebbecke",
            "https://www.mmwarburg.de/de/bankhaus/historie/ehemalige-tochterbanken",
            "https://de.kompass.com/c/bankhaus-lobbecke-ag/de665396",
            "https://www.fuchsbriefe.de/ratings/vermoegensmanagement/bankhaus-loebbecke-ag-vor..."
        ))

        // then
        assertThat(result).isEqualTo("https://www.mmwarburg.de")
    }

    @Test
    fun findBestUrlForBank_EurocityBank() {

        // when
        val result = underTest.findBestUrlForBankPublic("Eurocity Bank Gf GAA", listOf(
            "https://www.eurocitybank.de",
            "https://www.eurocitybank.de/?q=de/Festgeld"
        ))

        // then
        assertThat(result).isEqualTo("https://www.eurocitybank.de")
    }

    @Test
    fun findBestUrlForBank_BankFürKircheUndDiakonie() {

        // when
        val result = underTest.findBestUrlForBankPublic("Bank für Kirche und Diakonie - KD-Bank Gf Sonder-BLZ", listOf(
            "https://www.kd-bank.de",
            "https://www.kd-bank.de/privatkunden.html",
            "https://www.kd-bank.de/service/impressum.html"
        ))

        // then
        assertThat(result).isEqualTo("https://www.kd-bank.de")
    }

    @Test
    fun findBestUrlForBank_PsdBankKiel() {

        // when
        val result = underTest.findBestUrlForBankPublic("PSD Bank Kiel (Gf P2)", listOf(
            "https://www.psd-kiel.de",
            "https://www.onlinebanking-psd-kiel.de/banking-private/entry",
            "https://www.kreditbanken.de/21090900.html"
        ))

        // then
        assertThat(result).isEqualTo("https://www.psd-kiel.de")
    }

    @Test
    fun findBestUrlForBank_VrBankFlensburgSchleswig() {

        // when
        val result = underTest.findBestUrlForBankPublic("VR Bank Flensburg-Schleswig -alt-", listOf(
            "https://www.vrbanknord.de/banking-private/entry",
            "https://www.vrbanknord.de/wir-fuer-sie/filialen-ansprechpartner/filialen/uebersicht...",
            "https://sh.vr.de/privatkunden/service/kontakt.html",
            "https://www.vrbanknord-immo.de/kontakt/ihre-ansprechpartner",
            "https://sh.vr.de",
            "https://www.unser-flensburg.de/flensburg/bankensparkassen/vrflensburgschleswig",
            "https://www.kununu.com/de/vr-bank-flensburg-schleswig-eg",
            "https://www.meine-vrbank.de"
        ))

        // then
        assertThat(result).isEqualTo("https://www.vrbanknord.de")
    }


    // the tests below execute real search requests (see class documentation)

    @Test
    fun findBankWebsite_VrBankLichtenfelsEbern() {

        // when
        val result = underTest.findBankWebsite("VR-Bank Lichtenfels-Ebern (Gf P2)")

        // then
        assertThat(result).isEqualTo("https://www.vr-lif-ebn.de")
    }

    @Test
    fun findBankWebsite_PsdBankKoblenz() {

        // when
        val result = underTest.findBankWebsite("PSD Bank Koblenz (Gf P2)")

        // then
        assertThat(result).isEqualTo("https://www.psd-koblenz.de")
    }

    @Test
    fun findBankWebsite_VrBankLandauMengkofen() {

        // when
        val result = underTest.findBankWebsite("VR-Bank Landau-Mengkofen (Gf P2)")

        // then
        assertThat(result).isEqualTo("https://www.vrbanklm.de")
    }

    @Test
    fun findBankWebsite_InvestitionsbankBerlin() {

        // when
        val result = underTest.findBankWebsite("Investitionsbank Berlin")

        // then
        assertThat(result).isEqualTo("https://www.ibb.de")
    }

    @Test
    fun findBankWebsite_DexiaKommunalbankDeutschland() {

        // when
        val result = underTest.findBankWebsite("Dexia Kommunalbank Deutschland - DPB")

        // then
        assertThat(result).isEqualTo("https://www.dexia.com")
    }

}