Added wrappers for TextExtractorRegistry and InvoiceDataExtractor

This commit is contained in:
dankito 2020-07-10 14:05:25 +02:00
parent b05d927356
commit 09a52ac539
11 changed files with 129 additions and 18 deletions

View File

@ -15,14 +15,14 @@ import net.dankito.banking.search.LuceneRemitteeSearcher
import net.dankito.banking.ui.IBankingClientCreator
import net.dankito.banking.ui.IRouter
import net.dankito.banking.ui.presenter.BankingPresenter
import net.dankito.banking.util.BankIconFinder
import net.dankito.banking.util.IBankIconFinder
import net.dankito.banking.bankfinder.IBankFinder
import net.dankito.banking.bankfinder.LuceneBankFinder
import net.dankito.text.extraction.ITextExtractorRegistry
import net.dankito.banking.util.*
import net.dankito.banking.util.extraction.IInvoiceDataExtractor
import net.dankito.banking.util.extraction.ITextExtractorRegistry
import net.dankito.banking.util.extraction.JavaInvoiceDataExtractor
import net.dankito.banking.util.extraction.JavaTextExtractorRegistry
import net.dankito.text.extraction.TextExtractorRegistry
import net.dankito.text.extraction.info.invoice.IInvoiceDataExtractor
import net.dankito.text.extraction.info.invoice.InvoiceDataExtractor
import net.dankito.text.extraction.pdf.PdfBoxAndroidPdfTextExtractor
import net.dankito.text.extraction.pdf.iText2PdfTextExtractor
import net.dankito.utils.ThreadPool
@ -143,15 +143,15 @@ class BankingModule(private val applicationContext: Context) {
@Singleton
fun provideTextExtractorRegistry(applicationContext: Context) : ITextExtractorRegistry {
// TODO: add PdfTypeDetector
return TextExtractorRegistry(listOf(
return JavaTextExtractorRegistry(TextExtractorRegistry(listOf(
iText2PdfTextExtractor(), PdfBoxAndroidPdfTextExtractor(applicationContext)
))
)))
}
@Provides
@Singleton
fun provideInvoiceDataExtractor() : IInvoiceDataExtractor {
return InvoiceDataExtractor()
return JavaInvoiceDataExtractor()
}

View File

@ -12,6 +12,7 @@ import net.dankito.banking.util.BankIconFinder
import net.dankito.banking.bankfinder.LuceneBankFinder
import net.dankito.banking.persistence.LuceneBankingPersistence
import net.dankito.banking.search.LuceneRemitteeSearcher
import net.dankito.banking.util.extraction.JavaTextExtractorRegistry
import net.dankito.text.extraction.TextExtractorRegistry
import net.dankito.text.extraction.TikaTextExtractor
import net.dankito.text.extraction.image.Tesseract4CommandlineImageTextExtractor
@ -34,11 +35,11 @@ class MainWindow : View(messages["application.title"]) {
private val tesseractTextExtractor = Tesseract4CommandlineImageTextExtractor(TesseractConfig(listOf(OcrLanguage.English, OcrLanguage.German)))
private val textExtractorRegistry = TextExtractorRegistry(pdffontsPdfTypeDetector(), listOf(
private val textExtractorRegistry = JavaTextExtractorRegistry(TextExtractorRegistry(pdffontsPdfTypeDetector(), listOf(
pdfToTextPdfTextExtractor(), PdfBoxPdfTextExtractor(), iText2PdfTextExtractor(),
ImageOnlyPdfTextExtractor(tesseractTextExtractor, pdfimagesImagesFromPdfExtractor()),
tesseractTextExtractor, TikaTextExtractor()
))
)))
private val presenter = BankingPresenter(fints4kBankingClientCreator(),
LuceneBankFinder(indexFolder), dataFolder, LuceneBankingPersistence(indexFolder, databaseFolder),

View File

@ -23,10 +23,9 @@ import net.dankito.banking.ui.model.moneytransfer.ExtractTransferMoneyDataFromPd
import net.dankito.banking.ui.model.parameters.GetTransactionsParameter
import net.dankito.banking.ui.model.settings.AppSettings
import net.dankito.banking.util.*
import net.dankito.text.extraction.ITextExtractorRegistry
import net.dankito.text.extraction.info.invoice.IInvoiceDataExtractor
import net.dankito.text.extraction.info.invoice.InvoiceDataExtractor
import net.dankito.text.extraction.model.ErrorType
import net.dankito.banking.util.extraction.IInvoiceDataExtractor
import net.dankito.banking.util.extraction.ITextExtractorRegistry
import net.dankito.banking.util.extraction.JavaInvoiceDataExtractor
import org.slf4j.LoggerFactory
import java.io.File
import java.io.FileOutputStream
@ -46,7 +45,7 @@ open class BankingPresenter(
protected val bankIconFinder: IBankIconFinder,
protected val textExtractorRegistry: ITextExtractorRegistry,
protected val router: IRouter,
protected val invoiceDataExtractor: IInvoiceDataExtractor = InvoiceDataExtractor(),
protected val invoiceDataExtractor: IInvoiceDataExtractor = JavaInvoiceDataExtractor(),
protected val serializer: ISerializer = JacksonJsonSerializer(),
protected val asyncRunner: IAsyncRunner = CoroutinesAsyncRunner()
) {
@ -380,9 +379,9 @@ open class BankingPresenter(
val extractionResult = textExtractorRegistry.extractTextWithBestExtractorForFile(pdf)
if (extractionResult.couldExtractText == false || extractionResult.text == null) {
val resultType = if (extractionResult.error?.type == ErrorType.NoExtractorFoundForFileType) ExtractTransferMoneyDataFromPdfResultType.NotASearchablePdf
val resultType = if (extractionResult.noExtractorFound) ExtractTransferMoneyDataFromPdfResultType.NotASearchablePdf
else ExtractTransferMoneyDataFromPdfResultType.CouldNotExtractText
return ExtractTransferMoneyDataFromPdfResult(resultType, extractionResult.error?.exception)
return ExtractTransferMoneyDataFromPdfResult(resultType, extractionResult.exception)
}
else {
extractionResult.text?.let { extractedText ->
@ -392,7 +391,7 @@ open class BankingPresenter(
val transferMoneyData = TransferMoneyData("",
invoiceData.potentialIban ?: "",
invoiceData.potentialBic ?: "",
invoiceData.potentialTotalAmount?.amount ?: BigDecimal.ZERO, "")
invoiceData.potentialTotalAmount ?: BigDecimal.ZERO, "")
showTransferMoneyDialog(null, transferMoneyData)
}
else {

View File

@ -0,0 +1,13 @@
package net.dankito.banking.util.extraction
import java.lang.Exception
/**
 * Result of an attempt to extract text from a file, expressed only with standard types.
 *
 * @property couldExtractText whether text could be extracted.
 * @property text the extracted text, or `null` if there is none.
 * @property exception the exception associated with the attempt, or `null` (the default) if there is none.
 * @property noExtractorFound flag signalling that no extractor was found; defaults to `false`.
 */
open class ExtractionResult(
    open val couldExtractText: Boolean,
    open val text: String?,
    open val exception: Exception? = null,
    open val noExtractorFound: Boolean = false
)

View File

@ -0,0 +1,8 @@
package net.dankito.banking.util.extraction
/**
 * Abstraction for components that derive [InvoiceData] from a piece of text.
 */
interface IInvoiceDataExtractor {
/**
 * Extracts invoice data from the given text.
 *
 * @param text the text to analyse.
 * @return the [InvoiceData] derived from [text].
 */
fun extractInvoiceData(text: String): InvoiceData
}

View File

@ -0,0 +1,10 @@
package net.dankito.banking.util.extraction
import java.io.File
/**
 * Abstraction for components that extract the text of a file and report the outcome as an [ExtractionResult].
 */
interface ITextExtractorRegistry {
/**
 * Extracts the text of the given file.
 *
 * @param file the file to extract text from.
 * @return an [ExtractionResult] describing the outcome of the extraction.
 */
fun extractTextWithBestExtractorForFile(file: File): ExtractionResult
}

View File

@ -0,0 +1,13 @@
package net.dankito.banking.util.extraction
import java.math.BigDecimal
/**
 * Values that were potentially found in an invoice text. Every value is optional.
 *
 * @property potentialTotalAmount the candidate total amount, or `null` if none.
 * @property potentialCurrency the candidate currency, or `null` if none.
 * @property potentialIban the candidate IBAN, or `null` if none.
 * @property potentialBic the candidate BIC, or `null` if none.
 * @property error the error associated with the extraction, or `null` (the default) if there is none.
 */
open class InvoiceData(
    open val potentialTotalAmount: BigDecimal?,
    open val potentialCurrency: String?,
    open val potentialIban: String?,
    open val potentialBic: String?,
    open val error: Exception? = null
)

View File

@ -0,0 +1,22 @@
package net.dankito.banking.util.extraction
import net.dankito.text.extraction.info.invoice.InvoiceDataExtractor
/**
 * [IInvoiceDataExtractor] implementation that delegates to the text extraction library's invoice data
 * extractor and maps the library result to this module's [InvoiceData].
 *
 * @property invoiceDataExtractor the library extractor to delegate to; defaults to a new [InvoiceDataExtractor].
 */
open class JavaInvoiceDataExtractor(
    protected val invoiceDataExtractor: net.dankito.text.extraction.info.invoice.IInvoiceDataExtractor = InvoiceDataExtractor()
) : IInvoiceDataExtractor {

    /**
     * Runs the wrapped extractor on [text] and converts its result.
     *
     * @param text the text to analyse.
     * @return an [InvoiceData] carrying the amount, currency, IBAN, BIC and error reported by the wrapped extractor.
     */
    override fun extractInvoiceData(text: String): InvoiceData {
        val invoiceData = invoiceDataExtractor.extractInvoiceData(text)

        return InvoiceData(
            invoiceData.potentialTotalAmount?.amount?.toBigDecimal(),
            invoiceData.potentialTotalAmount?.currency,
            // Forward IBAN and BIC instead of discarding them: callers read potentialIban / potentialBic
            // to pre-fill the money transfer, and would otherwise always receive null.
            invoiceData.potentialIban,
            invoiceData.potentialBic,
            invoiceData.error
        )
    }
}

View File

@ -0,0 +1,23 @@
package net.dankito.banking.util.extraction
import net.dankito.text.extraction.TextExtractorRegistry
import net.dankito.text.extraction.model.ErrorType
import java.io.File
/**
 * [ITextExtractorRegistry] implementation that delegates to the text extraction library's registry and
 * maps the library result to this module's [ExtractionResult].
 *
 * @property textExtractorRegistry the library registry to delegate to; defaults to a new [TextExtractorRegistry].
 */
open class JavaTextExtractorRegistry(
    protected val textExtractorRegistry: net.dankito.text.extraction.ITextExtractorRegistry = TextExtractorRegistry()
) : ITextExtractorRegistry {

    /**
     * Lets the wrapped registry extract the text of [file] and converts its result.
     *
     * @param file the file to extract text from.
     * @return an [ExtractionResult] whose `noExtractorFound` flag is set when the wrapped registry reports
     * an error of type [ErrorType.NoExtractorFoundForFileType].
     */
    override fun extractTextWithBestExtractorForFile(file: File): ExtractionResult {
        val libraryResult = textExtractorRegistry.extractTextWithBestExtractorForFile(file)
        val libraryError = libraryResult.error

        return ExtractionResult(
            couldExtractText = libraryResult.couldExtractText,
            text = libraryResult.text,
            exception = libraryError?.exception,
            noExtractorFound = libraryError?.type == ErrorType.NoExtractorFoundForFileType
        )
    }
}

View File

@ -0,0 +1,10 @@
package net.dankito.banking.util.extraction
/**
 * [IInvoiceDataExtractor] implementation that performs no extraction.
 */
open class NoOpInvoiceDataExtractor : IInvoiceDataExtractor {

    /**
     * Ignores [text] and returns an [InvoiceData] in which every value is `null`.
     */
    override fun extractInvoiceData(text: String): InvoiceData =
        InvoiceData(
            potentialTotalAmount = null,
            potentialCurrency = null,
            potentialIban = null,
            potentialBic = null,
            error = null
        )
}

View File

@ -0,0 +1,12 @@
package net.dankito.banking.util.extraction
import java.io.File
/**
 * [ITextExtractorRegistry] implementation that performs no extraction.
 */
open class NoOpTextExtractorRegistry : ITextExtractorRegistry {

    /**
     * Ignores [file] and returns a result with no text, no exception and the `noExtractorFound` flag set.
     */
    override fun extractTextWithBestExtractorForFile(file: File): ExtractionResult =
        ExtractionResult(
            couldExtractText = false,
            text = null,
            exception = null,
            noExtractorFound = true
        )
}