Added wrappers for TextExtractorRegistry and InvoiceDataExtractor
This commit is contained in:
parent
b05d927356
commit
09a52ac539
|
@ -15,14 +15,14 @@ import net.dankito.banking.search.LuceneRemitteeSearcher
|
||||||
import net.dankito.banking.ui.IBankingClientCreator
|
import net.dankito.banking.ui.IBankingClientCreator
|
||||||
import net.dankito.banking.ui.IRouter
|
import net.dankito.banking.ui.IRouter
|
||||||
import net.dankito.banking.ui.presenter.BankingPresenter
|
import net.dankito.banking.ui.presenter.BankingPresenter
|
||||||
import net.dankito.banking.util.BankIconFinder
|
|
||||||
import net.dankito.banking.util.IBankIconFinder
|
|
||||||
import net.dankito.banking.bankfinder.IBankFinder
|
import net.dankito.banking.bankfinder.IBankFinder
|
||||||
import net.dankito.banking.bankfinder.LuceneBankFinder
|
import net.dankito.banking.bankfinder.LuceneBankFinder
|
||||||
import net.dankito.text.extraction.ITextExtractorRegistry
|
import net.dankito.banking.util.*
|
||||||
|
import net.dankito.banking.util.extraction.IInvoiceDataExtractor
|
||||||
|
import net.dankito.banking.util.extraction.ITextExtractorRegistry
|
||||||
|
import net.dankito.banking.util.extraction.JavaInvoiceDataExtractor
|
||||||
|
import net.dankito.banking.util.extraction.JavaTextExtractorRegistry
|
||||||
import net.dankito.text.extraction.TextExtractorRegistry
|
import net.dankito.text.extraction.TextExtractorRegistry
|
||||||
import net.dankito.text.extraction.info.invoice.IInvoiceDataExtractor
|
|
||||||
import net.dankito.text.extraction.info.invoice.InvoiceDataExtractor
|
|
||||||
import net.dankito.text.extraction.pdf.PdfBoxAndroidPdfTextExtractor
|
import net.dankito.text.extraction.pdf.PdfBoxAndroidPdfTextExtractor
|
||||||
import net.dankito.text.extraction.pdf.iText2PdfTextExtractor
|
import net.dankito.text.extraction.pdf.iText2PdfTextExtractor
|
||||||
import net.dankito.utils.ThreadPool
|
import net.dankito.utils.ThreadPool
|
||||||
|
@ -143,15 +143,15 @@ class BankingModule(private val applicationContext: Context) {
|
||||||
@Singleton
|
@Singleton
|
||||||
fun provideTextExtractorRegistry(applicationContext: Context) : ITextExtractorRegistry {
|
fun provideTextExtractorRegistry(applicationContext: Context) : ITextExtractorRegistry {
|
||||||
// TODO: add PdfTypeDetector
|
// TODO: add PdfTypeDetector
|
||||||
return TextExtractorRegistry(listOf(
|
return JavaTextExtractorRegistry(TextExtractorRegistry(listOf(
|
||||||
iText2PdfTextExtractor(), PdfBoxAndroidPdfTextExtractor(applicationContext)
|
iText2PdfTextExtractor(), PdfBoxAndroidPdfTextExtractor(applicationContext)
|
||||||
))
|
)))
|
||||||
}
|
}
|
||||||
|
|
||||||
@Provides
|
@Provides
|
||||||
@Singleton
|
@Singleton
|
||||||
fun provideInvoiceDataExtractor() : IInvoiceDataExtractor {
|
fun provideInvoiceDataExtractor() : IInvoiceDataExtractor {
|
||||||
return InvoiceDataExtractor()
|
return JavaInvoiceDataExtractor()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,7 @@ import net.dankito.banking.util.BankIconFinder
|
||||||
import net.dankito.banking.bankfinder.LuceneBankFinder
|
import net.dankito.banking.bankfinder.LuceneBankFinder
|
||||||
import net.dankito.banking.persistence.LuceneBankingPersistence
|
import net.dankito.banking.persistence.LuceneBankingPersistence
|
||||||
import net.dankito.banking.search.LuceneRemitteeSearcher
|
import net.dankito.banking.search.LuceneRemitteeSearcher
|
||||||
|
import net.dankito.banking.util.extraction.JavaTextExtractorRegistry
|
||||||
import net.dankito.text.extraction.TextExtractorRegistry
|
import net.dankito.text.extraction.TextExtractorRegistry
|
||||||
import net.dankito.text.extraction.TikaTextExtractor
|
import net.dankito.text.extraction.TikaTextExtractor
|
||||||
import net.dankito.text.extraction.image.Tesseract4CommandlineImageTextExtractor
|
import net.dankito.text.extraction.image.Tesseract4CommandlineImageTextExtractor
|
||||||
|
@ -34,11 +35,11 @@ class MainWindow : View(messages["application.title"]) {
|
||||||
|
|
||||||
private val tesseractTextExtractor = Tesseract4CommandlineImageTextExtractor(TesseractConfig(listOf(OcrLanguage.English, OcrLanguage.German)))
|
private val tesseractTextExtractor = Tesseract4CommandlineImageTextExtractor(TesseractConfig(listOf(OcrLanguage.English, OcrLanguage.German)))
|
||||||
|
|
||||||
private val textExtractorRegistry = TextExtractorRegistry(pdffontsPdfTypeDetector(), listOf(
|
private val textExtractorRegistry = JavaTextExtractorRegistry(TextExtractorRegistry(pdffontsPdfTypeDetector(), listOf(
|
||||||
pdfToTextPdfTextExtractor(), PdfBoxPdfTextExtractor(), iText2PdfTextExtractor(),
|
pdfToTextPdfTextExtractor(), PdfBoxPdfTextExtractor(), iText2PdfTextExtractor(),
|
||||||
ImageOnlyPdfTextExtractor(tesseractTextExtractor, pdfimagesImagesFromPdfExtractor()),
|
ImageOnlyPdfTextExtractor(tesseractTextExtractor, pdfimagesImagesFromPdfExtractor()),
|
||||||
tesseractTextExtractor, TikaTextExtractor()
|
tesseractTextExtractor, TikaTextExtractor()
|
||||||
))
|
)))
|
||||||
|
|
||||||
private val presenter = BankingPresenter(fints4kBankingClientCreator(),
|
private val presenter = BankingPresenter(fints4kBankingClientCreator(),
|
||||||
LuceneBankFinder(indexFolder), dataFolder, LuceneBankingPersistence(indexFolder, databaseFolder),
|
LuceneBankFinder(indexFolder), dataFolder, LuceneBankingPersistence(indexFolder, databaseFolder),
|
||||||
|
|
|
@ -23,10 +23,9 @@ import net.dankito.banking.ui.model.moneytransfer.ExtractTransferMoneyDataFromPd
|
||||||
import net.dankito.banking.ui.model.parameters.GetTransactionsParameter
|
import net.dankito.banking.ui.model.parameters.GetTransactionsParameter
|
||||||
import net.dankito.banking.ui.model.settings.AppSettings
|
import net.dankito.banking.ui.model.settings.AppSettings
|
||||||
import net.dankito.banking.util.*
|
import net.dankito.banking.util.*
|
||||||
import net.dankito.text.extraction.ITextExtractorRegistry
|
import net.dankito.banking.util.extraction.IInvoiceDataExtractor
|
||||||
import net.dankito.text.extraction.info.invoice.IInvoiceDataExtractor
|
import net.dankito.banking.util.extraction.ITextExtractorRegistry
|
||||||
import net.dankito.text.extraction.info.invoice.InvoiceDataExtractor
|
import net.dankito.banking.util.extraction.JavaInvoiceDataExtractor
|
||||||
import net.dankito.text.extraction.model.ErrorType
|
|
||||||
import org.slf4j.LoggerFactory
|
import org.slf4j.LoggerFactory
|
||||||
import java.io.File
|
import java.io.File
|
||||||
import java.io.FileOutputStream
|
import java.io.FileOutputStream
|
||||||
|
@ -46,7 +45,7 @@ open class BankingPresenter(
|
||||||
protected val bankIconFinder: IBankIconFinder,
|
protected val bankIconFinder: IBankIconFinder,
|
||||||
protected val textExtractorRegistry: ITextExtractorRegistry,
|
protected val textExtractorRegistry: ITextExtractorRegistry,
|
||||||
protected val router: IRouter,
|
protected val router: IRouter,
|
||||||
protected val invoiceDataExtractor: IInvoiceDataExtractor = InvoiceDataExtractor(),
|
protected val invoiceDataExtractor: IInvoiceDataExtractor = JavaInvoiceDataExtractor(),
|
||||||
protected val serializer: ISerializer = JacksonJsonSerializer(),
|
protected val serializer: ISerializer = JacksonJsonSerializer(),
|
||||||
protected val asyncRunner: IAsyncRunner = CoroutinesAsyncRunner()
|
protected val asyncRunner: IAsyncRunner = CoroutinesAsyncRunner()
|
||||||
) {
|
) {
|
||||||
|
@ -380,9 +379,9 @@ open class BankingPresenter(
|
||||||
val extractionResult = textExtractorRegistry.extractTextWithBestExtractorForFile(pdf)
|
val extractionResult = textExtractorRegistry.extractTextWithBestExtractorForFile(pdf)
|
||||||
|
|
||||||
if (extractionResult.couldExtractText == false || extractionResult.text == null) {
|
if (extractionResult.couldExtractText == false || extractionResult.text == null) {
|
||||||
val resultType = if (extractionResult.error?.type == ErrorType.NoExtractorFoundForFileType) ExtractTransferMoneyDataFromPdfResultType.NotASearchablePdf
|
val resultType = if (extractionResult.noExtractorFound) ExtractTransferMoneyDataFromPdfResultType.NotASearchablePdf
|
||||||
else ExtractTransferMoneyDataFromPdfResultType.CouldNotExtractText
|
else ExtractTransferMoneyDataFromPdfResultType.CouldNotExtractText
|
||||||
return ExtractTransferMoneyDataFromPdfResult(resultType, extractionResult.error?.exception)
|
return ExtractTransferMoneyDataFromPdfResult(resultType, extractionResult.exception)
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
extractionResult.text?.let { extractedText ->
|
extractionResult.text?.let { extractedText ->
|
||||||
|
@ -392,7 +391,7 @@ open class BankingPresenter(
|
||||||
val transferMoneyData = TransferMoneyData("",
|
val transferMoneyData = TransferMoneyData("",
|
||||||
invoiceData.potentialIban ?: "",
|
invoiceData.potentialIban ?: "",
|
||||||
invoiceData.potentialBic ?: "",
|
invoiceData.potentialBic ?: "",
|
||||||
invoiceData.potentialTotalAmount?.amount ?: BigDecimal.ZERO, "")
|
invoiceData.potentialTotalAmount ?: BigDecimal.ZERO, "")
|
||||||
showTransferMoneyDialog(null, transferMoneyData)
|
showTransferMoneyDialog(null, transferMoneyData)
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
|
@ -0,0 +1,13 @@
|
||||||
|
package net.dankito.banking.util.extraction
|
||||||
|
|
||||||
|
import java.lang.Exception
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Library-independent outcome of a text extraction attempt.
 *
 * @property couldExtractText whether the extraction was reported as successful.
 * @property text the extracted text, or `null` if none is available.
 * @property exception the exception reported for a failed extraction, if any.
 * @property noExtractorFound `true` if no extractor was available for the file's type.
 */
open class ExtractionResult(
    open val couldExtractText: Boolean,
    open val text: String?,
    open val exception: Exception? = null,
    open val noExtractorFound: Boolean = false
)
|
|
@ -0,0 +1,8 @@
|
||||||
|
package net.dankito.banking.util.extraction
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Abstraction over invoice data extraction so that callers do not depend on a
 * concrete extraction library.
 */
interface IInvoiceDataExtractor {

    /**
     * Extracts invoice related values from [text].
     *
     * @param text the text to search for invoice data.
     * @return the values found in [text], wrapped in an [InvoiceData].
     */
    fun extractInvoiceData(text: String): InvoiceData

}
|
|
@ -0,0 +1,10 @@
|
||||||
|
package net.dankito.banking.util.extraction
|
||||||
|
|
||||||
|
import java.io.File
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Abstraction over a registry of text extractors so that callers do not depend
 * on a concrete extraction library.
 */
interface ITextExtractorRegistry {

    /**
     * Extracts the text of [file] with the extractor the implementation deems best for it.
     *
     * @param file the file to extract text from.
     * @return the outcome of the extraction as an [ExtractionResult].
     */
    fun extractTextWithBestExtractorForFile(file: File): ExtractionResult

}
|
|
@ -0,0 +1,13 @@
|
||||||
|
package net.dankito.banking.util.extraction
|
||||||
|
|
||||||
|
import java.math.BigDecimal
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Library-independent holder for values that may have been found in an invoice text.
 * All values are optional, hence the "potential" prefix.
 *
 * @property potentialTotalAmount the total amount, if one was found.
 * @property potentialCurrency the currency of the total amount, if one was found.
 * @property potentialIban the IBAN, if one was found.
 * @property potentialBic the BIC, if one was found.
 * @property error the exception reported by the extraction, if any.
 */
open class InvoiceData(
    open val potentialTotalAmount: BigDecimal?,
    open val potentialCurrency: String?,
    open val potentialIban: String?,
    open val potentialBic: String?,
    open val error: Exception? = null
)
|
|
@ -0,0 +1,22 @@
|
||||||
|
package net.dankito.banking.util.extraction
|
||||||
|
|
||||||
|
import net.dankito.text.extraction.info.invoice.InvoiceDataExtractor
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * [IInvoiceDataExtractor] implementation that delegates to the text extraction
 * library's invoice data extractor and maps its result to the library-independent
 * [InvoiceData].
 *
 * @property invoiceDataExtractor the library extractor that does the actual work.
 */
open class JavaInvoiceDataExtractor(
    protected val invoiceDataExtractor: net.dankito.text.extraction.info.invoice.IInvoiceDataExtractor = InvoiceDataExtractor()
) : IInvoiceDataExtractor {

    /**
     * Runs the wrapped extractor on [text] and converts its result.
     *
     * @param text the text to search for invoice data.
     * @return the total amount, currency, IBAN, BIC and error reported by the wrapped extractor.
     */
    override fun extractInvoiceData(text: String): InvoiceData {
        val invoiceData = invoiceDataExtractor.extractInvoiceData(text)
        val totalAmount = invoiceData.potentialTotalAmount

        return InvoiceData(
            totalAmount?.amount?.toBigDecimal(),
            totalAmount?.currency,
            // Forward IBAN and BIC instead of hard-coding null; otherwise the values the
            // library found never reach callers that pre-fill a money transfer with them.
            // NOTE(review): relies on the library result exposing potentialIban / potentialBic,
            // as the presenter code used before this wrapper existed — confirm against the library version in use.
            invoiceData.potentialIban,
            invoiceData.potentialBic,
            invoiceData.error
        )
    }

}
|
|
@ -0,0 +1,23 @@
|
||||||
|
package net.dankito.banking.util.extraction
|
||||||
|
|
||||||
|
import net.dankito.text.extraction.TextExtractorRegistry
|
||||||
|
import net.dankito.text.extraction.model.ErrorType
|
||||||
|
import java.io.File
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * [ITextExtractorRegistry] implementation that delegates to the text extraction
 * library's registry and maps its result to the library-independent [ExtractionResult].
 *
 * @property textExtractorRegistry the library registry that does the actual work.
 */
open class JavaTextExtractorRegistry(
    protected val textExtractorRegistry: net.dankito.text.extraction.ITextExtractorRegistry = TextExtractorRegistry()
) : ITextExtractorRegistry {

    /**
     * Lets the wrapped registry extract the text of [file] and converts its result.
     *
     * @param file the file to extract text from.
     * @return the extraction outcome; `noExtractorFound` is set when the wrapped registry
     * reports an error of type [ErrorType.NoExtractorFoundForFileType].
     */
    override fun extractTextWithBestExtractorForFile(file: File): ExtractionResult {
        val libraryResult = textExtractorRegistry.extractTextWithBestExtractorForFile(file)
        val failure = libraryResult.error

        return ExtractionResult(
            couldExtractText = libraryResult.couldExtractText,
            text = libraryResult.text,
            exception = failure?.exception,
            noExtractorFound = failure?.type == ErrorType.NoExtractorFoundForFileType
        )
    }

}
|
|
@ -0,0 +1,10 @@
|
||||||
|
package net.dankito.banking.util.extraction
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * [IInvoiceDataExtractor] that performs no extraction at all.
 */
open class NoOpInvoiceDataExtractor : IInvoiceDataExtractor {

    /**
     * Ignores [text] and returns an [InvoiceData] whose values are all `null`.
     */
    override fun extractInvoiceData(text: String): InvoiceData =
        InvoiceData(
            potentialTotalAmount = null,
            potentialCurrency = null,
            potentialIban = null,
            potentialBic = null,
            error = null
        )

}
|
|
@ -0,0 +1,12 @@
|
||||||
|
package net.dankito.banking.util.extraction
|
||||||
|
|
||||||
|
import java.io.File
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * [ITextExtractorRegistry] that performs no extraction at all.
 */
open class NoOpTextExtractorRegistry : ITextExtractorRegistry {

    /**
     * Ignores [file] and returns a failed [ExtractionResult] without text or exception,
     * flagged as "no extractor found".
     */
    override fun extractTextWithBestExtractorForFile(file: File): ExtractionResult =
        ExtractionResult(
            couldExtractText = false,
            text = null,
            exception = null,
            noExtractorFound = true
        )

}
|
Loading…
Reference in New Issue