Added wrappers for TextExtractorRegistry and InvoiceDataExtractor
This commit is contained in:
parent
b05d927356
commit
09a52ac539
|
@ -15,14 +15,14 @@ import net.dankito.banking.search.LuceneRemitteeSearcher
|
|||
import net.dankito.banking.ui.IBankingClientCreator
|
||||
import net.dankito.banking.ui.IRouter
|
||||
import net.dankito.banking.ui.presenter.BankingPresenter
|
||||
import net.dankito.banking.util.BankIconFinder
|
||||
import net.dankito.banking.util.IBankIconFinder
|
||||
import net.dankito.banking.bankfinder.IBankFinder
|
||||
import net.dankito.banking.bankfinder.LuceneBankFinder
|
||||
import net.dankito.text.extraction.ITextExtractorRegistry
|
||||
import net.dankito.banking.util.*
|
||||
import net.dankito.banking.util.extraction.IInvoiceDataExtractor
|
||||
import net.dankito.banking.util.extraction.ITextExtractorRegistry
|
||||
import net.dankito.banking.util.extraction.JavaInvoiceDataExtractor
|
||||
import net.dankito.banking.util.extraction.JavaTextExtractorRegistry
|
||||
import net.dankito.text.extraction.TextExtractorRegistry
|
||||
import net.dankito.text.extraction.info.invoice.IInvoiceDataExtractor
|
||||
import net.dankito.text.extraction.info.invoice.InvoiceDataExtractor
|
||||
import net.dankito.text.extraction.pdf.PdfBoxAndroidPdfTextExtractor
|
||||
import net.dankito.text.extraction.pdf.iText2PdfTextExtractor
|
||||
import net.dankito.utils.ThreadPool
|
||||
|
@ -143,15 +143,15 @@ class BankingModule(private val applicationContext: Context) {
|
|||
@Singleton
|
||||
fun provideTextExtractorRegistry(applicationContext: Context) : ITextExtractorRegistry {
|
||||
// TODO: add PdfTypeDetector
|
||||
return TextExtractorRegistry(listOf(
|
||||
return JavaTextExtractorRegistry(TextExtractorRegistry(listOf(
|
||||
iText2PdfTextExtractor(), PdfBoxAndroidPdfTextExtractor(applicationContext)
|
||||
))
|
||||
)))
|
||||
}
|
||||
|
||||
@Provides
|
||||
@Singleton
|
||||
fun provideInvoiceDataExtractor() : IInvoiceDataExtractor {
|
||||
return InvoiceDataExtractor()
|
||||
return JavaInvoiceDataExtractor()
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@ import net.dankito.banking.util.BankIconFinder
|
|||
import net.dankito.banking.bankfinder.LuceneBankFinder
|
||||
import net.dankito.banking.persistence.LuceneBankingPersistence
|
||||
import net.dankito.banking.search.LuceneRemitteeSearcher
|
||||
import net.dankito.banking.util.extraction.JavaTextExtractorRegistry
|
||||
import net.dankito.text.extraction.TextExtractorRegistry
|
||||
import net.dankito.text.extraction.TikaTextExtractor
|
||||
import net.dankito.text.extraction.image.Tesseract4CommandlineImageTextExtractor
|
||||
|
@ -34,11 +35,11 @@ class MainWindow : View(messages["application.title"]) {
|
|||
|
||||
private val tesseractTextExtractor = Tesseract4CommandlineImageTextExtractor(TesseractConfig(listOf(OcrLanguage.English, OcrLanguage.German)))
|
||||
|
||||
private val textExtractorRegistry = TextExtractorRegistry(pdffontsPdfTypeDetector(), listOf(
|
||||
private val textExtractorRegistry = JavaTextExtractorRegistry(TextExtractorRegistry(pdffontsPdfTypeDetector(), listOf(
|
||||
pdfToTextPdfTextExtractor(), PdfBoxPdfTextExtractor(), iText2PdfTextExtractor(),
|
||||
ImageOnlyPdfTextExtractor(tesseractTextExtractor, pdfimagesImagesFromPdfExtractor()),
|
||||
tesseractTextExtractor, TikaTextExtractor()
|
||||
))
|
||||
)))
|
||||
|
||||
private val presenter = BankingPresenter(fints4kBankingClientCreator(),
|
||||
LuceneBankFinder(indexFolder), dataFolder, LuceneBankingPersistence(indexFolder, databaseFolder),
|
||||
|
|
|
@ -23,10 +23,9 @@ import net.dankito.banking.ui.model.moneytransfer.ExtractTransferMoneyDataFromPd
|
|||
import net.dankito.banking.ui.model.parameters.GetTransactionsParameter
|
||||
import net.dankito.banking.ui.model.settings.AppSettings
|
||||
import net.dankito.banking.util.*
|
||||
import net.dankito.text.extraction.ITextExtractorRegistry
|
||||
import net.dankito.text.extraction.info.invoice.IInvoiceDataExtractor
|
||||
import net.dankito.text.extraction.info.invoice.InvoiceDataExtractor
|
||||
import net.dankito.text.extraction.model.ErrorType
|
||||
import net.dankito.banking.util.extraction.IInvoiceDataExtractor
|
||||
import net.dankito.banking.util.extraction.ITextExtractorRegistry
|
||||
import net.dankito.banking.util.extraction.JavaInvoiceDataExtractor
|
||||
import org.slf4j.LoggerFactory
|
||||
import java.io.File
|
||||
import java.io.FileOutputStream
|
||||
|
@ -46,7 +45,7 @@ open class BankingPresenter(
|
|||
protected val bankIconFinder: IBankIconFinder,
|
||||
protected val textExtractorRegistry: ITextExtractorRegistry,
|
||||
protected val router: IRouter,
|
||||
protected val invoiceDataExtractor: IInvoiceDataExtractor = InvoiceDataExtractor(),
|
||||
protected val invoiceDataExtractor: IInvoiceDataExtractor = JavaInvoiceDataExtractor(),
|
||||
protected val serializer: ISerializer = JacksonJsonSerializer(),
|
||||
protected val asyncRunner: IAsyncRunner = CoroutinesAsyncRunner()
|
||||
) {
|
||||
|
@ -380,9 +379,9 @@ open class BankingPresenter(
|
|||
val extractionResult = textExtractorRegistry.extractTextWithBestExtractorForFile(pdf)
|
||||
|
||||
if (extractionResult.couldExtractText == false || extractionResult.text == null) {
|
||||
val resultType = if (extractionResult.error?.type == ErrorType.NoExtractorFoundForFileType) ExtractTransferMoneyDataFromPdfResultType.NotASearchablePdf
|
||||
val resultType = if (extractionResult.noExtractorFound) ExtractTransferMoneyDataFromPdfResultType.NotASearchablePdf
|
||||
else ExtractTransferMoneyDataFromPdfResultType.CouldNotExtractText
|
||||
return ExtractTransferMoneyDataFromPdfResult(resultType, extractionResult.error?.exception)
|
||||
return ExtractTransferMoneyDataFromPdfResult(resultType, extractionResult.exception)
|
||||
}
|
||||
else {
|
||||
extractionResult.text?.let { extractedText ->
|
||||
|
@ -392,7 +391,7 @@ open class BankingPresenter(
|
|||
val transferMoneyData = TransferMoneyData("",
|
||||
invoiceData.potentialIban ?: "",
|
||||
invoiceData.potentialBic ?: "",
|
||||
invoiceData.potentialTotalAmount?.amount ?: BigDecimal.ZERO, "")
|
||||
invoiceData.potentialTotalAmount ?: BigDecimal.ZERO, "")
|
||||
showTransferMoneyDialog(null, transferMoneyData)
|
||||
}
|
||||
else {
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
package net.dankito.banking.util.extraction
|
||||
|
||||
import java.lang.Exception
|
||||
|
||||
|
||||
/**
 * Library-independent outcome of a text extraction attempt, as returned by
 * [ITextExtractorRegistry.extractTextWithBestExtractorForFile].
 *
 * @property couldExtractText flag reported by the extractor telling whether text could be extracted
 * @property text the extracted text, may be `null`
 * @property exception the exception reported for the attempt, if any
 * @property noExtractorFound `true` when no extractor was available for the file
 */
open class ExtractionResult(
    open val couldExtractText: Boolean,
    open val text: String?,
    open val exception: Exception? = null,
    open val noExtractorFound: Boolean = false
)
|
|
@ -0,0 +1,8 @@
|
|||
package net.dankito.banking.util.extraction
|
||||
|
||||
|
||||
/**
 * Abstraction over invoice data extraction, so that callers depend on this
 * package's [InvoiceData] instead of a concrete extraction library.
 */
interface IInvoiceDataExtractor {

    /**
     * Extracts invoice data from the given [text].
     *
     * @param text text to search for invoice data
     * @return the extracted [InvoiceData]
     */
    fun extractInvoiceData(text: String): InvoiceData
}
|
|
@ -0,0 +1,10 @@
|
|||
package net.dankito.banking.util.extraction
|
||||
|
||||
import java.io.File
|
||||
|
||||
|
||||
/**
 * Abstraction over a registry of text extractors, so that callers depend on this
 * package's [ExtractionResult] instead of a concrete extraction library.
 */
interface ITextExtractorRegistry {

    /**
     * Tries to extract the text of [file].
     *
     * @param file file to extract text from
     * @return the [ExtractionResult] describing the outcome
     */
    fun extractTextWithBestExtractorForFile(file: File): ExtractionResult
}
|
|
@ -0,0 +1,13 @@
|
|||
package net.dankito.banking.util.extraction
|
||||
|
||||
import java.math.BigDecimal
|
||||
|
||||
|
||||
/**
 * Library-independent holder for values extracted from an invoice text.
 * All values are "potential" candidates and may be `null`.
 *
 * @property potentialTotalAmount candidate for the invoice's total amount
 * @property potentialCurrency candidate for the currency of the total amount
 * @property potentialIban candidate for the IBAN
 * @property potentialBic candidate for the BIC
 * @property error error reported during extraction, if any
 */
open class InvoiceData(
    open val potentialTotalAmount: BigDecimal?,
    open val potentialCurrency: String?,
    open val potentialIban: String?,
    open val potentialBic: String?,
    open val error: Exception? = null
)
|
|
@ -0,0 +1,22 @@
|
|||
package net.dankito.banking.util.extraction
|
||||
|
||||
import net.dankito.text.extraction.info.invoice.InvoiceDataExtractor
|
||||
|
||||
|
||||
/**
 * [IInvoiceDataExtractor] implementation that delegates to the text extraction library's
 * invoice data extractor and maps its result to this package's [InvoiceData].
 *
 * @param invoiceDataExtractor the library extractor to delegate to; defaults to [InvoiceDataExtractor]
 */
open class JavaInvoiceDataExtractor(
    protected val invoiceDataExtractor: net.dankito.text.extraction.info.invoice.IInvoiceDataExtractor = InvoiceDataExtractor()
) : IInvoiceDataExtractor {

    /**
     * Extracts invoice data from [text] with the wrapped extractor and converts the result.
     *
     * @param text text to search for invoice data
     * @return the mapped [InvoiceData]
     */
    override fun extractInvoiceData(text: String): InvoiceData {
        val invoiceData = invoiceDataExtractor.extractInvoiceData(text)

        return InvoiceData(
            invoiceData.potentialTotalAmount?.amount?.toBigDecimal(),
            invoiceData.potentialTotalAmount?.currency,
            // Forward IBAN and BIC instead of dropping them: callers use them to prefill
            // the money transfer dialog and would otherwise always get empty values.
            invoiceData.potentialIban,
            invoiceData.potentialBic,
            invoiceData.error
        )
    }

}
|
|
@ -0,0 +1,23 @@
|
|||
package net.dankito.banking.util.extraction
|
||||
|
||||
import net.dankito.text.extraction.TextExtractorRegistry
|
||||
import net.dankito.text.extraction.model.ErrorType
|
||||
import java.io.File
|
||||
|
||||
|
||||
/**
 * [ITextExtractorRegistry] implementation that delegates to the text extraction library's
 * registry and maps its result to this package's [ExtractionResult].
 *
 * @param textExtractorRegistry the library registry to delegate to; defaults to an empty-argument [TextExtractorRegistry]
 */
open class JavaTextExtractorRegistry(
    protected val textExtractorRegistry: net.dankito.text.extraction.ITextExtractorRegistry = TextExtractorRegistry()
) : ITextExtractorRegistry {

    /**
     * Extracts the text of [file] with the wrapped registry and converts the result.
     *
     * @param file file to extract text from
     * @return the mapped [ExtractionResult]
     */
    override fun extractTextWithBestExtractorForFile(file: File): ExtractionResult {
        val libraryResult = textExtractorRegistry.extractTextWithBestExtractorForFile(file)
        val libraryError = libraryResult.error

        return ExtractionResult(
            couldExtractText = libraryResult.couldExtractText,
            text = libraryResult.text,
            exception = libraryError?.exception,
            // The library signals a missing extractor through the error type.
            noExtractorFound = libraryError?.type == ErrorType.NoExtractorFoundForFileType
        )
    }

}
|
|
@ -0,0 +1,10 @@
|
|||
package net.dankito.banking.util.extraction
|
||||
|
||||
|
||||
/**
 * [IInvoiceDataExtractor] that extracts nothing: every call yields an [InvoiceData]
 * whose values are all `null`.
 */
open class NoOpInvoiceDataExtractor : IInvoiceDataExtractor {

    /** Ignores [text] and returns an [InvoiceData] containing only `null` values. */
    override fun extractInvoiceData(text: String): InvoiceData =
        InvoiceData(
            potentialTotalAmount = null,
            potentialCurrency = null,
            potentialIban = null,
            potentialBic = null,
            error = null
        )

}
|
|
@ -0,0 +1,12 @@
|
|||
package net.dankito.banking.util.extraction
|
||||
|
||||
import java.io.File
|
||||
|
||||
|
||||
/**
 * [ITextExtractorRegistry] that extracts nothing: every call reports that no text
 * could be extracted and that no extractor was found.
 */
open class NoOpTextExtractorRegistry : ITextExtractorRegistry {

    /** Ignores [file] and returns a failed [ExtractionResult] flagged with `noExtractorFound`. */
    override fun extractTextWithBestExtractorForFile(file: File): ExtractionResult =
        ExtractionResult(
            couldExtractText = false,
            text = null,
            exception = null,
            noExtractorFound = true
        )

}
|
Loading…
Reference in New Issue