Implemented parsing CEF (EU) genericode Code Lists to Kotlin enum classes

This commit is contained in:
dankito 2024-12-03 22:09:52 +01:00
parent b9d75a6423
commit b47ee96183
12 changed files with 355 additions and 0 deletions

View File

@ -0,0 +1,75 @@
## Sources
Sources of Code Lists according to XRechnung specification p. 105, enhanced by information from [EN16931 code lists file](https://ec.europa.eu/digital-building-blocks/sites/display/DIGITAL/Registry+of+supporting+artefacts+to+implement+EN16931)
| Name | Beschreibung | Version | XRepository Versionskennung und Link | Usage | Using in fields |
|--------------|----------------------------------------------------------------------------------------------------|---------|-----------------------------------------------|-----------|-------------------------------------------------------------------------|
| ISO 3166-1 | Country codes (kompatibel zu ISO 3166-1) | 2022 | urn:xoev-de:kosit:codeliste:country-codes_8 | extended | BT-40, BT-55, BT-69, BT-80, BT-159 |
| ISO 4217 | Currency codes (kompatibel zu ISO 4217) | 2021 | urn:xoev-de:kosit:codeliste:currency-codes_3 | full list | BT-5, BT-6 |
| ISO/IEC 6523 | ICD — Identifier scheme code (kompatibel zu ISO 6523) | 2023 | urn:xoev-de:kosit:codeliste:icd_5 | full list | BT-29-1, BT-30-1, BT-46-1, BT-47-1, BT-60-1, BT-61-1, BT-71-1, BT-157-1 |
| UNTDID 1001 | Document name coded | 21a | urn:xoev-de:kosit:codeliste:untdid.1001_4 | subset | BT-3 |
| UNTDID 1153 | Reference code qualifier | d20a | urn:xoev-de:kosit:codeliste:untdid.1153_3 | full list | BT-18-1, BT-128-1 |
| UNTDID 2005 | Date or time or period function code qualifier | d21a | urn:xoev-de:kosit:codeliste:untdid.2005_4 | subset | BT-8 |
| UNTDID 4451 | Text subject code qualifier | d21a | urn:xoev-de:kosit:codeliste:untdid.4451_4 | full list | BT-21 |
| UNTDID 4461 | Payment means coded | d20a | urn:xoev-de:xrechnung:codeliste:untdid.4461_3 | full list | BT-81 |
| UNTDID 5189 | Allowance or charge identification coded | d20a | urn:xoev-de:kosit:codeliste:untdid.5189_3 | subset | BT-98, BT-140 |
| UNTDID 5305 | Duty or tax or fee category coded | d20a | urn:xoev-de:kosit:codeliste:untdid.5305_3 | subset | BT-95, BT-102, BT-118, BT-151 |
| UNTDID 7143 | Item type identification coded | d21a | urn:xoev-de:kosit:codeliste:untdid.7143_4 | full list | BT-158-1 |
| UNTDID 7161 | Special service description coded | d20a | urn:xoev-de:kosit:codeliste:untdid.7161_3 | full list | BT-105, BT-145 |
| EAS | Electronic Address Scheme Code list | 9.0 | urn:xoev-de:kosit:codeliste:eas_5 | full list | BT-34-1, BT-49-1 |
| VATEX | VAT exemption reason code list | 4.0 | urn:xoev-de:kosit:codeliste:vatex_1 | full list | BT-121 |
| Rec 20 | UN/EC Recommendation Nº20 Codes for Units of Measure Used in International Trade | Rev. 17 | urn:xoev-de:kosit:codeliste:rec20_3 | full list | BT-130, BT-150 |
| Rec 21 | UN/EC Recommendation Nº21 Codes for Passengers, Types of Cargo, Packages and Packaging Materials | Rev. 12 | urn:xoev-de:kosit:codeliste:rec21_3 | full list | BT-130, BT-150 |
| VAT ID | VAT Identifier; has only code "VAT" for Value added tax; code list only in EN Excel file | | | subset | BT-31, BT-48, BT-63 |
| VAT Cat | VAT Category code; has only code "VAT" for Value added tax; code list only in EN Excel file | | | subset | BT-95, BT-102, BT-118, BT-151 |
| MIME | Mime type codes — Mime codes; code list only in EN Excel file | | | subset | BT-125-1 |
## Assessment of sources
### EN / CEF Genericode Code Lists
URL: https://ec.europa.eu/digital-building-blocks/sites/display/DIGITAL/Registry+of+supporting+artefacts+to+implement+EN16931
\+ good to parse
\- no descriptions
\- only English names
### UNTDID
URL e.g.: https://unece.org/fileadmin/DAM/trade/untdid/d16b/tred/tred1001.htm
\+ incl. Engl. descriptions
\- difficult to parse, plain text only (on a website!)
\- only English names and descriptions
\- partially more codes than XRechnung standard allows
### Factur-X / ZUGFeRD Code Lists .xlsx
Download .zip from: https://www.ferd-net.de/standards/zugferd-2.3.2/zugferd-2.3.2.html?acceptCookie=1
\+ all code lists in one file
\+ incl. Engl. descriptions and invoice fields in which codes are used
\- difficult to parse
### UNECE Rec. 20 & 21
Recommendation 20 Codes for Units of Measure Used in International Trade
Recommendation 21 Codes for Passengers, Types of Cargo, Packages and Packaging Materials (with Complementary Codes for Package Names)
URL: https://unece.org/trade/uncefact/cl-recommendations
\+ good to parse
\+ incl. Engl. descriptions
\+ incl. unit symbols
\- only units, no other code lists
\- only English names and descriptions

View File

@ -0,0 +1,36 @@
// Kotlin/JVM build configuration for the code list parser and generator.
plugins {
kotlin("jvm")
}
// Build with a Java 11 toolchain.
kotlin {
jvmToolchain(11)
}
// Dependency versions are read from Gradle project properties (e.g. gradle.properties).
val phGenericodeVersion: String by project
val klfVersion: String by project
val assertKVersion: String by project
val logbackVersion: String by project
dependencies {
// sibling module of this multi-project build
implementation(project(":e-invoice-domain"))
// reader for genericode code list files
implementation("com.helger:ph-genericode:$phGenericodeVersion")
// logging facade
implementation("net.codinux.log:klf:$klfVersion")
// test-only dependencies: kotlin.test, assertK assertions, Logback as logging backend
testImplementation(kotlin("test"))
testImplementation("com.willowtreeapps.assertk:assertk:$assertKVersion")
testImplementation("ch.qos.logback:logback-classic:$logbackVersion")
}
// Run tests on the JUnit Platform.
tasks.test {
useJUnitPlatform()
}

View File

@ -0,0 +1,27 @@
package net.codinux.invoicing.app
import net.codinux.invoicing.parser.CodeGenerator
import net.codinux.invoicing.parser.genericode.CefGenericodeCodelistsParser
import java.io.File
/** Command line entry point: generates the code list enum files from the bundled CEF genericode archive. */
fun main() {
    val app = CefGenericodeCodelistsParserApp()
    app.parseCefGenericodeLists()
}
/**
 * Generates Kotlin enum source files from the CEF genericode code lists archive that is bundled
 * as a classpath resource.
 */
class CefGenericodeCodelistsParserApp {

    /**
     * Parses `codeLists/cef-genericodes-2024-11-15.zip` from the classpath and writes one Kotlin file per
     * code list to `kotlin/net/codinux/invoicing/model/codes` below the derived output base directory.
     *
     * The output base directory is derived from the location of the zip file: `e-invoice-spec-parser` is
     * replaced with `e-invoice-domain`, and a `/build/resources/main` segment is replaced with `/src/main`.
     *
     * @throws IllegalStateException if the zip archive cannot be found on the classpath
     */
    fun parseCefGenericodeLists() {
        val resourceName = "codeLists/cef-genericodes-2024-11-15.zip"
        val resource = javaClass.classLoader.getResource(resourceName)
            ?: error("Code lists archive '$resourceName' not found on the classpath")
        val zipFile = File(resource.toURI())

        val codeLists = CefGenericodeCodelistsParser().parse(zipFile)

        // invariantSeparatorsPath always uses '/' as separator, so the '/'-based replacement below also
        // matches on Windows, where absolutePath would contain '\' instead
        val outputDirectoryBasePath = zipFile.parentFile.parentFile.absoluteFile.invariantSeparatorsPath
            .replace("e-invoice-spec-parser", "e-invoice-domain")
            .replace("/build/resources/main", "/src/main")

        val outputDirectory = File(outputDirectoryBasePath, "kotlin/net/codinux/invoicing/model/codes")

        CodeGenerator().generateCodeFiles(codeLists, outputDirectory)
    }
}

View File

@ -0,0 +1,54 @@
package net.codinux.invoicing.parser
import net.codinux.invoicing.parser.genericode.CodeList
import net.codinux.invoicing.parser.genericode.Column
import java.io.File
/**
 * Generates Kotlin `enum class` source files from parsed [CodeList]s.
 *
 * NOTE(review): in this change set `Column` is declared in package `net.codinux.invoicing.parser.model`,
 * while this file imports it from `net.codinux.invoicing.parser.genericode` - verify the import.
 */
class CodeGenerator {

    /**
     * Writes one `<code list name>.kt` file per code list into [outputDirectory].
     *
     * Each file contains an enum class in package `net.codinux.invoicing.model.codes` with one constructor
     * property per column and one enum entry per row. [outputDirectory] is created if it does not exist.
     *
     * @param codeLists the code lists to generate enum classes for
     * @param outputDirectory the directory the `.kt` files are written to
     * @throws IllegalArgumentException if a row has no code in its first column, as no entry name can be derived
     */
    fun generateCodeFiles(codeLists: List<CodeList>, outputDirectory: File) {
        // bufferedWriter() fails with FileNotFoundException if the parent directory is missing
        outputDirectory.mkdirs()

        codeLists.forEach { codeList ->
            val constructorParameters = codeList.columns.joinToString(", ") { column ->
                "val ${getPropertyName(column)}: ${getDataType(codeList, column)}"
            }

            File(outputDirectory, codeList.name + ".kt").bufferedWriter().use { writer ->
                writer.appendLine("package net.codinux.invoicing.model.codes")
                writer.newLine()

                writer.appendLine("enum class ${getClassName(codeList)}($constructorParameters) {")

                codeList.rows.forEach { row ->
                    val arguments = row.joinToString(", ") { value -> value?.let { toStringLiteral(it) } ?: "null" }
                    writer.appendLine("\t${getEnumName(codeList.columns, row)}($arguments),")
                }

                writer.appendLine("}")
            }
        }
    }

    /** Returns the enum class name; names starting with a digit (or empty names) get a `_` prefix. */
    private fun getClassName(codeList: CodeList): String {
        val name = codeList.name

        // firstOrNull() instead of [0] so that an empty name does not throw StringIndexOutOfBoundsException
        return if (name.firstOrNull()?.isDigit() != false) "_$name"
        else name
    }

    /** Maps a column name to the property name used in the generated enum constructor. */
    private fun getPropertyName(column: Column): String = when (column.name) {
        "Unique code" -> "uniqueCode"
        "Meaning of the code" -> "meaningOfTheCode"
        "Optional remark for the usage of this code" -> "optionalRemarkForTheUsageOfTheCode"
        else -> column.name.replace(" ", "")
    }

    /**
     * Returns the Kotlin type for [column]: `String` (nullable if any row has no value for the column) for
     * data type `string`, otherwise the data type with its first character uppercased and blanks removed.
     */
    private fun getDataType(codeList: CodeList, column: Column): String {
        val index = codeList.columns.indexOf(column)
        val containsNullValues = codeList.rows.any { it[index] == null }

        return when (column.dataType) {
            "string" -> "String" + (if (containsNullValues) "?" else "")
            else -> column.dataType[0].uppercase() + column.dataType.substring(1).replace(" ", "")
        }
    }

    /**
     * Derives the enum entry name from the first cell of [row]: every character that is not a letter,
     * digit or underscore is replaced by `_`, and names starting with a digit get a `_` prefix.
     *
     * [columns] is currently not used.
     */
    private fun getEnumName(columns: List<Column>, row: List<String?>): String {
        val code = row.firstOrNull().orEmpty()
        require(code.isNotEmpty()) { "Cannot derive an enum entry name from a row without a code in its first column: $row" }

        // covers ' ', '/', '.', '-' and any other character that is not allowed in an identifier
        val name = code.map { if (it.isLetterOrDigit() || it == '_') it else '_' }.joinToString("")

        return if (name[0].isDigit()) "_$name"
        else name
    }

    /**
     * Renders [value] as a Kotlin string literal: line breaks are removed and `\`, `"` and `$` are escaped,
     * so that the value can neither terminate the literal nor start a string template.
     */
    private fun toStringLiteral(value: String): String {
        val escaped = value
            .replace("\r", "")
            .replace("\n", "")
            .replace("\\", "\\\\") // must run before the escapes below add backslashes of their own
            .replace("\"", "\\\"")
            .replace("\$", "\\\$")

        return "\"$escaped\""
    }
}

View File

@ -0,0 +1,74 @@
package net.codinux.invoicing.parser.genericode
import com.helger.genericode.Genericode10CodeListMarshaller
import com.helger.xml.serialize.read.DOMReader
import net.codinux.invoicing.parser.model.CodeListType
import net.codinux.invoicing.parser.model.Column
import net.codinux.log.logger
import java.io.File
import java.io.InputStream
import java.util.zip.ZipFile
/**
 * Parses the genericode (`.gc`) files of a CEF code lists zip archive into [CodeList]s.
 */
class CefGenericodeCodelistsParser {

    private val log by logger()

    /**
     * Parses all entries of [zipFile] whose name ends with `.gc` (case-insensitive).
     *
     * Entries that cannot be read as XML or as genericode code list are logged and skipped.
     *
     * @param zipFile the zip archive containing the genericode files
     * @return one [CodeList] per successfully parsed `.gc` entry
     * @throws IllegalArgumentException if the name of a `.gc` file does not map to a known [CodeListType],
     * or if a column lacks its id, data type or short name
     */
    fun parse(zipFile: File): List<CodeList> =
        ZipFile(zipFile).use { zip ->
            zip.entries().toList()
                .filter { !it.isDirectory && it.name.endsWith(".gc", ignoreCase = true) }
                .mapNotNull { entry -> zip.getInputStream(entry).use { parse(it, entry.name) } }
        }

    /**
     * Parses a single genericode file.
     *
     * @param genericodeInputStream the content of the genericode file
     * @param filename the name of the file; its name without extension becomes the code list's name
     * @return the parsed code list, or `null` if the stream is not readable as XML / genericode code list
     */
    private fun parse(genericodeInputStream: InputStream, filename: String): CodeList? {
        val doc = DOMReader.readXMLDOM(genericodeInputStream)
        if (doc == null) {
            log.info { "Could not read XML document from file $filename" }
            return null
        }

        val codeListDoc = Genericode10CodeListMarshaller().read(doc)
        if (codeListDoc == null) {
            log.info { "Could not read Code List from file $filename" }
            return null
        }

        val identification = codeListDoc.identification
        val name = File(filename).nameWithoutExtension

        // requireNotNull() instead of '!!' so that a malformed column is reported with file context
        val columns = codeListDoc.columnSet?.columnChoice.orEmpty()
            .filterIsInstance<com.helger.genericode.v10.Column>()
            .mapIndexed { index, col ->
                Column(
                    index,
                    requireNotNull(col.id) { "Column #$index of file $filename has no id" },
                    requireNotNull(col.data?.type) { "Column #$index of file $filename has no data type" },
                    requireNotNull(col.shortNameValue) { "Column #$index of file $filename has no short name" },
                )
            }

        // one cell per column for each row; null where the row has no value referencing that column
        val rows = codeListDoc.simpleCodeList?.row.orEmpty().map { row ->
            columns.map { column ->
                row.value
                    .firstOrNull { (it.columnRef as? com.helger.genericode.v10.Column)?.id == column.id }
                    ?.simpleValueValue
            }
        }

        return CodeList(
            getType(name),
            name,
            identification?.version,
            identification?.canonicalUri,
            identification?.canonicalVersionUri,
            columns,
            rows,
        )
    }

    /**
     * Maps the name of a genericode file (without extension) to its [CodeListType].
     *
     * @throws IllegalArgumentException if [name] is not one of the known code list names
     */
    private fun getType(name: String): CodeListType = when (name) {
        "Country" -> CodeListType.IsoCountryCodes
        "Currency" -> CodeListType.IsoCurrencyCodes
        "ICD" -> CodeListType.Iso_6523_IdentificationSchemeIdentifier
        "1001" -> CodeListType.UN_1001_InvoiceType
        "1153" -> CodeListType.UN_1153_ReferenceCode
        "Text" -> CodeListType.UN_4451_TextSubjectCodeQualifier
        "Payment" -> CodeListType.UN_4461_PaymentCodes
        "5305" -> CodeListType.UN_5305_DutyOrTaxOrFeeCategory
        "Allowance" -> CodeListType.UN_5189_AllowanceIdentificationCode
        "Item" -> CodeListType.UN_7143_ItemTypeIdentificationCode
        "Charge" -> CodeListType.UN_7161_SpecialServiceDescriptionCodes
        "Unit" -> CodeListType.Units
        "EAS" -> CodeListType.EAS
        "VATEX" -> CodeListType.VATEX
        "MIME" -> CodeListType.Mime
        else -> throw IllegalArgumentException("No known Code List of name '$name' found")
    }
}

View File

@ -0,0 +1,16 @@
package net.codinux.invoicing.parser.genericode
import net.codinux.invoicing.parser.model.CodeListType
import net.codinux.invoicing.parser.model.Column
/**
 * A code list read from a genericode file.
 *
 * @property type the kind of code list
 * @property name the name of the code list
 * @property version the version from the code list's identification, if any
 * @property canonicalUri the canonical URI from the code list's identification, if any
 * @property canonicalVersionUri the canonical version URI from the code list's identification, if any
 * @property columns the columns of the code list
 * @property rows the rows of the code list; each row holds one (nullable) cell value per column
 */
class CodeList(
    val type: CodeListType,
    val name: String,
    val version: String?,
    val canonicalUri: String?,
    val canonicalVersionUri: String?,
    val columns: List<Column>,
    val rows: List<List<String?>>
) {

    /** Short summary: name, comma separated column names and the number of rows. */
    override fun toString(): String {
        val columnNames = columns.joinToString { it.name }
        val rowCount = rows.size

        return "$name $columnNames, $rowCount rows"
    }
}

View File

@ -0,0 +1,27 @@
package net.codinux.invoicing.parser.model
/**
 * The code lists known to the parser.
 *
 * @property filename a name associated with the code list - presumably the name of the generated file or
 * class; it is not read anywhere in the visible code, TODO confirm
 * @property usesFullList `true` if the full code list is used, `false` if only a subset of it is used
 * @property usedInFields the identifiers (e.g. `BT-3`) of the invoice fields in which the codes are used
 */
enum class CodeListType(val filename: String, val usesFullList: Boolean, val usedInFields: List<String>) {
IsoCountryCodes("Country", true, listOf("BT-40", "BT-48", "BT-55", "BT-63", "BT-69", "BT-80", "BT-159")), // actually it's not only the full list, it's "extended"
IsoCurrencyCodes("Currency", true, listOf("BT-5", "BT-6")),
Iso_6523_IdentificationSchemeIdentifier("IdentifierSchemeCode", true, listOf("BT-29-1", "BT-30-1", "BT-46-1", "BT-47-1", "BT-60-1", "BT-61-1", "BT-71-1", "BT-157-1")), // = ICD
UN_1001_InvoiceType("InvoiceType", false, listOf("BT-3")), // original name: Document type,
UN_1153_ReferenceCode("ReferenceCode", true, listOf("BT-18-1", "BT-128-1")),
UN_2005_2475_EventTimeCode("TimeReferenceCode", false, listOf("BT-8")), // code list only in EN Excel file
UN_4451_TextSubjectCodeQualifier("TextSubjectQualifier", true, listOf("BT-21")), // Text subject qualifier, tab Text
UN_4461_PaymentCodes("PaymentMeans", true, listOf("BT-81")), // Payment means, tab Payment
UN_5189_AllowanceIdentificationCode("AllowanceIdentificationCode", false, listOf("BT-98", "BT-140")), // tab Allowance
// NOTE(review): the filename says "Free" while the entry name says "Fee" - looks like a typo, confirm before changing
UN_5305_DutyOrTaxOrFeeCategory("DutyOrTaxOrFreeCategory", false, listOf("BT-95", "BT-102", "BT-118", "BT-151")),
UN_7143_ItemTypeIdentificationCode("ItemTypeIdentificationCode", true, listOf("BT-158-1")), // Item type identification code, tab Item, full list
UN_7161_SpecialServiceDescriptionCodes("SpecialServiceDescriptionCode", true, listOf("BT-105", "BT-145")), // Charge codes, tab Charge
Units("Unit", true, listOf("BT-130", "BT-150")), // UN/ECE Recommendation N°20 and UN/ECE Recommendation N°21 — Unit codes
EAS("ElectronicAddressSchemeIdentifier", true, listOf("BT-34-1", "BT-49-1")), // Electronic address scheme identifier
VATEX("VatExemptionReasonCode", true, listOf("BT-121")), // VAT exemption reason code
VatIdentifier("VatIdentifier", false, listOf("BT-31", "BT-48", "BT-63")), // code list only in EN Excel file
VatCategoryCode("VatCategoryCode", false, listOf("BT-95", "BT-102", "BT-118", "BT-151")), // code list only in EN Excel file
Mime("Mime", false, listOf("BT-125-1")),
}

View File

@ -0,0 +1,10 @@
package net.codinux.invoicing.parser.model
/**
 * A column of a code list.
 *
 * @property index the position of the column within its code list
 * @property id the id of the column
 * @property dataType the name of the column's data type, e.g. `string`
 * @property name the name of the column
 */
data class Column(
    val index: Int,
    val id: String,
    val dataType: String,
    val name: String,
) {

    /** Renders the column as `<dataType> <name> (<id>)`. */
    override fun toString(): String = buildString {
        append(dataType)
        append(' ')
        append(name)
        append(" (")
        append(id)
        append(')')
    }
}

View File

@ -0,0 +1,32 @@
<configuration>

    <!-- Console appender; the threshold filter only lets events of level DEBUG and above pass -->
    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
        <!-- encoders are assigned the type
             ch.qos.logback.classic.encoder.PatternLayoutEncoder by default -->
        <encoder>
            <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
        </encoder>
        <filter class="ch.qos.logback.classic.filter.ThresholdFilter">
            <level>DEBUG</level>
        </filter>
    </appender>

    <!-- Insert the current time formatted as "yyyyMMdd'T'HHmmss" under
         the key "bySecond" into the logger context. This value will be
         available to all subsequent configuration elements. -->
    <timestamp key="bySecond" datePattern="yyyyMMdd'T'HHmmss"/>

    <root level="ALL">
        <appender-ref ref="STDOUT"/>
    </root>

    <!-- Apache FOP would otherwise flood the log so heavily that the test run crashes.
         The loggers below deliberately have no appender-ref of their own: appenders are additive,
         so their events already reach STDOUT via the root logger, and attaching STDOUT here
         as well would print every event twice. -->
    <logger name="org.apache.fop" level="INFO"/>
    <logger name="org.apache.xmlgraphics.image.loader.spi.ImageImplRegistry" level="INFO"/>

</configuration>

View File

@ -26,6 +26,8 @@ angusMailVersion=2.0.3
openHtmlToPdfVersion=1.1.22
jsoupVersion=1.18.1
phGenericodeVersion=7.1.3
klfVersion=1.6.2
lokiLogAppenderVersion=0.5.5
# only used for tests

View File

@ -30,4 +30,6 @@ include("e-invoice-domain")
include("invoice-creator")
include("e-invoice-spec-parser")
include("e-invoice-api")