From 84950f8aed47787fb8a7c68549e5918db1cf70ee Mon Sep 17 00:00:00 2001 From: bosd Date: Sat, 3 Feb 2024 19:18:16 +0100 Subject: [PATCH] add templates --- .../extract/templates/com/com.cloudflare.yml | 80 +++++++++ .../extract/templates/com/com.hetzner.yml | 91 ++++++++++ .../extract/templates/com/com.runbox.yml | 66 ++++++++ .../extract/templates/com/com.vultr.yml | 82 +++++++++ .../extract/templates/nl/nl.fletcher.yml | 104 ++++++++++++ .../extract/templates/nl/nl.makro.jsonold | 155 ++++++++++++++++++ 6 files changed, 578 insertions(+) create mode 100755 src/invoice2data/extract/templates/com/com.cloudflare.yml create mode 100644 src/invoice2data/extract/templates/com/com.hetzner.yml create mode 100644 src/invoice2data/extract/templates/com/com.runbox.yml create mode 100644 src/invoice2data/extract/templates/com/com.vultr.yml create mode 100644 src/invoice2data/extract/templates/nl/nl.fletcher.yml create mode 100644 src/invoice2data/extract/templates/nl/nl.makro.jsonold diff --git a/src/invoice2data/extract/templates/com/com.cloudflare.yml b/src/invoice2data/extract/templates/com/com.cloudflare.yml new file mode 100755 index 00000000..3b0d461c --- /dev/null +++ b/src/invoice2data/extract/templates/com/com.cloudflare.yml @@ -0,0 +1,80 @@ +issuer: Cloudflare, Inc +fields: + amount: + parser: regex + regex: + - Total\s+[$€](\d+.\d{2})\s + type: float + amount_untaxed: + parser: regex + regex: + - Subtotal [(]USD[)]\s+[$€](\d+.\d{2})\s + type: float + amount_tax: + parser: regex + regex: + - Tax Amount\s+[$€](\d+.\d{2})\s + type: float + date: + parser: regex + regex: + - Date[:]\s+(\d{2}.\d{2}.\d{4})\s+ + type: date + invoice_number: + parser: regex + regex: + - INVOICE.\s+(\w+) + partner_website: + parser: regex + regex: + - (cloudflare[.]com) + group: first + partner_name: + parser: regex + regex: + - '(Cloudflare, Inc)' + partner_email: + parser: static + value: billing@cloudflare.com + partner_city: + parser: regex + regex: 'San Francisco' + group: first + 
partner_zip: + parser: regex + regex: 'CA 94107' + country_code: + parser: static + value: US + payment_method: + - (?i)(AMEX) + - (?i)(American express) + - (?i)(VISA) + - (?i)(Vpay) + - (?i)(Mastercard) + - (?i)(CONTANT) + - (?i)(KAS):\s.\s\d+\.\d+ + lines: + parser: lines + rules: + - start: 'Summary of Current Charges' + end: '\s+Total\s+[$€](\d+.\d{2})\s' + line: + - '(?P(\w+(?:\s\S+)*))\s+(?P\d{2}[\/]\d{1,2}[\/]\d{4})\s-\s(?P\d{2}[\/]\d{1,2}[\/]\d{4})\s+(?P\d)\s+[$€](?P\d+.\d{2})\s+[$€](?P\d+.\d{2})' + types: + qty: float + price_unit: float + line_tax_amount: float + price_subtotal: float + date_end: date + date_start: date +keywords: + - 'Cloudflare, Inc' + - INVOICE +options: + date_formats: + - '%d %m %Y' + currency: USD + languages: + - en + decimal_separator: '.' \ No newline at end of file diff --git a/src/invoice2data/extract/templates/com/com.hetzner.yml b/src/invoice2data/extract/templates/com/com.hetzner.yml new file mode 100644 index 00000000..18386873 --- /dev/null +++ b/src/invoice2data/extract/templates/com/com.hetzner.yml @@ -0,0 +1,91 @@ +issuer: Hetzner Online GmbH +fields: + amount: + parser: regex + regex: \s+Amount due[:]\s+[€]\s(\d+[.]\d+) + type: float + amount_tax: + parser: regex + regex: Total\s+[€]\s\d+[.]\d+\s+[€]\s(\d+[.]\d+) + type: float + amount_untaxed: + parser: regex + regex: \s+Subtotal [(]excl. VAT[)]\s+[$€]\s(\d*[.,]?\d+[,.]\d+) + type: float + invoice_number: + parser: regex + regex: Invoice no\.[:] (\w+) + partner_website: + parser: static + value: hetzner.com + partner_email: + parser: static + value: info@hetzner.com + partner_name: + parser: regex + regex: Hetzner Online GmbH + partner_city: + parser: regex + regex: Gunzenhausen + country_code: + parser: static + value: DE + partner_zip: + parser: regex + regex: '(\d{5}) Gunzenhausen' + partner_street: + parser: regex + regex: 'Industriestr. 
\d+' + currency_symbol: + parser: regex + regex: '[$€]' + date: + parser: regex + regex: Invoice date[:] (\d+.\d{2}.\d{4}) + type: date + vat: + parser: regex + regex: 'VAT Reg\. No\.[:] (DE8\w+)' + bic: + parser: regex + regex: 'BIC.\s+(\w{8,11})' + iban: + parser: regex + regex: '[A-Z]{2}\d{2}?\s?\w{4}?\s?\d{4}?\s?\d{4}?\s?\d{4}?\s?\d{0,2}' # modified version, do not copy + lines: + parser: lines + start: 'Pos\s+' + end: 'Subtotal' + first_line: + - '(?P\w+(?:\s\S+)+\s+[(]\d{2}.\d{2}.\d{4}\s-\s\d{2}.\d{2}.\d{4}[)])\s[*]' + - '\d+\s+(?P\w+(?:\s\w+)+)\s+(?P\d+)\s+[$€]\s(?P\d+.\d+)\s+[$€]\s(?P\d+.\d+)' + line: + - (?P\d{2}.\d{2}.\d{4})\s-\s(?P\d{2}.\d{2}.\d{4}) + - Quantity type[:]\s(?P\w+) + types: + qty: float + unit_price: float + price_subtotal: float + date_start: date + date_end: date + tax_lines: + parser: lines + start: 'Tax code' + end: '(?i)Amount due' + line: + - '(?P\d+)\s+(?P\d+[,.]?\d*)[%]\s+[$€]\s(?P(\d*[.,]?\d+[,.]\d+))\s+[$€]\s(?P(\d*[.,]?\d+[,.]\d+))' + types: + line_tax_percent: float + price_subtotal: float + line_tax_amount: float +keywords: + - 'Hetzner Online' + - 'DE812871812' + - 'Invoice' +required_fields: + - lines +options: + languages: + - en + currency: EUR + diff --git a/src/invoice2data/extract/templates/com/com.runbox.yml b/src/invoice2data/extract/templates/com/com.runbox.yml new file mode 100644 index 00000000..7d832cb1 --- /dev/null +++ b/src/invoice2data/extract/templates/com/com.runbox.yml @@ -0,0 +1,66 @@ +issuer: Runbox Solutions AS +fields: + amount: + parser: regex + regex: \s+Total\s+EUR\s(\d*[.,]?\d+[,.]\d+) + type: float + amount_untaxed: + parser: regex + regex: \s+Total\s+EUR\s(\d*[.,]?\d+[,.]\d+) + type: float + invoice_number: + parser: regex + regex: Invoice no[.:]+\s+(\d+) + partner_website: + parser: static + value: runbox.com + partner_name: + parser: regex + regex: Runbox Solutions AS + partner_city: + parser: regex + regex: Oslo + country_code: + parser: static + value: 'NO' + partner_zip: + parser: regex + regex: 
'([,]\s\d{4})\s\w+' + partner_email: + parser: regex + regex: '\w+[@]\w+[.]com' + date: + parser: regex + regex: Invoice date[:]\s+(\d+-\d{2}-\d{2}) + type: date + date_due: + parser: regex + regex: 'Due date[:]\s+(\d{4}[-]\d{2}[-]\d{2})' + type: date + iban: + parser: regex + regex: (?:[A-Z]{2}[ \-]?[0-9]{2})(?:[ \-]?[A-Z0-9]{3,5}){2,7} + bic: + parser: regex + regex: SWIFT code[:]\s+(\w{8,11}) + lines: + parser: lines + start: 'Description' + end: 'Total' + line: + - '(?P[\S ]+)\s+(?P\d+)\s+(?P\d+[,.]\d{2})\s+(?P\d+[,.]\d{2})\s+(?P\d+[,.]\d{2})' + types: + qty: float + price_subtotal: float + line_amount_tax: float + unit_price: float +keywords: + - 'Runbox' + - 'Invoice' +required_fields: + - lines +options: + languages: + - en + currency: EUR + diff --git a/src/invoice2data/extract/templates/com/com.vultr.yml b/src/invoice2data/extract/templates/com/com.vultr.yml new file mode 100644 index 00000000..04b0dbd4 --- /dev/null +++ b/src/invoice2data/extract/templates/com/com.vultr.yml @@ -0,0 +1,82 @@ +issuer: Vultr The Constant Company, LLC. 
+fields: + amount: + parser: regex # done + regex: \s+Total\s[(]\w+ \w+[)][:]\s+.(\d*[.,]?\d+[,.]\d+) + type: float + amount_tax: # done + parser: regex + regex: \d+[,.]\d+[%):]+\s+.(\d*[.,]?\d+[,.]\d+) + type: float + amount_untaxed: # done + parser: regex + regex: \s+Sub Total[:]\s+.(\d*[.,]?\d+[,.]\d+) + type: float + invoice_number: # done + parser: regex + regex: Invoice Number[:] (\d+) + partner_website: # done + parser: static + value: vultr.com + partner_name: # done + parser: regex + regex: Vultr + partner_city: # done + parser: regex + regex: West Palm Beach + state_code: # done + parser: regex + regex: FL + country_code: # done + parser: static + value: US + partner_zip: # done + parser: regex + regex: '([A-Z]{2}\s\d{5})' + partner_street: # done + parser: regex + regex: '\d{3} \w+ Street' + currency_symbol: + parser: regex + regex: '[$€]' + date: # done + parser: regex + regex: Invoice Date[:] (\d+-\d{2}-\d{2}) + type: date + date_due: # done + parser: regex + regex: 'Please Pay By[:] (\w+\s\d{2},\s\d{4})' + type: date + vat: # done + parser: regex # done + regex: 'VAT ID (\w+)' + lines: # done + parser: lines + start: 'Start' + end: 'Total' + line: + - '(?P\d{2}-\d{2}\s\d{2}[:]\d{2})\s+(?P\d{2}-\d{2}\s\d{2}[:]\d{2})\s+(?P[\S ]+)\s+(?P\d+)\s+.(?P\d+[,.]\d{2})' + types: + qty: float + price_subtotal: float + date_start: date + date_end: date + tax_lines: + parser: lines + start: 'Start' + end: '(?i)Please' + line: + - '(?P\d+[,.]\d+)[%):]+\s+.(?P(\d*[.,]?\d+[,.]\d+))' + types: + line_tax_percent: float + line_tax_amount: float +keywords: + - 'vultr' + - 'Invoice' +required_fields: + - lines +options: + languages: + - en + currency: USD + diff --git a/src/invoice2data/extract/templates/nl/nl.fletcher.yml b/src/invoice2data/extract/templates/nl/nl.fletcher.yml new file mode 100644 index 00000000..ad858563 --- /dev/null +++ b/src/invoice2data/extract/templates/nl/nl.fletcher.yml @@ -0,0 +1,104 @@ +issuer: Fletcher Hotel Exploitaties B.V. 
+fields: + amount: + parser: regex + regex: Totaal\s+(\d*[.,]?\d+[,.]\d+) + type: float + amount_tax: + parser: regex + regex: BTW\s+\d+[,.]\d+[%]+\s+Verrekenbaar\s+\d*[.,]?\d+[,.]\d+\s+(\d*[.,]?\d+[,.]\d+)\s+\d*[.,]?\d+[,.]\d+ + type: float + invoice_number: + parser: regex + regex: Factuurnummer+\s+(\d+) + partner_website: + parser: static + value: fletcher.nl + partner_coc: + parser: static + value: '30144691' + partner_name: + parser: static + value: Fletcher Hotel Exploitaties B.V. + partner_street: + parser: static + value: Buizerdlaan 2 + partner_city: + parser: static + value: Nieuwegein + country_code: + parser: static + value: 'NL' + partner_zip: + parser: static + value: '3435 SB' + vat: + parser: regex + regex: BTW[:] (\S+) + date: + parser: regex + regex: Factuur datum\s+(\d+-\d{2}-\d{4}) + type: date + date_due: + parser: regex + regex: 'Te betalen voor\s+(\d+[-]\d{2}[-]\d{4})' + type: date + iban: + parser: regex + regex: (?:[A-Z]{2}[ \-]?[0-9]{2})(?:[ \-]?[A-Z0-9]{3,5}){2,7} + bic: + parser: regex + regex: BIC[:]\s(\w{8,11}) + payment_method: + - '(?i)(AMERICAN EXPRESS)' + - (Eurocard/Mastercard) + lines: + parser: lines + rules: + - start: 'Datum\s+Omschrijving' + end: 'Totaal factuur' + line: + - '(?P\d+-\d{2}-\d{4})\s{11}(?P.+)\s{10,40}(?P\d*)\s+(?P\d+[,.]\d{2})' + types: + qty: float + price_subtotal: float + line_amount_tax: float + unit_price: float + date_start: date + - start: 'Totaal factuur' + end: 'Totaal betaald' + line: + - (?PBetalingen) + - '^(?P\d+[-]\d{2}[-]\d{4}.+)' + tax_lines: + parser: lines + rules: + - start: 'Incl. BTW' + end: '(?i)Totaal\s+' + line: 'BTW\s+(?P\d+[,.]\d+)[%]+\s+.(?P\d*[.,]?\d+[,.]\d+)\s+(?P(\d*[.,]?\d+[,.]\d+))\s+(?P(\d*[.,]?\d+[,.]\d+))' + types: + line_tax_percent: float + line_tax_amount: float + price_subtotal: float + price_total: float + - start: 'Incl. 
BTW' + end: '(?i)Totaal\s+' + line: 'BTW\s+(?P\d+[,.]\d+)[%]+\s+Verrekenbaar\s+(?P\d*[.,]?\d+[,.]\d+)\s+(?P\d*[.,]?\d+[,.]\d+)\s+(?P\d*[.,]?\d+[,.]\d+)' + types: + line_tax_percent: float + line_tax_amount: float + price_subtotal: float + price_total: float +keywords: + - '30144691' + - 'Factuur' +required_fields: + - amount_tax +options: + decimal_separator: "," + languages: + - nl + replace: + - ['\s(\d+)[.](\d{2})', ' \1,\2'] + currency: EUR + diff --git a/src/invoice2data/extract/templates/nl/nl.makro.jsonold b/src/invoice2data/extract/templates/nl/nl.makro.jsonold new file mode 100644 index 00000000..98b18eb2 --- /dev/null +++ b/src/invoice2data/extract/templates/nl/nl.makro.jsonold @@ -0,0 +1,155 @@ +{ + "issuer": "Makro", + "fields": { + "amount": { + "parser": "regex", + "regex": [ + "Te betalen\\s+(\\d+.\\d{2})", + "Totaal[:]\\s+(\\d+.\\d{2})\\sEUR" + ], + "type": "float" + }, + "amount_untaxed": { + "parser": "regex", + "regex": [ + "Netto totaal[:]\\s+(\\d+[,]\\d{2})" + ], + "type": "float" + }, + "date": { + "parser": "regex", + "regex": [ + "Factuurdatum\\s.?\\s+(\\d{2}-\\d{2}-\\d{4}\\s+\\d{2}[:]\\d{2})" + ], + "type": "date" + }, + "invoice_number": { + "parser": "regex", + "regex": [ + "Factuurnummer[:]\\s+(\\S+)" + ] + }, + "vat": { + "parser": "regex", + "regex": [ + "OB\\s+nr[:]\\s+((?:BE|NL)\\w+)" + ] + }, + "partner_coc": { + "parser": "regex", + "regex": [ + "K[.]v[.]K[:]\\s+(\\d{8})" + ] + }, + "partner_website": { + "parser": "regex", + "regex": [ + "www[.](\\w+[.]\\w{2})" + ], + "group": "first" + }, + "telephone": { + "parser": "regex", + "regex": [ + "Telefoon[:]\\s+(\\d+[-]\\d{4,9})" + ], + "group": "first" + }, + "partner_name": { + "parser": "regex", + "regex": [ + "(Metro\\sCash\\s[&]\\sCarry\\sNederland\\sB[.]V[.])" + ] + }, + "partner_city": { + "parser": "regex", + "regex": "Wateringen" + }, + "partner_zip": { + "parser": "regex", + "regex": "2290 AD" + }, + "country_code": { + "parser": "regex", + "regex": "[.](nl|be)" + }, + 
"bic": { + "parser": "regex", + "regex": [ + "BIC\\s.\\s(\\w{8,11})" + ] + }, + "iban": { + "parser": "regex", + "regex": [ + "IBAN.\\s([A-Z]{2}\\d{2} ?\\w{4} ?\\d{4} ?\\d{4} ?\\d{0,2})" + ] + }, + "payment_method": [ + "(?i)(AMEX)", + "(?i)(American express)", + "(?i)(VISA)", + "(?i)(Vpay)", + "(?i)(Mastercard)", + "(?i)(CONTANT)", + "(?i)(KAS):\\s.\\s\\d+\\.\\d+" + ], + "lines": { + "parser": "lines", + "rules": [ + { + "start": "na korting", + "end": "-{134}\\n\\s{18}", + "line": [ + "(?P\\d{13})\\s+(?P(\\w+(?:\\s\\S+)*))\\s+(?P\\d+[,.]\\d+)\\s+(?P(\\d+([,.]\\d{3})?))\\s(?P\\w+)\\s+(?P\\d+[,.]\\d{2})\\s+(?P\\d)\\s+(?P\\d+[,.]\\d{2})\\s+(?P\\d{1,2})\\s+(?P\\d+)?\\s+(?P\\d+[,.]\\d{3})", + "---(?P(\\w+(?:\\s\\S+)*))---" + ] + }, + { + "start": ",\\d{3}\\n-{134}", + "end": "\\Z", + "line": [ + "(?P((?:\\S+\\s)?\\w+(?:\\s\\S+)*))\\s+(?P\\d+[,.]\\d{2}[-])\\s+(?P\\d)\\s+(?P(\\d+))?" + ] + } + ], + "types": { + "qty": "float", + "price_unit": "float", + "line_tax_percent": "float", + "price_subtotal": "float", + "collo": "float", + "stukspereenheid": "float", + "prijs_stuk_nakorting": "float" + } + } + }, + "keywords": [ + "Makro", + "NL001799435B01" + ], + "options": { + "date_formats": [ + "%d %m %Y" + ], + "currency": "EUR", + "languages": [ + "nl" + ], + "decimal_separator": ".", + "replace": [ + [ + ",", + "." + ], + [ + "\\s(?P(\\d+.\\d+))(?P(\\s+))(?P(\\d+(.\\d{3})?))\\s(?P\\w+)\\s+(?P\\d+.\\d{2})\\s+(?P\\d)\\s+(?P\\d+.\\d{2})\\s+(?P1)\\s", + " \\g\\g\\g \\g \\g \\g \\g 21 " + ], + [ + "\\s(?P(\\d+.\\d+))(?P(\\s+))(?P(\\d+(.\\d{3})?))\\s(?P\\w+)\\s+(?P\\d+.\\d{2})\\s+(?P\\d)\\s+(?P\\d+.\\d{2})\\s+(?P5)\\s", + " \\g\\g\\g \\g \\g \\g \\g 9 " + ] + ] + } +} \ No newline at end of file