# ---------------------------------------------------------------------------
# File: beancount_import/source/generic_importer_source.py
# ---------------------------------------------------------------------------
"""Source subclass wrapping `beancount.ingest.importer.ImporterProtocol` importers.

Each wrapped importer is considered authoritative for the account it
represents.

The `Transaction.narration` set by the importer is copied to
`Posting.meta["source_desc"]`.  This helps in predicting postings for similar
transactions while still allowing the user to change the transaction
description and payee from the UI (see readme.md for more on `source_desc`).
The `source_desc` metadata is also used to check for cleared postings and
should not be changed manually.

Author: Sufiyan Adhikari(github.com/dumbPy)
"""

import os
from glob import glob
from collections import OrderedDict
import itertools
from typing import Hashable, List, Dict, Optional

from beancount.core.data import Transaction, Posting, Directive
from beancount.core.amount import Amount
from beancount.ingest.importer import ImporterProtocol
from beancount.ingest.cache import get_file

from ..matching import FIXME_ACCOUNT, SimpleInventory
from . import ImportResult, SourceResults
from ..journal_editor import JournalEditor
from .description_based_source import DescriptionBasedSource, get_pending_and_invalid_entries
from .mint import _get_key_from_posting


class ImporterSource(DescriptionBasedSource):
    """Import source backed by a single beancount importer.

    The statement files are discovered once, at construction time, by
    walking `directory` recursively and keeping every regular file that
    `importer.identify` accepts.
    """

    def __init__(self,
                 directory: str,
                 account: str,
                 importer: ImporterProtocol,
                 **kwargs) -> None:
        """Collect the statement files that `importer` can handle.

        Args:
            directory: Root directory searched recursively for statement
                files.  A leading `~` is expanded.
            account: Account whose postings are treated as the source
                postings of every imported transaction.
            importer: Importer used to identify and extract the statements.
            **kwargs: Forwarded unchanged to `DescriptionBasedSource`.
        """
        super().__init__(**kwargs)
        self.directory = os.path.expanduser(directory)
        self.importer = importer
        self.account = account

        # Glob the *expanded* directory: the raw argument may start with
        # `~`, which `glob` does not expand and would therefore match
        # nothing.
        pattern = os.path.join(self.directory, '**', '*')
        # `get_file` wraps each path in the cached file object that the
        # importer's `identify` / `extract` methods receive.
        candidates = [get_file(path)
                      for path in glob(pattern, recursive=True)
                      if os.path.isfile(path)]
        # Keep only the files this importer recognises.
        self.files = [f for f in candidates if self.importer.identify(f)]

    @property
    def name(self) -> str:
        """Name of this source, as reported by `importer.name()`."""
        return self.importer.name()

    def prepare(self, journal: 'JournalEditor', results: SourceResults) -> None:
        """Extract entries from every statement and report them to `results`.

        Entries are keyed with `_get_key_from_imported_entry`.  When the
        same key shows up in several statements, a statement only
        contributes the entries beyond the count already collected for that
        key, so overlapping statements do not produce duplicates.

        Args:
            journal: Journal whose `entries` are handed to the importer as
                `existing_entries` and whose `all_entries` are matched
                against the imported entries.
            results: Receives the source account and the outcome of
                `get_pending_and_invalid_entries`.
        """
        results.add_account(self.account)

        entries = OrderedDict()  # type: Dict[Hashable, List[Directive]]
        for f in self.files:
            f_entries = self.importer.extract(f, existing_entries=journal.entries)
            # Group the entries of the current statement by key.
            hashed_entries = OrderedDict()  # type: Dict[Hashable, List[Directive]]
            for entry in f_entries:
                key_ = self._get_key_from_imported_entry(entry)
                self._add_description(entry)
                hashed_entries.setdefault(key_, []).append(entry)
            # Deduplicate across statements: skip as many entries as were
            # already collected for this key, add only the remainder.
            for key_, same_key_entries in hashed_entries.items():
                collected = entries.setdefault(key_, [])
                collected.extend(same_key_entries[len(collected):])

        get_pending_and_invalid_entries(
            raw_entries=list(itertools.chain.from_iterable(entries.values())),
            journal_entries=journal.all_entries,
            account_set={self.account},
            get_key_from_posting=_get_key_from_posting,
            get_key_from_raw_entry=self._get_key_from_imported_entry,
            make_import_result=self._make_import_result,
            results=results)

    def _add_description(self, entry: Transaction):
        """Tag the first posting on `self.account` with source metadata.

        Sets `source_desc` (the transaction narration) and `date` (the
        transaction date) on that posting.  A posting whose `meta` is a dict
        is updated in place; otherwise the posting is replaced, at the same
        index, by a copy carrying a fresh metadata dict.  Entries that are
        not transactions are left untouched.
        """
        if not isinstance(entry, Transaction):
            return None
        postings = entry.postings  # type: List[Posting]
        for index, posting in enumerate(postings):
            if posting.account != self.account:
                continue
            if isinstance(posting.meta, dict):
                posting.meta["source_desc"] = entry.narration
                posting.meta["date"] = entry.date
            else:
                postings[index] = Posting(
                    posting.account, posting.units, posting.cost,
                    posting.price, posting.flag,
                    {"source_desc": entry.narration, "date": entry.date})
            # Only the first posting on the source account is annotated.
            break

    def _get_source_posting(self, entry: Transaction) -> Optional[Posting]:
        """Return the first posting of `entry` on `self.account`, if any.

        Directives that are not transactions have no postings to search, so
        `None` is returned for them instead of failing on attribute access.
        """
        if not isinstance(entry, Transaction):
            return None
        for posting in entry.postings:
            if posting.account == self.account:
                return posting
        return None

    def _get_key_from_imported_entry(self, entry: Transaction) -> Hashable:
        """Build the matching key for an imported entry.

        Returns:
            The tuple `(account, entry date, units of the source posting,
            entry narration)`.

        Raises:
            ValueError: If `entry` has no posting on `self.account`.
        """
        source_posting = self._get_source_posting(entry)
        if source_posting is None:
            raise ValueError("entry has no postings for account: {}".format(self.account))
        return (self.account,
                entry.date,
                source_posting.units,
                entry.narration)

    def _make_import_result(self, imported_entry: Directive):
        """Wrap an imported entry in an `ImportResult`.

        Transactions are first balanced with `balance_amounts`.  The entry's
        `filename` metadata is read by `get_info` and removed afterwards.
        """
        if isinstance(imported_entry, Transaction):
            balance_amounts(imported_entry)
        result = ImportResult(
            date=imported_entry.date,
            info=get_info(imported_entry),
            entries=[imported_entry])
        # Delete filename since it is used by beancount-import to determine
        # if the entry is from the journal.  This must happen after
        # `get_info`, which reads it.
        imported_entry.meta.pop('filename')
        return result


def get_info(raw_entry: Directive) -> dict:
    """Describe where `raw_entry` came from.

    Returns:
        A dict with `type` (mimetype of the file named in the entry's
        `filename` metadata), `filename` and `line` (the `lineno` metadata).
    """
    filename = raw_entry.meta['filename']
    return dict(
        type=get_file(filename).mimetype(),
        filename=filename,
        line=raw_entry.meta['lineno'],
    )


def balance_amounts(txn: Transaction) -> None:
    """Add FIXME postings for the remaining amount to balance `txn`.

    The units of all postings are accumulated in a `SimpleInventory`; for
    every currency left in it, a posting on `FIXME_ACCOUNT` with the negated
    amount is appended to `txn.postings` in place.
    """
    inventory = SimpleInventory()
    for posting in txn.postings:
        inventory += posting.units
    for currency in inventory:
        txn.postings.append(
            Posting(
                account=FIXME_ACCOUNT,
                units=Amount(currency=currency, number=-inventory[currency]),
                cost=None,
                price=None,
                flag=None,
                meta={},
            ))


def load(spec, log_status):
    """Create an `ImporterSource` from the keyword arguments in `spec`."""
    return ImporterSource(log_status=log_status, **spec)


# ---------------------------------------------------------------------------
# File: beancount_import/source/generic_importer_source_test.py
# ---------------------------------------------------------------------------
import os

import pytest

from .source_test import check_source_example
from beancount.ingest.importers.csv import Importer as CSVImporter, Col

# Directory holding the fixture statement and the per-example sub-directories.
testdata_dir = os.path.realpath(
    os.path.join(
        os.path.dirname(__file__), '..', '..', 'testdata', 'source', 'generic_importer'))

# Names of the example sub-directories below `testdata_dir`.
examples = [
    'test_basic',
    'test_invalid',
    'test_training_examples'
]

# CSV importer for the fixture statement; the last argument is the header
# line used to recognise the file.
importer = CSVImporter({Col.DATE: 'Date',
                        Col.NARRATION1: 'Description',
                        Col.AMOUNT: 'Amount',
                        },
                       'Assets:Bank',
                       'USD',
                       '"Date","Description","Amount"',
                       )


@pytest.mark.parametrize('name', examples)
def test_source(name: str):
    """Run the generic importer source against one example directory."""
    check_source_example(
        example_dir=os.path.join(testdata_dir, name),
        source_spec={
            'module': 'beancount_import.source.generic_importer_source',
            'directory': testdata_dir,
            'account': 'Assets:Bank',
            'importer': importer,
        },
        replacements=[(testdata_dir, '')])

# The rest of the patch adds fixture data under
# testdata/source/generic_importer/.
b/testdata/source/generic_importer/generic_statement.csv new file mode 100644 index 00000000..5d46d425 --- /dev/null +++ b/testdata/source/generic_importer/generic_statement.csv @@ -0,0 +1,7 @@ +"Date","Description","Amount" +2020-01-01,by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-,-1 +2020-01-01,by debit card-OTHPG 063444 GOOGLE CLOUD INDIA PVTTHANE-,-1 +2020-01-02,BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-,1 +2020-01-02,ATM-WD Some Random ATM Machine,500 +2020-01-02,BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-,1 +2020-01-05,Transfer to 1234567890123,300 diff --git a/testdata/source/generic_importer/test_basic/accounts.txt b/testdata/source/generic_importer/test_basic/accounts.txt new file mode 100644 index 00000000..255958da --- /dev/null +++ b/testdata/source/generic_importer/test_basic/accounts.txt @@ -0,0 +1 @@ +Assets:Bank diff --git a/testdata/source/generic_importer/test_basic/import_results.beancount b/testdata/source/generic_importer/test_basic/import_results.beancount new file mode 100644 index 00000000..1912e28a --- /dev/null +++ b/testdata/source/generic_importer/test_basic/import_results.beancount @@ -0,0 +1,113 @@ +;; date: 2020-01-01 +;; info: {"filename": "/generic_statement.csv", "line": 1, "type": "text/csv"} + +; features: [ +; { +; "amount": "-1 USD", +; "date": "2020-01-01", +; "key_value_pairs": { +; "desc": "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-" +; }, +; "source_account": "Assets:Bank" +; } +; ] +2020-01-01 * "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-" + Assets:Bank -1 USD + date: 2020-01-01 + source_desc: "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-" + Expenses:FIXME 1 USD + +;; date: 2020-01-01 +;; info: {"filename": "/generic_statement.csv", "line": 2, "type": "text/csv"} + +; features: [ +; { +; "amount": "-1 USD", +; "date": "2020-01-01", +; "key_value_pairs": { +; "desc": "by debit card-OTHPG 063444 GOOGLE CLOUD INDIA PVTTHANE-" +; }, +; "source_account": 
"Assets:Bank" +; } +; ] +2020-01-01 * "by debit card-OTHPG 063444 GOOGLE CLOUD INDIA PVTTHANE-" + Assets:Bank -1 USD + date: 2020-01-01 + source_desc: "by debit card-OTHPG 063444 GOOGLE CLOUD INDIA PVTTHANE-" + Expenses:FIXME 1 USD + +;; date: 2020-01-02 +;; info: {"filename": "/generic_statement.csv", "line": 3, "type": "text/csv"} + +; features: [ +; { +; "amount": "1 USD", +; "date": "2020-01-02", +; "key_value_pairs": { +; "desc": "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" +; }, +; "source_account": "Assets:Bank" +; } +; ] +2020-01-02 * "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" + Assets:Bank 1 USD + date: 2020-01-02 + source_desc: "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" + Expenses:FIXME -1 USD + +;; date: 2020-01-02 +;; info: {"filename": "/generic_statement.csv", "line": 5, "type": "text/csv"} + +; features: [ +; { +; "amount": "1 USD", +; "date": "2020-01-02", +; "key_value_pairs": { +; "desc": "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" +; }, +; "source_account": "Assets:Bank" +; } +; ] +2020-01-02 * "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" + Assets:Bank 1 USD + date: 2020-01-02 + source_desc: "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" + Expenses:FIXME -1 USD + +;; date: 2020-01-02 +;; info: {"filename": "/generic_statement.csv", "line": 4, "type": "text/csv"} + +; features: [ +; { +; "amount": "500 USD", +; "date": "2020-01-02", +; "key_value_pairs": { +; "desc": "ATM-WD Some Random ATM Machine" +; }, +; "source_account": "Assets:Bank" +; } +; ] +2020-01-02 * "ATM-WD Some Random ATM Machine" + Assets:Bank 500 USD + date: 2020-01-02 + source_desc: "ATM-WD Some Random ATM Machine" + Expenses:FIXME -500 USD + +;; date: 2020-01-05 +;; info: {"filename": "/generic_statement.csv", "line": 6, "type": "text/csv"} + +; features: [ +; { +; "amount": "300 USD", +; "date": "2020-01-05", +; "key_value_pairs": { +; "desc": "Transfer to 1234567890123" +; }, +; 
"source_account": "Assets:Bank" +; } +; ] +2020-01-05 * "Transfer to 1234567890123" + Assets:Bank 300 USD + date: 2020-01-05 + source_desc: "Transfer to 1234567890123" + Expenses:FIXME -300 USD diff --git a/testdata/source/generic_importer/test_basic/journal.beancount b/testdata/source/generic_importer/test_basic/journal.beancount new file mode 100644 index 00000000..431f1819 --- /dev/null +++ b/testdata/source/generic_importer/test_basic/journal.beancount @@ -0,0 +1 @@ +1900-01-01 open Assets:Bank diff --git a/testdata/source/generic_importer/test_basic/training_examples.json b/testdata/source/generic_importer/test_basic/training_examples.json new file mode 100644 index 00000000..0637a088 --- /dev/null +++ b/testdata/source/generic_importer/test_basic/training_examples.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/testdata/source/generic_importer/test_invalid/accounts.txt b/testdata/source/generic_importer/test_invalid/accounts.txt new file mode 100644 index 00000000..255958da --- /dev/null +++ b/testdata/source/generic_importer/test_invalid/accounts.txt @@ -0,0 +1 @@ +Assets:Bank diff --git a/testdata/source/generic_importer/test_invalid/import_results.beancount b/testdata/source/generic_importer/test_invalid/import_results.beancount new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/testdata/source/generic_importer/test_invalid/import_results.beancount @@ -0,0 +1 @@ + diff --git a/testdata/source/generic_importer/test_invalid/journal.beancount b/testdata/source/generic_importer/test_invalid/journal.beancount new file mode 100644 index 00000000..2fa032d8 --- /dev/null +++ b/testdata/source/generic_importer/test_invalid/journal.beancount @@ -0,0 +1,68 @@ +1900-01-01 open Assets:Bank +1900-01-01 open Assets:Cash +1900-01-01 open Expenses:Misc +1900-01-01 open Liabilities:JohnDoe + + +2020-01-01 * "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-" + Assets:Bank -1 USD + source_desc: "by debit card-OTHPG 063441 GOOGLE CLOUD 
INDIA PVTTHANE-" + count: 1 + date: 2020-01-01 + cleared: TRUE + invalid0: "1 extra" + Expenses:Misc 1 USD + +2020-01-01 * "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-" + Assets:Bank -1 USD + source_desc: "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-" + count: 2 + date: 2020-01-01 + cleared: TRUE + invalid0: "1 extra" + Expenses:Misc 1 USD + +2020-01-01 * "by debit card-OTHPG 063444 GOOGLE CLOUD INDIA PVTTHANE-" + Assets:Bank -1 USD + source_desc: "by debit card-OTHPG 063444 GOOGLE CLOUD INDIA PVTTHANE-" + count: 3 + date: 2020-01-01 + cleared: TRUE + Expenses:Misc 1 USD + +2020-01-02 * "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" + Assets:Bank 1 USD + source_desc: "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" + date: 2020-01-02 + cleared: TRUE + Expenses:Misc -1 USD + +2020-01-02 * "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" + Assets:Bank 1 USD + source_desc: "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" + date: 2020-01-02 + cleared: TRUE + Expenses:Misc -1 USD + +2020-01-02 * "ATM-WD Some Random ATM Machine" + Assets:Bank 500 USD + source_desc: "ATM-WD Some Random ATM Machine" + date: 2020-01-02 + cleared: TRUE + Assets:Cash -500 USD + +2020-01-05 * "Transfer to 1234567890123" + Assets:Bank 300 USD + source_desc: "Transfer to 1234567890123" + date: 2020-01-05 + cleared: TRUE + Liabilities:JohnDoe -300 USD + +2020-01-06 * "Transfer to 1234567890321" + info: "doesn't exist in statement hence invalid" + Assets:Bank 111.11 USD + source_desc: "Transfer to 1234567890123" + date: 2020-01-05 + cleared: TRUE + invalid1: "1 extra" + Liabilities:JohnDoe -111.11 USD diff --git a/testdata/source/generic_importer/test_invalid/training_examples.json b/testdata/source/generic_importer/test_invalid/training_examples.json new file mode 100644 index 00000000..d0e7c237 --- /dev/null +++ b/testdata/source/generic_importer/test_invalid/training_examples.json @@ -0,0 +1,90 @@ +[ + [ + { + "amount": "-1 USD", 
+ "date": "2020-01-01", + "key_value_pairs": { + "desc": "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-" + }, + "source_account": "Assets:Bank" + }, + "Expenses:Misc" + ], + [ + { + "amount": "-1 USD", + "date": "2020-01-01", + "key_value_pairs": { + "desc": "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-" + }, + "source_account": "Assets:Bank" + }, + "Expenses:Misc" + ], + [ + { + "amount": "-1 USD", + "date": "2020-01-01", + "key_value_pairs": { + "desc": "by debit card-OTHPG 063444 GOOGLE CLOUD INDIA PVTTHANE-" + }, + "source_account": "Assets:Bank" + }, + "Expenses:Misc" + ], + [ + { + "amount": "1 USD", + "date": "2020-01-02", + "key_value_pairs": { + "desc": "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" + }, + "source_account": "Assets:Bank" + }, + "Expenses:Misc" + ], + [ + { + "amount": "1 USD", + "date": "2020-01-02", + "key_value_pairs": { + "desc": "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" + }, + "source_account": "Assets:Bank" + }, + "Expenses:Misc" + ], + [ + { + "amount": "500 USD", + "date": "2020-01-02", + "key_value_pairs": { + "desc": "ATM-WD Some Random ATM Machine" + }, + "source_account": "Assets:Bank" + }, + "Assets:Cash" + ], + [ + { + "amount": "300 USD", + "date": "2020-01-05", + "key_value_pairs": { + "desc": "Transfer to 1234567890123" + }, + "source_account": "Assets:Bank" + }, + "Liabilities:JohnDoe" + ], + [ + { + "amount": "111.11 USD", + "date": "2020-01-05", + "key_value_pairs": { + "desc": "Transfer to 1234567890123" + }, + "source_account": "Assets:Bank" + }, + "Liabilities:JohnDoe" + ] +] \ No newline at end of file diff --git a/testdata/source/generic_importer/test_training_examples/accounts.txt b/testdata/source/generic_importer/test_training_examples/accounts.txt new file mode 100644 index 00000000..255958da --- /dev/null +++ b/testdata/source/generic_importer/test_training_examples/accounts.txt @@ -0,0 +1 @@ +Assets:Bank diff --git 
a/testdata/source/generic_importer/test_training_examples/import_results.beancount b/testdata/source/generic_importer/test_training_examples/import_results.beancount new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/testdata/source/generic_importer/test_training_examples/import_results.beancount @@ -0,0 +1 @@ + diff --git a/testdata/source/generic_importer/test_training_examples/journal.beancount b/testdata/source/generic_importer/test_training_examples/journal.beancount new file mode 100644 index 00000000..0db95193 --- /dev/null +++ b/testdata/source/generic_importer/test_training_examples/journal.beancount @@ -0,0 +1,47 @@ +1900-01-01 open Assets:Bank +1900-01-01 open Assets:Cash +1900-01-01 open Expenses:Misc +1900-01-01 open Liabilities:JohnDoe + + +2020-01-01 * "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-" + Assets:Bank -1 USD + source_desc: "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-" + date: 2020-01-01 + cleared: TRUE + Expenses:Misc 1 USD + +2020-01-01 * "by debit card-OTHPG 063444 GOOGLE CLOUD INDIA PVTTHANE-" + Assets:Bank -1 USD + source_desc: "by debit card-OTHPG 063444 GOOGLE CLOUD INDIA PVTTHANE-" + date: 2020-01-01 + cleared: TRUE + Expenses:Misc 1 USD + +2020-01-02 * "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" + Assets:Bank 1 USD + source_desc: "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" + date: 2020-01-02 + cleared: TRUE + Expenses:Misc -1 USD + +2020-01-02 * "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" + Assets:Bank 1 USD + source_desc: "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" + date: 2020-01-02 + cleared: TRUE + Expenses:Misc -1 USD + +2020-01-02 * "ATM-WD Some Random ATM Machine" + Assets:Bank 500 USD + source_desc: "ATM-WD Some Random ATM Machine" + date: 2020-01-02 + cleared: TRUE + Assets:Cash -500 USD + +2020-01-05 * "Transfer to 1234567890123" + Assets:Bank 300 USD + source_desc: "Transfer to 1234567890123" + date: 2020-01-05 + cleared: 
TRUE + Liabilities:JohnDoe -300 USD diff --git a/testdata/source/generic_importer/test_training_examples/training_examples.json b/testdata/source/generic_importer/test_training_examples/training_examples.json new file mode 100644 index 00000000..cae7eff6 --- /dev/null +++ b/testdata/source/generic_importer/test_training_examples/training_examples.json @@ -0,0 +1,68 @@ +[ + [ + { + "amount": "-1 USD", + "date": "2020-01-01", + "key_value_pairs": { + "desc": "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-" + }, + "source_account": "Assets:Bank" + }, + "Expenses:Misc" + ], + [ + { + "amount": "-1 USD", + "date": "2020-01-01", + "key_value_pairs": { + "desc": "by debit card-OTHPG 063444 GOOGLE CLOUD INDIA PVTTHANE-" + }, + "source_account": "Assets:Bank" + }, + "Expenses:Misc" + ], + [ + { + "amount": "1 USD", + "date": "2020-01-02", + "key_value_pairs": { + "desc": "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" + }, + "source_account": "Assets:Bank" + }, + "Expenses:Misc" + ], + [ + { + "amount": "1 USD", + "date": "2020-01-02", + "key_value_pairs": { + "desc": "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" + }, + "source_account": "Assets:Bank" + }, + "Expenses:Misc" + ], + [ + { + "amount": "500 USD", + "date": "2020-01-02", + "key_value_pairs": { + "desc": "ATM-WD Some Random ATM Machine" + }, + "source_account": "Assets:Bank" + }, + "Assets:Cash" + ], + [ + { + "amount": "300 USD", + "date": "2020-01-05", + "key_value_pairs": { + "desc": "Transfer to 1234567890123" + }, + "source_account": "Assets:Bank" + }, + "Liabilities:JohnDoe" + ] +] \ No newline at end of file