-
Notifications
You must be signed in to change notification settings - Fork 102
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #62 from dumbPy/feature/generic_importer
Add generic importer source
- Loading branch information
Showing
15 changed files
with
594 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
"""This module implements a Source Subclass for wrapping | ||
`beancount.ingest.importer.ImporterProtocol` subclasses importers. | ||
The importers are considered authoritative for the account they represent. | ||
The Transaction.narration set by each importer is copied to Posting.meta[source_desc] | ||
This helps in predicting postings for similar transaction while allowing the | ||
user to change the Transaction description and payee from UI | ||
(see readme.md for more on source_desc) | ||
This `source_desc` meta is also used to check for cleared postings and should not be | ||
changed manually | ||
Author: Sufiyan Adhikari(github.com/dumbPy) | ||
""" | ||
|
||
import os | ||
from glob import glob | ||
from collections import OrderedDict | ||
import itertools | ||
from typing import Hashable, List, Dict, Optional | ||
|
||
from beancount.core.data import Transaction, Posting, Directive | ||
from beancount.core.amount import Amount | ||
from beancount.ingest.importer import ImporterProtocol | ||
from beancount.ingest.cache import get_file | ||
|
||
from ..matching import FIXME_ACCOUNT, SimpleInventory | ||
from . import ImportResult, SourceResults | ||
from ..journal_editor import JournalEditor | ||
from .description_based_source import DescriptionBasedSource, get_pending_and_invalid_entries | ||
from .mint import _get_key_from_posting | ||
|
||
|
||
class ImporterSource(DescriptionBasedSource):
    """Source that wraps a `beancount.ingest` importer for a single account.

    Every regular file found (recursively) below `directory` that the wrapped
    importer identifies is extracted in `prepare`. The narration of each
    imported transaction is copied to the `source_desc` metadata of the
    posting for `account`, together with the transaction date.
    """

    def __init__(self,
                 directory: str,
                 account: str,
                 importer: ImporterProtocol,
                 **kwargs) -> None:
        """Collect the files under `directory` that `importer` identifies.

        Args:
            directory: Root directory to search recursively; a leading `~`
                is expanded.
            account: Account whose postings this source is responsible for.
            importer: Importer used to identify and extract the files.
            **kwargs: Forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)
        self.directory = os.path.expanduser(directory)
        self.importer = importer
        self.account = account

        # Search the *expanded* directory: globbing the raw argument would
        # silently find nothing for paths such as `~/statements`.
        pattern = os.path.join(self.directory, '**', '*')
        # get _FileMemo object for each file
        files = [
            get_file(path)
            for path in glob(pattern, recursive=True)
            if os.path.isfile(path)
        ]
        # filter the valid files for this importer
        self.files = [f for f in files if self.importer.identify(f)]

    @property
    def name(self) -> str:
        """Name reported by the wrapped importer."""
        return self.importer.name()

    def prepare(self, journal: 'JournalEditor', results: SourceResults) -> None:
        """Extract all identified files and report pending/invalid entries.

        Entries are grouped by the key from `_get_key_from_imported_entry`.
        When the same key occurs in several statements, only the occurrences
        beyond the count already collected from earlier statements are added,
        so overlapping statements do not produce duplicates.
        """
        results.add_account(self.account)

        entries = OrderedDict()  # type: Dict[Hashable, List[Directive]]
        for f in self.files:
            f_entries = self.importer.extract(f, existing_entries=journal.entries)
            # collect all entries in current statement, grouped by hash
            hashed_entries = OrderedDict()  # type: Dict[Hashable, List[Directive]]
            for entry in f_entries:
                key_ = self._get_key_from_imported_entry(entry)
                self._add_description(entry)
                hashed_entries.setdefault(key_, []).append(entry)
            # deduplicate across statements
            for key_, statement_entries in hashed_entries.items():
                collected = entries.setdefault(key_, [])
                # skip as many entries as were already collected from other
                # statements for this key; add the remaining ones
                collected.extend(statement_entries[len(collected):])

        get_pending_and_invalid_entries(
            raw_entries=list(itertools.chain.from_iterable(entries.values())),
            journal_entries=journal.all_entries,
            account_set={self.account},
            get_key_from_posting=_get_key_from_posting,
            get_key_from_raw_entry=self._get_key_from_imported_entry,
            make_import_result=self._make_import_result,
            results=results)

    def _add_description(self, entry: Transaction) -> None:
        """Tag the first posting for `self.account` with `source_desc` and `date`.

        Entries that are not transactions are left untouched. If the posting
        has a metadata dict it is updated in place; otherwise the posting is
        replaced by a copy carrying a new metadata dict.
        """
        if not isinstance(entry, Transaction):
            return None
        postings = entry.postings  # type: List[Posting]
        for i, posting in enumerate(postings):
            if posting.account != self.account:
                continue
            if isinstance(posting.meta, dict):
                posting.meta["source_desc"] = entry.narration
                posting.meta["date"] = entry.date
            else:
                # No usable metadata dict on this posting: swap in a copy
                # that has one, at the same position.
                postings[i] = Posting(
                    posting.account, posting.units, posting.cost,
                    posting.price, posting.flag,
                    {"source_desc": entry.narration, "date": entry.date})
            # Only the first posting for the account is tagged.
            break
        return None

    def _get_source_posting(self, entry: Transaction) -> Optional[Posting]:
        """Return the first posting of `entry` for `self.account`, if any."""
        for posting in entry.postings:
            if posting.account == self.account:
                return posting
        return None

    def _get_key_from_imported_entry(self, entry: Transaction) -> Hashable:
        """Build the deduplication/matching key for an imported entry.

        Returns:
            The tuple `(account, date, units of the source posting, narration)`.

        Raises:
            ValueError: If `entry` has no posting for `self.account`.
        """
        source_posting = self._get_source_posting(entry)
        if source_posting is None:
            raise ValueError("entry has no postings for account: {}".format(self.account))
        return (self.account,
                entry.date,
                source_posting.units,
                entry.narration)

    def _make_import_result(self, imported_entry: Directive) -> ImportResult:
        """Wrap an imported entry in an `ImportResult`.

        Transactions are first balanced with `balance_amounts`.
        """
        if isinstance(imported_entry, Transaction):
            balance_amounts(imported_entry)
        # `get_info` reads meta['filename'], so the result has to be built
        # before that key is removed below.
        result = ImportResult(
            date=imported_entry.date,
            info=get_info(imported_entry),
            entries=[imported_entry])
        # delete filename since it is used by beancount-import to determine if the
        # entry is from journal.
        imported_entry.meta.pop('filename')
        return result
|
||
|
||
def get_info(raw_entry: Directive) -> dict:
    """Describe where an imported entry came from.

    Returns a dict with the mimetype reported for the entry's source file
    (`type`), the source file name (`filename`) and the value of the entry's
    `lineno` metadata (`line`).
    """
    meta = raw_entry.meta
    filename = meta['filename']
    return {
        'type': get_file(filename).mimetype(),
        'filename': filename,
        'line': meta['lineno'],
    }
|
||
def balance_amounts(txn: Transaction) -> None:
    """Append FIXME-account postings for the remaining amount to balance `txn`.

    The units of all postings are accumulated per currency; for every
    currency in the accumulated inventory a posting with the negated total
    is appended to `txn.postings` in place.
    """
    residual = SimpleInventory()
    for posting in txn.postings:
        residual += posting.units

    for currency in residual:
        offset = Amount(currency=currency, number=-residual[currency])
        # Posting fields in order: account, units, cost, price, flag, meta.
        txn.postings.append(
            Posting(FIXME_ACCOUNT, offset, None, None, None, {}))
|
||
|
||
def load(spec, log_status):
    """Build an `ImporterSource` from the keyword arguments in `spec`.

    `log_status` is passed through as a keyword argument alongside the
    entries of `spec`.
    """
    source = ImporterSource(log_status=log_status, **spec)
    return source
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
import os | ||
|
||
import pytest | ||
|
||
from .source_test import check_source_example | ||
from beancount.ingest.importers.csv import Importer as CSVImporter, Col | ||
|
||
# Fixture root for the generic importer source, resolved relative to this
# test module.
testdata_dir = os.path.realpath(
    os.path.join(os.path.dirname(__file__),
                 '..', '..', 'testdata', 'source', 'generic_importer'))

# Names of the example sub-directories exercised by `test_source`.
examples = ['test_basic', 'test_invalid', 'test_training_examples']

# CSV importer for the `Assets:Bank` account in USD; the last argument is
# the header line of the fixture statement.
importer = CSVImporter(
    {
        Col.DATE: 'Date',
        Col.NARRATION1: 'Description',
        Col.AMOUNT: 'Amount',
    },
    'Assets:Bank',
    'USD',
    '"Date","Description","Amount"',
)
|
||
|
||
@pytest.mark.parametrize('name', examples)
def test_source(name: str):
    """Check the generic importer source against the example directory `name`."""
    spec = {
        'module': 'beancount_import.source.generic_importer_source',
        'directory': testdata_dir,
        'account': 'Assets:Bank',
        'importer': importer,
    }
    check_source_example(
        example_dir=os.path.join(testdata_dir, name),
        source_spec=spec,
        replacements=[(testdata_dir, '<testdata>')],
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
"Date","Description","Amount" | ||
2020-01-01,by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-,-1 | ||
2020-01-01,by debit card-OTHPG 063444 GOOGLE CLOUD INDIA PVTTHANE-,-1 | ||
2020-01-02,BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-,1 | ||
2020-01-02,ATM-WD Some Random ATM Machine,500 | ||
2020-01-02,BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-,1 | ||
2020-01-05,Transfer to 1234567890123,300 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Assets:Bank |
113 changes: 113 additions & 0 deletions
113
testdata/source/generic_importer/test_basic/import_results.beancount
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
;; date: 2020-01-01 | ||
;; info: {"filename": "<testdata>/generic_statement.csv", "line": 1, "type": "text/csv"} | ||
|
||
; features: [ | ||
; { | ||
; "amount": "-1 USD", | ||
; "date": "2020-01-01", | ||
; "key_value_pairs": { | ||
; "desc": "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-" | ||
; }, | ||
; "source_account": "Assets:Bank" | ||
; } | ||
; ] | ||
2020-01-01 * "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-" | ||
Assets:Bank -1 USD | ||
date: 2020-01-01 | ||
source_desc: "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-" | ||
Expenses:FIXME 1 USD | ||
|
||
;; date: 2020-01-01 | ||
;; info: {"filename": "<testdata>/generic_statement.csv", "line": 2, "type": "text/csv"} | ||
|
||
; features: [ | ||
; { | ||
; "amount": "-1 USD", | ||
; "date": "2020-01-01", | ||
; "key_value_pairs": { | ||
; "desc": "by debit card-OTHPG 063444 GOOGLE CLOUD INDIA PVTTHANE-" | ||
; }, | ||
; "source_account": "Assets:Bank" | ||
; } | ||
; ] | ||
2020-01-01 * "by debit card-OTHPG 063444 GOOGLE CLOUD INDIA PVTTHANE-" | ||
Assets:Bank -1 USD | ||
date: 2020-01-01 | ||
source_desc: "by debit card-OTHPG 063444 GOOGLE CLOUD INDIA PVTTHANE-" | ||
Expenses:FIXME 1 USD | ||
|
||
;; date: 2020-01-02 | ||
;; info: {"filename": "<testdata>/generic_statement.csv", "line": 3, "type": "text/csv"} | ||
|
||
; features: [ | ||
; { | ||
; "amount": "1 USD", | ||
; "date": "2020-01-02", | ||
; "key_value_pairs": { | ||
; "desc": "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" | ||
; }, | ||
; "source_account": "Assets:Bank" | ||
; } | ||
; ] | ||
2020-01-02 * "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" | ||
Assets:Bank 1 USD | ||
date: 2020-01-02 | ||
source_desc: "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" | ||
Expenses:FIXME -1 USD | ||
|
||
;; date: 2020-01-02 | ||
;; info: {"filename": "<testdata>/generic_statement.csv", "line": 5, "type": "text/csv"} | ||
|
||
; features: [ | ||
; { | ||
; "amount": "1 USD", | ||
; "date": "2020-01-02", | ||
; "key_value_pairs": { | ||
; "desc": "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" | ||
; }, | ||
; "source_account": "Assets:Bank" | ||
; } | ||
; ] | ||
2020-01-02 * "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" | ||
Assets:Bank 1 USD | ||
date: 2020-01-02 | ||
source_desc: "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" | ||
Expenses:FIXME -1 USD | ||
|
||
;; date: 2020-01-02 | ||
;; info: {"filename": "<testdata>/generic_statement.csv", "line": 4, "type": "text/csv"} | ||
|
||
; features: [ | ||
; { | ||
; "amount": "500 USD", | ||
; "date": "2020-01-02", | ||
; "key_value_pairs": { | ||
; "desc": "ATM-WD Some Random ATM Machine" | ||
; }, | ||
; "source_account": "Assets:Bank" | ||
; } | ||
; ] | ||
2020-01-02 * "ATM-WD Some Random ATM Machine" | ||
Assets:Bank 500 USD | ||
date: 2020-01-02 | ||
source_desc: "ATM-WD Some Random ATM Machine" | ||
Expenses:FIXME -500 USD | ||
|
||
;; date: 2020-01-05 | ||
;; info: {"filename": "<testdata>/generic_statement.csv", "line": 6, "type": "text/csv"} | ||
|
||
; features: [ | ||
; { | ||
; "amount": "300 USD", | ||
; "date": "2020-01-05", | ||
; "key_value_pairs": { | ||
; "desc": "Transfer to 1234567890123" | ||
; }, | ||
; "source_account": "Assets:Bank" | ||
; } | ||
; ] | ||
2020-01-05 * "Transfer to 1234567890123" | ||
Assets:Bank 300 USD | ||
date: 2020-01-05 | ||
source_desc: "Transfer to 1234567890123" | ||
Expenses:FIXME -300 USD |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
1900-01-01 open Assets:Bank |
1 change: 1 addition & 0 deletions
1
testdata/source/generic_importer/test_basic/training_examples.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
[] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Assets:Bank |
1 change: 1 addition & 0 deletions
1
testdata/source/generic_importer/test_invalid/import_results.beancount
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
|
68 changes: 68 additions & 0 deletions
68
testdata/source/generic_importer/test_invalid/journal.beancount
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
1900-01-01 open Assets:Bank | ||
1900-01-01 open Assets:Cash | ||
1900-01-01 open Expenses:Misc | ||
1900-01-01 open Liabilities:JohnDoe | ||
|
||
|
||
2020-01-01 * "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-" | ||
Assets:Bank -1 USD | ||
source_desc: "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-" | ||
count: 1 | ||
date: 2020-01-01 | ||
cleared: TRUE | ||
invalid0: "1 extra" | ||
Expenses:Misc 1 USD | ||
|
||
2020-01-01 * "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-" | ||
Assets:Bank -1 USD | ||
source_desc: "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-" | ||
count: 2 | ||
date: 2020-01-01 | ||
cleared: TRUE | ||
invalid0: "1 extra" | ||
Expenses:Misc 1 USD | ||
|
||
2020-01-01 * "by debit card-OTHPG 063444 GOOGLE CLOUD INDIA PVTTHANE-" | ||
Assets:Bank -1 USD | ||
source_desc: "by debit card-OTHPG 063444 GOOGLE CLOUD INDIA PVTTHANE-" | ||
count: 3 | ||
date: 2020-01-01 | ||
cleared: TRUE | ||
Expenses:Misc 1 USD | ||
|
||
2020-01-02 * "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" | ||
Assets:Bank 1 USD | ||
source_desc: "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" | ||
date: 2020-01-02 | ||
cleared: TRUE | ||
Expenses:Misc -1 USD | ||
|
||
2020-01-02 * "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" | ||
Assets:Bank 1 USD | ||
source_desc: "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-" | ||
date: 2020-01-02 | ||
cleared: TRUE | ||
Expenses:Misc -1 USD | ||
|
||
2020-01-02 * "ATM-WD Some Random ATM Machine" | ||
Assets:Bank 500 USD | ||
source_desc: "ATM-WD Some Random ATM Machine" | ||
date: 2020-01-02 | ||
cleared: TRUE | ||
Assets:Cash -500 USD | ||
|
||
2020-01-05 * "Transfer to 1234567890123" | ||
Assets:Bank 300 USD | ||
source_desc: "Transfer to 1234567890123" | ||
date: 2020-01-05 | ||
cleared: TRUE | ||
Liabilities:JohnDoe -300 USD | ||
|
||
2020-01-06 * "Transfer to 1234567890321" | ||
info: "doesn't exist in statement hence invalid" | ||
Assets:Bank 111.11 USD | ||
source_desc: "Transfer to 1234567890123" | ||
date: 2020-01-05 | ||
cleared: TRUE | ||
invalid1: "1 extra" | ||
Liabilities:JohnDoe -111.11 USD |
Oops, something went wrong.