From 286934831af4a3a665dc8584827fd613a893d289 Mon Sep 17 00:00:00 2001 From: Sufiyan Adhikari Date: Sun, 21 Apr 2024 14:28:41 +0530 Subject: [PATCH 1/4] support multiple postings from source account in generic importers closes #232 --- beancount_import/source/generic_importer_source.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/beancount_import/source/generic_importer_source.py b/beancount_import/source/generic_importer_source.py index b4d905ef..c1e7b2f2 100644 --- a/beancount_import/source/generic_importer_source.py +++ b/beancount_import/source/generic_importer_source.py @@ -95,10 +95,8 @@ def _add_description(self, entry: Transaction): if isinstance(posting.meta, dict): posting.meta["source_desc"] = entry.narration posting.meta["date"] = entry.date - break else: to_mutate.append(i) - break for i in to_mutate: p = postings.pop(i) p = Posting(p.account, p.units, p.cost, p.price, p.flag, From d85406c93ae68ab1af103b31ba37e7adb374ba8f Mon Sep 17 00:00:00 2001 From: Sufiyan Adhikari Date: Sun, 21 Apr 2024 14:31:44 +0530 Subject: [PATCH 2/4] allow importers to override source description closes #204 --- beancount_import/source/generic_importer_source.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beancount_import/source/generic_importer_source.py b/beancount_import/source/generic_importer_source.py index c1e7b2f2..a2df6c2b 100644 --- a/beancount_import/source/generic_importer_source.py +++ b/beancount_import/source/generic_importer_source.py @@ -93,8 +93,8 @@ def _add_description(self, entry: Transaction): for i, posting in enumerate(postings): if posting.account != self.account: continue if isinstance(posting.meta, dict): - posting.meta["source_desc"] = entry.narration - posting.meta["date"] = entry.date + posting.meta.setdefault("source_desc", entry.narration) + posting.meta.setdefault("date", entry.date) else: to_mutate.append(i) for i in to_mutate: From 3a25d01e1a7a02d01fa228304344aeb750c6e08a Mon Sep 17 00:00:00 2001 From: Sufiyan 
Adhikari Date: Sun, 21 Apr 2024 16:15:00 +0530 Subject: [PATCH 3/4] support non-authoritative generic sources --- .../source/generic_importer_source.py | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/beancount_import/source/generic_importer_source.py b/beancount_import/source/generic_importer_source.py index a2df6c2b..0cdf6463 100644 --- a/beancount_import/source/generic_importer_source.py +++ b/beancount_import/source/generic_importer_source.py @@ -35,8 +35,8 @@ class ImporterSource(DescriptionBasedSource): def __init__(self, directory: str, - account: str, importer: ImporterProtocol, + account: Optional[str]=None, # use None for importers that are not authoritative and would not clear any postings **kwargs) -> None: super().__init__(**kwargs) self.directory = os.path.expanduser(directory) @@ -57,11 +57,16 @@ def name(self) -> str: return self.importer.name() def prepare(self, journal: 'JournalEditor', results: SourceResults) -> None: - results.add_account(self.account) + if self.account: + results.add_account(self.account) entries = OrderedDict() #type: Dict[Hashable, List[Directive]] for f in self.files: f_entries = self.importer.extract(f, existing_entries=journal.entries) + # if the importer is not authoritative, add all entries to pending + if not self.account: + results.add_pending_entries(map(self._make_import_result, f_entries)) + continue # collect all entries in current statement, grouped by hash hashed_entries = OrderedDict() #type: Dict[Hashable, Directive] for entry in f_entries: @@ -77,14 +82,15 @@ def prepare(self, journal: 'JournalEditor', results: SourceResults) -> None: n = len(entries[key_]) entries.setdefault(key_, []).extend(hashed_entries[key_][n:]) - get_pending_and_invalid_entries( - raw_entries=list(itertools.chain.from_iterable(entries.values())), - journal_entries=journal.all_entries, - account_set=set([self.account]), - get_key_from_posting=_get_key_from_posting, - 
get_key_from_raw_entry=self._get_key_from_imported_entry, - make_import_result=self._make_import_result, - results=results) + if self.account: + get_pending_and_invalid_entries( + raw_entries=list(itertools.chain.from_iterable(entries.values())), + journal_entries=journal.all_entries, + account_set=set([self.account]), + get_key_from_posting=_get_key_from_posting, + get_key_from_raw_entry=self._get_key_from_imported_entry, + make_import_result=self._make_import_result, + results=results) def _add_description(self, entry: Transaction): if not isinstance(entry, Transaction): return None From 0698811973971d502cce2fe39b2331d6014dd5ad Mon Sep 17 00:00:00 2001 From: Sufiyan Adhikari <> Date: Sat, 8 Jun 2024 17:07:05 +0530 Subject: [PATCH 4/4] Add example for optional clearing of postings --- .../source/generic_importer_source.py | 3 +- examples/README.md | 5 ++ examples/data/importers/bank.csv | 4 -- .../importers/single_transaction_email.html | 30 +++++++++ examples/multiple_imports/accounts.beancount | 5 ++ examples/multiple_imports/config.py | 46 ++++++++++++++ .../foo_bar_email_importer.py | 53 ++++++++++++++++ examples/multiple_imports/ignored.beancount | 0 examples/multiple_imports/journal.beancount | 3 + examples/multiple_imports/prices.beancount | 0 examples/multiple_imports/run.py | 62 +++++++++++++++++++ .../multiple_imports/transactions.beancount | 0 12 files changed, 206 insertions(+), 5 deletions(-) create mode 100644 examples/data/importers/single_transaction_email.html create mode 100644 examples/multiple_imports/accounts.beancount create mode 100644 examples/multiple_imports/config.py create mode 100644 examples/multiple_imports/foo_bar_email_importer.py create mode 100644 examples/multiple_imports/ignored.beancount create mode 100644 examples/multiple_imports/journal.beancount create mode 100644 examples/multiple_imports/prices.beancount create mode 100755 examples/multiple_imports/run.py create mode 100644 
examples/multiple_imports/transactions.beancount diff --git a/beancount_import/source/generic_importer_source.py b/beancount_import/source/generic_importer_source.py index 0cdf6463..62594876 100644 --- a/beancount_import/source/generic_importer_source.py +++ b/beancount_import/source/generic_importer_source.py @@ -36,7 +36,8 @@ class ImporterSource(DescriptionBasedSource): def __init__(self, directory: str, importer: ImporterProtocol, - account: Optional[str]=None, # use None for importers that are not authoritative and would not clear any postings + # use None for importers that are not authoritative and would not clear any postings + account: Optional[str]=None, **kwargs) -> None: super().__init__(**kwargs) self.directory = os.path.expanduser(directory) diff --git a/examples/README.md b/examples/README.md index 69d527a1..d7362800 100644 --- a/examples/README.md +++ b/examples/README.md @@ -10,3 +10,8 @@ Examples: - `fresh`: Example of importing transactions starting with an empty journal. - `manually_entered`: Example of importing transactions corresponding to existing, manually-entered transactions. + - `multiple_imports`: Example of importing the same transactions from multiple + importers, e.g. you receive transaction emails the same day while the monthly + statement is received at the end of the month. Here, the transaction is + imported from email but not cleared (by setting `account=None` in run.py) + and is cleared only at the end of the month by the monthly statement. 
diff --git a/examples/data/importers/bank.csv b/examples/data/importers/bank.csv index ff457c92..b652f4ae 100644 --- a/examples/data/importers/bank.csv +++ b/examples/data/importers/bank.csv @@ -1,8 +1,4 @@ "Date","Description","Amount" -2020-01-01,by debit card-OTHPG 063441 GOOGLE CLOUD,-1 -2020-01-01,by debit card-OTHPG 063444 GOOGLE CLOUD,-1 -2020-01-02,BULK POSTING- 00000008237 250120 GOOGLE,1 2020-01-02,ATM-WD Some Random ATM Machine,-500 -2020-01-02,BULK POSTING- 00000008237 250120 GOOGLE,1 2020-01-05,Transfer to 1234567890123,300 2020-01-14,Transfer to Amex 431145642232,-30 diff --git a/examples/data/importers/single_transaction_email.html b/examples/data/importers/single_transaction_email.html new file mode 100644 index 00000000..579a4b4e --- /dev/null +++ b/examples/data/importers/single_transaction_email.html @@ -0,0 +1,30 @@ + + + + Tables Example + + + FooBar Bank Transaction Alert + + + + + + + +
Account
********9876
+
+ + + + + + + + + + + +
DateDescriptionAmount
2020-01-14Cleared Credit Card Bill-30.00
+ + \ No newline at end of file diff --git a/examples/multiple_imports/accounts.beancount b/examples/multiple_imports/accounts.beancount new file mode 100644 index 00000000..7c1e4570 --- /dev/null +++ b/examples/multiple_imports/accounts.beancount @@ -0,0 +1,5 @@ +1900-01-01 open Assets:FooBarBank EUR + +1900-01-01 open Liabilities:Amex-Credit-Card EUR + +2020-01-14 open Expenses:Misc EUR diff --git a/examples/multiple_imports/config.py b/examples/multiple_imports/config.py new file mode 100644 index 00000000..6e4df380 --- /dev/null +++ b/examples/multiple_imports/config.py @@ -0,0 +1,46 @@ +""" +This config is where you would initialize your importers with personal info +like account number or credit card last 4 digits. + +You may also define CONFIG:List[ImporterProtocol] for other beancount tools like +bean-identify, bean-file, and other beancount scripts to use +e.g. `bean-identify _config.py ~/Downloads` +to identify the files that importers defined here can process + +beancount-import should have its own run.py where you invoke the +`beancount_import.webserver.main` but import the Importer objects from this config +""" +from beancount.ingest.importers.csv import Importer as CSVImporter, Col +from foo_bar_email_importer import FooBarTransactionEmailImporter + +my_foobar_bank_importer = CSVImporter({ + Col.DATE: 'Date', + Col.NARRATION1: 'Description', + Col.AMOUNT: 'Amount', + }, + 'Assets:FooBarBank', # account + 'EUR', # currency + # regexps used by ImporterProtocol.identify() to identify the correct file + '"Date","Description","Amount"', + ) + +foobar_email_importer = FooBarTransactionEmailImporter(filing_account='Assets:FooBarBank') + + +my_amex_cc_importer = CSVImporter({ + Col.DATE: 'Date', + Col.NARRATION1: 'Description', + Col.AMOUNT: 'Amount', + Col.BALANCE:'Balance' + }, + 'Liabilities:Amex-Credit-Card', # account + 'EUR', # currency + # regexps used by ImporterProtocol.identify() to identify the correct file + ('Date,Description,Amount,Balance', + 
'Credit.*7890' + ), + skip_lines=1 + ) + +# beancount's scripts use this +CONFIG = [my_foobar_bank_importer, foobar_email_importer, my_amex_cc_importer] diff --git a/examples/multiple_imports/foo_bar_email_importer.py b/examples/multiple_imports/foo_bar_email_importer.py new file mode 100644 index 00000000..5e28cbbd --- /dev/null +++ b/examples/multiple_imports/foo_bar_email_importer.py @@ -0,0 +1,53 @@ +""" +Imports a single transaction from transaction email received. +The same transaction would also exist in monthly csv statement. +so this importer does not clear the transaction, +by setting `self.account=None` +""" + +import re +from beancount.ingest import importer +from beancount.core import data, flags +from pathlib import Path +from dateutil.parser import parse as date_parse + + +class FooBarTransactionEmailImporter(importer.ImporterProtocol): + def __init__(self, filing_account='Assets:FooBarBank'): + self._filing_account = filing_account + self.account = None + + def identify(self, f): + return ( + f.name.endswith(".html") + and re.search(r"FooBar Bank Transaction Alert", Path(f.name).read_text()) + is not None + ) + + def extract(self, f, existing_entries=None): + pattern = r"\s*Date\s*Description\s*Amount\s*\s*\s*(?P.*)\s*(?P.*)\s*(?P.*)\s*" + match = re.search(pattern, Path(f.name).read_text()) + if not match: + return [] + groups = match.groupdict() + txn = data.Transaction( + meta=data.new_metadata(f.name, 0), + date=date_parse(groups["DATE"]).date(), + flag=flags.FLAG_OKAY, + payee=None, + narration=groups["DESCRIPTION"], + tags=set(), + links=set(), + postings=[ + data.Posting( + account=self._filing_account, + units= data.Amount(data.D(groups["AMOUNT"]), "EUR"), + cost=None, + price=None, + flag=None, + meta={}, + ) + ], + ) + # returns the single transaction imported from the transaction email + return [txn] diff --git a/examples/multiple_imports/ignored.beancount b/examples/multiple_imports/ignored.beancount new file mode 100644 index 
00000000..e69de29b diff --git a/examples/multiple_imports/journal.beancount b/examples/multiple_imports/journal.beancount new file mode 100644 index 00000000..9b1b6182 --- /dev/null +++ b/examples/multiple_imports/journal.beancount @@ -0,0 +1,3 @@ +include "accounts.beancount" +include "transactions.beancount" +include "prices.beancount" diff --git a/examples/multiple_imports/prices.beancount b/examples/multiple_imports/prices.beancount new file mode 100644 index 00000000..e69de29b diff --git a/examples/multiple_imports/run.py b/examples/multiple_imports/run.py new file mode 100755 index 00000000..d80719a9 --- /dev/null +++ b/examples/multiple_imports/run.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 + +import glob +import os +import json +import sys + +from config import my_foobar_bank_importer, my_amex_cc_importer, foobar_email_importer + + +def run_reconcile(extra_args): + import beancount_import.webserver + + journal_dir = os.path.dirname(__file__) + data_dir = os.path.join(os.path.dirname(__file__), "..", "data") + + data_sources = [ + dict( + module="beancount_import.source.generic_importer_source", + # imports monthly bank statements + importer=my_foobar_bank_importer, + account="Assets:FooBarBank", + directory=os.path.join(data_dir, "importers"), + ), + dict( + module="beancount_import.source.generic_importer_source", + # imports individual transactions from email + importer=foobar_email_importer, + # this importer just imports transactions from email + # but does not clear the postings, hence account=None + # note that the importer just above this one clears the postings + # imported by this importer + account=None, + directory=os.path.join(data_dir, "importers"), + ), + dict( + module="beancount_import.source.generic_importer_source", + # imports monthly credit card statements + importer=my_amex_cc_importer, + account="Liabilities:Amex-Credit-Card", + directory=os.path.join(data_dir, "importers"), + ), + ] + + beancount_import.webserver.main( + extra_args, 
+ journal_input=os.path.join(journal_dir, "journal.beancount"), + ignored_journal=os.path.join(journal_dir, "ignored.beancount"), + default_output=os.path.join(journal_dir, "transactions.beancount"), + open_account_output_map=[ + (".*", os.path.join(journal_dir, "accounts.beancount")), + ], + balance_account_output_map=[ + (".*", os.path.join(journal_dir, "accounts.beancount")), + ], + price_output=os.path.join(journal_dir, "prices.beancount"), + data_sources=data_sources, + ) + + +if __name__ == "__main__": + run_reconcile(sys.argv[1:]) diff --git a/examples/multiple_imports/transactions.beancount b/examples/multiple_imports/transactions.beancount new file mode 100644 index 00000000..e69de29b