diff --git a/beancount_import/source/generic_importer_source.py b/beancount_import/source/generic_importer_source.py
index b4d905ef..62594876 100644
--- a/beancount_import/source/generic_importer_source.py
+++ b/beancount_import/source/generic_importer_source.py
@@ -35,8 +35,9 @@
class ImporterSource(DescriptionBasedSource):
def __init__(self,
directory: str,
- account: str,
importer: ImporterProtocol,
+ # use None for importers that are not authoritative and would not clear any postings
+ account: Optional[str]=None,
**kwargs) -> None:
super().__init__(**kwargs)
self.directory = os.path.expanduser(directory)
@@ -57,11 +58,16 @@ def name(self) -> str:
return self.importer.name()
def prepare(self, journal: 'JournalEditor', results: SourceResults) -> None:
- results.add_account(self.account)
+ if self.account:
+ results.add_account(self.account)
entries = OrderedDict() #type: Dict[Hashable, List[Directive]]
for f in self.files:
f_entries = self.importer.extract(f, existing_entries=journal.entries)
+ # if the importer is not authoritative, add all entries to pending
+ if not self.account:
+ results.add_pending_entries(map(self._make_import_result, f_entries))
+ continue
# collect all entries in current statement, grouped by hash
hashed_entries = OrderedDict() #type: Dict[Hashable, Directive]
for entry in f_entries:
@@ -77,14 +83,15 @@ def prepare(self, journal: 'JournalEditor', results: SourceResults) -> None:
n = len(entries[key_])
entries.setdefault(key_, []).extend(hashed_entries[key_][n:])
- get_pending_and_invalid_entries(
- raw_entries=list(itertools.chain.from_iterable(entries.values())),
- journal_entries=journal.all_entries,
- account_set=set([self.account]),
- get_key_from_posting=_get_key_from_posting,
- get_key_from_raw_entry=self._get_key_from_imported_entry,
- make_import_result=self._make_import_result,
- results=results)
+ if self.account:
+ get_pending_and_invalid_entries(
+ raw_entries=list(itertools.chain.from_iterable(entries.values())),
+ journal_entries=journal.all_entries,
+ account_set=set([self.account]),
+ get_key_from_posting=_get_key_from_posting,
+ get_key_from_raw_entry=self._get_key_from_imported_entry,
+ make_import_result=self._make_import_result,
+ results=results)
def _add_description(self, entry: Transaction):
if not isinstance(entry, Transaction): return None
@@ -93,12 +100,10 @@ def _add_description(self, entry: Transaction):
for i, posting in enumerate(postings):
if posting.account != self.account: continue
if isinstance(posting.meta, dict):
- posting.meta["source_desc"] = entry.narration
- posting.meta["date"] = entry.date
- break
+ posting.meta.setdefault("source_desc", entry.narration)
+ posting.meta.setdefault("date", entry.date)
else:
to_mutate.append(i)
- break
for i in to_mutate:
p = postings.pop(i)
p = Posting(p.account, p.units, p.cost, p.price, p.flag,
diff --git a/examples/README.md b/examples/README.md
index 69d527a1..d7362800 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -10,3 +10,8 @@ Examples:
- `fresh`: Example of importing transactions starting with an empty journal.
- `manually_entered`: Example of importing transactions corresponding to
existing, manually-entered transactions.
+ - `multiple_imports`: Example of importing the same transactions from multiple
+   importers, e.g. when you receive transaction emails the same day while the
+   monthly statement arrives at the end of the month. Here, the transaction is
+   imported from the email but not cleared (by setting `account=None` in run.py);
+   it is cleared only at the end of the month by the monthly statement.
diff --git a/examples/data/importers/bank.csv b/examples/data/importers/bank.csv
index ff457c92..b652f4ae 100644
--- a/examples/data/importers/bank.csv
+++ b/examples/data/importers/bank.csv
@@ -1,8 +1,4 @@
"Date","Description","Amount"
-2020-01-01,by debit card-OTHPG 063441 GOOGLE CLOUD,-1
-2020-01-01,by debit card-OTHPG 063444 GOOGLE CLOUD,-1
-2020-01-02,BULK POSTING- 00000008237 250120 GOOGLE,1
2020-01-02,ATM-WD Some Random ATM Machine,-500
-2020-01-02,BULK POSTING- 00000008237 250120 GOOGLE,1
2020-01-05,Transfer to 1234567890123,300
2020-01-14,Transfer to Amex 431145642232,-30
diff --git a/examples/data/importers/single_transaction_email.html b/examples/data/importers/single_transaction_email.html
new file mode 100644
index 00000000..579a4b4e
--- /dev/null
+++ b/examples/data/importers/single_transaction_email.html
@@ -0,0 +1,30 @@
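+<!DOCTYPE html>
+<html>
+<head>
+  <title>Tables Example</title>
+</head>
+<body>
+  <h1>FooBar Bank Transaction Alert</h1>
+  <table>
+    <tr>
+      <th>Account</th>
+    </tr>
+    <tr>
+      <td>********9876</td>
+    </tr>
+  </table>
+  <br>
+  <table>
+    <tr>
+      <th>Date</th>
+      <th>Description</th>
+      <th>Amount</th>
+    </tr>
+    <tr>
+      <td>2020-01-14</td>
+      <td>Cleared Credit Card Bill</td>
+      <td>-30.00</td>
+    </tr>
+  </table>
+</body>
+</html>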
\ No newline at end of file
diff --git a/examples/multiple_imports/accounts.beancount b/examples/multiple_imports/accounts.beancount
new file mode 100644
index 00000000..7c1e4570
--- /dev/null
+++ b/examples/multiple_imports/accounts.beancount
@@ -0,0 +1,5 @@
+1900-01-01 open Assets:FooBarBank EUR
+
+1900-01-01 open Liabilities:Amex-Credit-Card EUR
+
+2020-01-14 open Expenses:Misc EUR
diff --git a/examples/multiple_imports/config.py b/examples/multiple_imports/config.py
new file mode 100644
index 00000000..6e4df380
--- /dev/null
+++ b/examples/multiple_imports/config.py
@@ -0,0 +1,46 @@
+"""
+This config is where you would initialize your importers with personal info
+like the account number or the last four digits of a credit card.
+
+You may also define CONFIG: List[ImporterProtocol] for other beancount tools like
+bean-identify, bean-file, and other beancount scripts to use,
+e.g. `bean-identify _config.py ~/Downloads`
+to identify the files that the importers defined here can process.
+
+beancount-import should have its own run.py where you invoke
+`beancount_import.webserver.main` but import the Importer objects from this config.
+"""
+from beancount.ingest.importers.csv import Importer as CSVImporter, Col
+from foo_bar_email_importer import FooBarTransactionEmailImporter
+
+# Monthly bank statement (CSV export).
+my_foobar_bank_importer = CSVImporter(
+    {
+        Col.DATE: 'Date',
+        Col.NARRATION1: 'Description',
+        Col.AMOUNT: 'Amount',
+    },
+    'Assets:FooBarBank', 'EUR',  # account, currency
+    # regexps used by ImporterProtocol.identify() to identify the correct file
+    '"Date","Description","Amount"',
+)
+
+# Transaction alert emails, filed under the same bank account.
+foobar_email_importer = FooBarTransactionEmailImporter(filing_account='Assets:FooBarBank')
+
+# Monthly credit card statement (CSV export).
+my_amex_cc_importer = CSVImporter(
+    {
+        Col.DATE: 'Date',
+        Col.NARRATION1: 'Description',
+        Col.AMOUNT: 'Amount',
+        Col.BALANCE: 'Balance',
+    },
+    'Liabilities:Amex-Credit-Card', 'EUR',  # account, currency
+    # regexps used by ImporterProtocol.identify() to identify the correct file
+    ('Date,Description,Amount,Balance', 'Credit.*7890'),
+    skip_lines=1,
+)
+
+# beancount's scripts use this
+CONFIG = [my_foobar_bank_importer, foobar_email_importer, my_amex_cc_importer]
diff --git a/examples/multiple_imports/foo_bar_email_importer.py b/examples/multiple_imports/foo_bar_email_importer.py
new file mode 100644
index 00000000..5e28cbbd
--- /dev/null
+++ b/examples/multiple_imports/foo_bar_email_importer.py
@@ -0,0 +1,53 @@
+"""
+Imports a single transaction from a received transaction email.
+The same transaction will also exist in the monthly CSV statement,
+so this importer does not clear the transaction,
+which it indicates by setting `self.account = None`.
+"""
+
+import re
+from beancount.ingest import importer
+from beancount.core import data, flags
+from pathlib import Path
+from dateutil.parser import parse as date_parse
+
+
+class FooBarTransactionEmailImporter(importer.ImporterProtocol):
+    """Extract the single transaction described by a FooBar Bank alert email."""
+
+    def __init__(self, filing_account='Assets:FooBarBank'):
+        self._filing_account = filing_account  # account used for the generated posting
+        self.account = None
+
+    def identify(self, f):
+        """Return True if `f` is an ``.html`` file containing the alert heading."""
+        if not f.name.endswith(".html"):
+            return False
+        return "FooBar Bank Transaction Alert" in Path(f.name).read_text()
+
+    def extract(self, f, existing_entries=None):
+        """Return `[txn]` parsed from the email's transaction table, or `[]` if absent."""
+        # Header row, then one data row whose three cells are captured by name.
+        pattern = r"<tr>\s*<th>Date</th>\s*<th>Description</th>\s*<th>Amount</th>\s*</tr>\s*<tr>\s*<td>(?P<DATE>.*)</td>\s*<td>(?P<DESCRIPTION>.*)</td>\s*<td>(?P<AMOUNT>.*)</td>\s*</tr>"
+        match = re.search(pattern, Path(f.name).read_text())
+        if not match:
+            return []
+        groups = match.groupdict()
+        txn = data.Transaction(
+            meta=data.new_metadata(f.name, 0),
+            date=date_parse(groups["DATE"]).date(),
+            flag=flags.FLAG_OKAY,
+            payee=None,
+            narration=groups["DESCRIPTION"],
+            tags=set(),
+            links=set(),
+            postings=[
+                data.Posting(
+                    account=self._filing_account,
+                    units=data.Amount(data.D(groups["AMOUNT"]), "EUR"),
+                    cost=None, price=None,
+                    flag=None, meta={},
+                )
+            ],
+        )
+        return [txn]
diff --git a/examples/multiple_imports/ignored.beancount b/examples/multiple_imports/ignored.beancount
new file mode 100644
index 00000000..e69de29b
diff --git a/examples/multiple_imports/journal.beancount b/examples/multiple_imports/journal.beancount
new file mode 100644
index 00000000..9b1b6182
--- /dev/null
+++ b/examples/multiple_imports/journal.beancount
@@ -0,0 +1,3 @@
+include "accounts.beancount"
+include "transactions.beancount"
+include "prices.beancount"
diff --git a/examples/multiple_imports/prices.beancount b/examples/multiple_imports/prices.beancount
new file mode 100644
index 00000000..e69de29b
diff --git a/examples/multiple_imports/run.py b/examples/multiple_imports/run.py
new file mode 100755
index 00000000..d80719a9
--- /dev/null
+++ b/examples/multiple_imports/run.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+
+import glob
+import os
+import json
+import sys
+
+from config import my_foobar_bank_importer, my_amex_cc_importer, foobar_email_importer
+
+
+def run_reconcile(extra_args):
+    """Start the beancount-import web UI for this example.
+
+    `extra_args` is forwarded unchanged to `beancount_import.webserver.main`.
+    """
+    import beancount_import.webserver
+
+    here = os.path.dirname(__file__)
+    statements_dir = os.path.join(here, "..", "data", "importers")
+
+    def source(importer, account):
+        # All sources share the generic importer module and the same directory.
+        return dict(
+            module="beancount_import.source.generic_importer_source",
+            importer=importer,
+            account=account,
+            directory=statements_dir,
+        )
+
+    def beside_script(filename):
+        return os.path.join(here, filename)
+
+    data_sources = [
+        # imports monthly bank statements
+        source(my_foobar_bank_importer, "Assets:FooBarBank"),
+        # imports individual transactions from email; account=None because this
+        # source does not clear postings. Note that the bank statement source
+        # above clears the postings imported by this one.
+        source(foobar_email_importer, None),
+        # imports monthly credit card statements
+        source(my_amex_cc_importer, "Liabilities:Amex-Credit-Card"),
+    ]
+
+    accounts_file = beside_script("accounts.beancount")
+    beancount_import.webserver.main(
+        extra_args,
+        journal_input=beside_script("journal.beancount"),
+        ignored_journal=beside_script("ignored.beancount"),
+        default_output=beside_script("transactions.beancount"),
+        open_account_output_map=[
+            (".*", accounts_file),
+        ],
+        balance_account_output_map=[
+            (".*", accounts_file),
+        ],
+        price_output=beside_script("prices.beancount"),
+        data_sources=data_sources,
+    )
+
+
+if __name__ == "__main__":
+    run_reconcile(sys.argv[1:])  # pass the command-line arguments, minus the script name
diff --git a/examples/multiple_imports/transactions.beancount b/examples/multiple_imports/transactions.beancount
new file mode 100644
index 00000000..e69de29b