From 273b35faeb5eb20b3abc89f57cd73e56bb5bad43 Mon Sep 17 00:00:00 2001 From: Ankur Dave Date: Wed, 1 Nov 2023 19:18:16 -0400 Subject: [PATCH 1/2] matching.py: Fix incorrect fuzzy amount loop bounds --- beancount_import/matching.py | 6 ++++- beancount_import/matching_test.py | 45 +++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/beancount_import/matching.py b/beancount_import/matching.py index 71961267..54bde06a 100644 --- a/beancount_import/matching.py +++ b/beancount_import/matching.py @@ -444,7 +444,11 @@ def _get_matches( if cur_matches is not None: lower_bound = bisect.bisect_left(cur_matches, (lower, tuple(), None, None)) upper_bound = bisect.bisect_right(cur_matches, (upper, (sys.maxsize,), None, None), lo=lower_bound) - for sp in cur_matches[lower_bound-1 if lower_bound > 0 else 0:upper_bound]: + for sp in cur_matches[lower_bound:upper_bound]: + assert abs(sp.number - amount.number) <= self.fuzzy_match_amount, ( + f'Bug in matching algorithm: {sp} is not within ' + + f'{self.fuzzy_match_amount} of {amount}') + posting = sp.mp.posting # Verify that the account is compatible. if not are_accounts_mergeable(account, posting.account): diff --git a/beancount_import/matching_test.py b/beancount_import/matching_test.py index 151b370a..7b26865b 100755 --- a/beancount_import/matching_test.py +++ b/beancount_import/matching_test.py @@ -721,6 +721,33 @@ def test_match_fuzzy_amount(): note: "B" """) +def test_match_fuzzy_amount_upper_bound(): + # We match despite a skew of 0.01 USD, our configured fuzzy_match_amount. + assert_match( + pending_candidate=""" + 2016-01-01 * "Narration" + note: "A" + Income:A -100 USD + note: "A" + Expenses:FIXME 100 USD + """, + pending=""" + 2016-01-01 * "Narration" + note2: "B" + Assets:B 100.01 STOCK { 1.00 USD } + note: "B" + Expenses:FIXME -100.01 USD + """, + matches=""" + 2016-01-01 * "Narration" + note: "A" + note2: "B" + Income:A -100 USD + note: "A" + Assets:B 100.01 STOCK { 1.00 USD } + note: "B" + """) + def test_nonmatch_fuzzy_amount(): # We don't match with a skew of 0.02, beyond our configured fuzzy_match_amount. assert_match( @@ -739,6 +766,24 @@ def test_nonmatch_fuzzy_amount(): Expenses:FIXME -99.98 USD """) +def test_nonmatch_fuzzy_amount_with_dates(): + # We don't match with a skew of 0.02, beyond our configured fuzzy_match_amount. + assert_match( + pending_candidate=""" + 2023-01-01 * "Transaction 0" + Assets:A 20.00 USD + date: 2023-01-01 + cleared: TRUE + Expenses:FIXME -20.00 USD + """, + pending=""" + 2023-01-01 * "Transaction 1" + Assets:B -19.98 USD + date: 2023-01-01 + cleared: TRUE + Expenses:FIXME 19.98 USD + """) + def test_match_grouped_differing_signs(): # Can group postings of differing signs to make a match. assert_match( From a964adba2733f960c9f55e99bf598366951116e1 Mon Sep 17 00:00:00 2001 From: Ankur Dave Date: Sat, 18 Nov 2023 16:16:00 -0800 Subject: [PATCH 2/2] Clarify the cause of #202: position within `cur_matches` list --- beancount_import/matching_test.py | 35 ++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/beancount_import/matching_test.py b/beancount_import/matching_test.py index 7b26865b..8acae129 100755 --- a/beancount_import/matching_test.py +++ b/beancount_import/matching_test.py @@ -750,6 +750,18 @@ def test_match_fuzzy_amount_upper_bound(): def test_nonmatch_fuzzy_amount(): # We don't match with a skew of 0.02, beyond our configured fuzzy_match_amount. + # + # This test case searches for matches for the "Expenses:FIXME 100 USD" + # posting among the following 4 candidates, ordered by amount: + # + # 1. Income:A -100 USD + # 2. Expenses:FIXME -99.98 USD + # 3. Assets:B 99.98 STOCK { 1.00 USD } + # 4. Expenses:FIXME 100 USD + # + # Only posting #1 should be considered; the others are beyond the + # fuzzy_match_amount. But since posting #1 is from the same transaction as the + # original posting, it should be excluded and no match should be returned. assert_match( pending_candidate=""" 2016-01-01 * "Narration" @@ -766,21 +778,34 @@ def test_nonmatch_fuzzy_amount(): Expenses:FIXME -99.98 USD """) -def test_nonmatch_fuzzy_amount_with_dates(): +def test_nonmatch_fuzzy_amount_2(): # We don't match with a skew of 0.02, beyond our configured fuzzy_match_amount. + # + # This test case searches for matches for the "Expenses:FIXME -20.00 USD" + # posting among the following 4 candidates, ordered by amount: + # + # 1. Expenses:FIXME -20.00 USD + # 2. Assets:B -19.98 USD + # 3. Expenses:FIXME 19.98 USD + # 4. Assets:A 20.00 USD + # + # Only posting #4 should be considered; the others are beyond the + # fuzzy_match_amount. But since posting #4 is from the same transaction as the + # original posting, it should be excluded and no match should be returned. + # + # The difference between this test case and `test_nonmatch_fuzzy_amount()` + # above is the position of the target posting within the candidates list: at + # the end of the list rather than the beginning. This test case reproduces + # issue #202. assert_match( pending_candidate=""" 2023-01-01 * "Transaction 0" Assets:A 20.00 USD - date: 2023-01-01 - cleared: TRUE Expenses:FIXME -20.00 USD """, pending=""" 2023-01-01 * "Transaction 1" Assets:B -19.98 USD - date: 2023-01-01 - cleared: TRUE Expenses:FIXME 19.98 USD """)