From dfe1e4cfbfa1c2a7683feca8d0060eab1878f315 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 27 Oct 2023 12:15:39 -0400 Subject: [PATCH 1/6] Support datetime.date eq comparison for undate with day precision --- src/undate/undate.py | 18 +++++++++++++++++- tests/test_undate.py | 22 ++++++++++++++++++++-- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/src/undate/undate.py b/src/undate/undate.py index e816adb..9fa4793 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -26,6 +26,9 @@ class DatePrecision(Enum): #: day DAY = auto() + def __str__(self): + return f"{self.name}" + class Undate: """Simple object for representing uncertain, fuzzy or partially unknown dates""" @@ -173,9 +176,20 @@ def __repr__(self) -> str: return "" % (self.label, self) return "" % self - def __eq__(self, other: "Undate") -> bool: + def __eq__(self, other: Union["Undate", datetime.date]) -> bool: # question: should label be taken into account when checking equality? # for now, assuming label differences don't matter for comparing dates + + # support comparison with datetime date ONLY for full day precision + if isinstance(other, datetime.date): + if self.precision == DatePrecision.DAY: + return self.earliest == other + else: + raise NotImplementedError( + "Equality comparision with datetime.date not supported for %s precision" + % self.precision + ) + return ( self.earliest == other.earliest and self.latest == other.latest @@ -186,6 +200,8 @@ def __eq__(self, other: "Undate") -> bool: and self.initial_values == other.initial_values ) + # def __lt__(self, other: "") + @property def known_year(self) -> bool: return self.is_known("year") diff --git a/tests/test_undate.py b/tests/test_undate.py index d02c3d2..a38ee88 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -1,8 +1,13 @@ -from datetime import timedelta +from datetime import timedelta, date import pytest -from undate.undate import Undate, UndateInterval +from undate.undate import Undate, 
UndateInterval, DatePrecision + + +class TestDatePrecision: + def test_str(self): + assert str(DatePrecision.YEAR) == "YEAR" class TestUndate: @@ -127,6 +132,19 @@ def test_eq(self): assert Undate(2022, 10, 1) == Undate(2022, 10, 1) assert Undate(month=2, day=7) == Undate(month=2, day=7) + def test_eq_datetime_date(self): + # support comparisons with datetime objects for full day-precision + assert Undate(2022, 10, 1) == date(2022, 10, 1) + assert Undate(2022, 10, 1) != date(2022, 10, 2) + assert Undate(2022, 10, 1) != date(2021, 10, 1) + + # error on attempt to compare when precision is not known to the day + with pytest.raises( + NotImplementedError, + match="Equality comparision with datetime.date not supported for YEAR precision", + ): + assert Undate(2022) == date(2022, 10, 1) + def test_not_eq(self): assert Undate(2022) != Undate(2023) assert Undate(2022, 10) != Undate(2022, 11) From 34b6688118c7c597dacbc81b1ef60d980bb3b5ea Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 27 Oct 2023 15:12:43 -0400 Subject: [PATCH 2/6] Implement and test date comparison methods for simpler cases ref #5 --- src/undate/undate.py | 81 ++++++++++++++++++++++------ tests/test_undate.py | 125 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 191 insertions(+), 15 deletions(-) diff --git a/src/undate/undate.py b/src/undate/undate.py index 9fa4793..4539924 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -1,6 +1,6 @@ import datetime from calendar import monthrange -from enum import Enum, auto +from enum import IntEnum import re # Pre 3.10 requires Union for multiple types, e.g. 
Union[int, None] instead of int | None @@ -15,16 +15,19 @@ ONE_DAY = datetime.timedelta(days=1) -class DatePrecision(Enum): +class DatePrecision(IntEnum): """date precision, to indicate date precision independent from how much of the date is known.""" - #: year - YEAR = auto() - #: month - MONTH = auto() + # numbers should be set to allow logical greater than / less than + # comparison, e.g. year precision > month + #: day - DAY = auto() + DAY = 1 + #: month + MONTH = 2 + #: year + YEAR = 3 def __str__(self): return f"{self.name}" @@ -177,8 +180,7 @@ def __repr__(self) -> str: return "" % self def __eq__(self, other: Union["Undate", datetime.date]) -> bool: - # question: should label be taken into account when checking equality? - # for now, assuming label differences don't matter for comparing dates + # Note: assumes label differences don't matter for comparing dates # support comparison with datetime date ONLY for full day precision if isinstance(other, datetime.date): @@ -190,17 +192,66 @@ def __eq__(self, other: Union["Undate", datetime.date]) -> bool: % self.precision ) - return ( + # check for apparent equality + looks_equal = ( self.earliest == other.earliest and self.latest == other.latest - # NOTE: assumes that partially known values can only be written - # in one format (i.e. X for missing digits). - # If we support other formats, will need to normalize to common - # internal format for comparison and self.initial_values == other.initial_values ) + # if everything looks the same, check for any unknowns in initial values + # the same unknown date should NOT be considered equal + + # NOTE: assumes that partially known values can only be written + # in one format (i.e. X for missing digits). 
+ # If we support other formats, will need to normalize to common + # internal format for comparison + if looks_equal and any("X" in str(val) for val in self.initial_values.values()): + return False + return looks_equal + + def __lt__(self, other: "Undate") -> bool: + # TODO: support datetime.date (?) + + # if this date ends before the other date starts, + # return true (this date is earlier, so it is less) + if self.latest < other.earliest: + return True + + # if the other one ends before this one starts, + # return false (this date is later, so it is not less) + if other.latest < self.earliest: + return False + + # if it does not, check if one is included within the other + # (e.g., single date within the same year) + # comparison for those cases is not currently supported + elif other in self or self in other: + raise NotImplementedError( + "Can't compare when one date falls within the other" + ) + + # for any other case (i.e., self == other), return false + return False + + def __le__(self, other: "Undate") -> bool: + return self == other or self < other + + def __contains__(self, other: "Undate") -> bool: + # if the two dates are strictly equal, don't consider + # either one as containing the other + if self == other: + return False + + # TODO: support datetime.date ? 
- # def __lt__(self, other: "") + return ( + self.earliest <= other.earliest + and self.latest >= other.latest + # precision is not sufficient for comparing partially known dates + and self.precision > other.precision + ) + # TODO: how to compare partially unknown values + # like 19xx and 199x or 1801-XX and 1801-1X @property def known_year(self) -> bool: diff --git a/tests/test_undate.py b/tests/test_undate.py index a38ee88..ba1d6fe 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -153,6 +153,131 @@ def test_not_eq(self): assert Undate(2022) != Undate(2022, 10) assert Undate(2022, 10) != Undate(2022, 10, 1) + # partially unknown dates should NOT be considered equal + assert Undate("19XX") != Undate("19XX") + assert Undate(1980, "XX") != Undate(1980, "XX") + + testdata_lt_gt = [ + # dates to test for gt/lt comparison: earlier date, later date + # - simple cases: same precision where one date is clearly earlier + (Undate(2022), Undate(2023)), + (Undate(1991, 1), Undate(1991, 5)), + (Undate(1856, 3, 3), Undate(1856, 3, 21)), + # - mixed precision where one date is clearly earlier + (Undate(1991, 1), Undate(2000)), + (Undate(1856, 3, 3), Undate(1901)), + # partially known digits where comparison is possible + (Undate("19XX"), Undate("20XX")), + (Undate(1900, "0X"), Undate(1900, "1X")), + ] + + @pytest.mark.parametrize("earlier,later", testdata_lt_gt) + def test_lt(self, earlier, later): + assert earlier < later + assert later > earlier + + testdata_lte_gte = testdata_lt_gt.copy() + # add a few exactly equal cases + testdata_lte_gte.extend( + [ + (Undate(1601), Undate(1601)), + (Undate(1991, 1), Undate(1991, 1)), + (Undate(1492, 5, 3), Undate(1492, 5, 3)), + ] + ) + + def test_lt_when_eq(self): + # strict less than / greater should return false when equal + assert not Undate(1900) > Undate(1900) + assert not Undate(1900) < Undate(1900) + + @pytest.mark.parametrize("earlier,later", testdata_lte_gte) + def test_lte(self, earlier, later): + assert earlier 
<= later + assert later >= earlier + + def test_lt_notimplemented(self): + # how to compare mixed precision where dates overlap? + # if the second date falls *within* earliest/latest, + # then it is not clearly less; not implemented? + with pytest.raises(NotImplementedError, match="date falls within the other"): + assert Undate(2022) < Undate(2022, 5) + + # same if we attempt to compare in the other direction + with pytest.raises(NotImplementedError, match="date falls within the other"): + assert Undate(2022, 5) < Undate(2022) + + testdata_contains = [ + # first date falls within the range of the other + # dates within range: middle, start, end, varying precision + (Undate(2022, 6), Undate(2022)), + (Undate(2022, 1, 1), Undate(2022)), + (Undate(2022, 12, 31), Undate(2022)), + (Undate(2022, 6, 15), Undate(2022, 6)), + # TODO: support partially known dates that are unambiguously in range + # (Undate("199X"), Undate("19XX")), + ] + + @pytest.mark.parametrize("date1,date2", testdata_contains) + def test_contains(self, date1, date2): + assert date1 in date2 + + testdata_not_contains = [ + # dates not in range + (Undate(1980), Undate(2020)), + (Undate(1980), Undate(2020, 6)), + (Undate(1980, 6), Undate(2020, 6)), + ] + + @pytest.mark.parametrize("date1,date2", testdata_not_contains) + def test_not_contains(self, date1, date2): + assert date1 not in date2 + + def test_contains_ambiguous(self): + # date not in range due to precision + # TODO: can we return an unknown instead of false? + # or should this raise a not implemented error? 
+ + # these are cases where dates *might* overlap, + # but we don't have enough information to determine + # - specific month to unknown month + assert Undate(1980, 6) not in Undate(1980, "XX") + # - unknown month to unknown month + assert Undate(1980, "XX") not in Undate(1980, "XX") + assert Undate(1980, 6) not in Undate(1980, "XX") + assert Undate(1801, "1X") not in Undate(1801, "XX") + + def test_sorting(self): + # sorting should be possible based on gt/lt + # test simple cases for sorting + d1980 = Undate(1980) + d2002_10 = Undate(2002, 10) + d2002_12 = Undate(2002, 12) + d2012_05_01 = Undate(2012, 5, 1) + + assert sorted([d2012_05_01, d2002_12, d2002_10, d1980]) == [ + d1980, + d2002_10, + d2002_12, + d2012_05_01, + ] + + # what about semi-ambiguous cases? + d1991_XX = Undate(1991, "XX") + d1992_01_XX = Undate(1992, 1, "XX") + assert sorted([d1992_01_XX, d1991_XX, d1980]) == [d1980, d1991_XX, d1992_01_XX] + + # what about things we can't compare? + d1991 = Undate(1991) + d1991_02 = Undate(1991, 2) + # for now, this will raise a not implemented error + with pytest.raises(NotImplementedError): + sorted([d1991_02, d1991, d1991_XX]) + + # TODO: partially known year? 
+ # someyear = Undate("1XXX") + # assert sorted([d1991, someyear]) == [someyear, d1991] + def test_duration(self): day_duration = Undate(2022, 11, 7).duration() assert isinstance(day_duration, timedelta) From 8cfa8f062e8d69e7c19317d73c6ff5093f26712c Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 27 Oct 2023 15:19:52 -0400 Subject: [PATCH 3/6] Refine eq and contains checks for unknown and partial unknowns --- src/undate/undate.py | 4 +--- tests/test_undate.py | 27 +++++++++++---------------- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/src/undate/undate.py b/src/undate/undate.py index 4539924..aeed091 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -247,11 +247,9 @@ def __contains__(self, other: "Undate") -> bool: return ( self.earliest <= other.earliest and self.latest >= other.latest - # precision is not sufficient for comparing partially known dates + # is precision sufficient for comparing partially known dates? and self.precision > other.precision ) - # TODO: how to compare partially unknown values - # like 19xx and 199x or 1801-XX and 1801-1X @property def known_year(self) -> bool: diff --git a/tests/test_undate.py b/tests/test_undate.py index ba1d6fe..210419f 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -214,8 +214,6 @@ def test_lt_notimplemented(self): (Undate(2022, 1, 1), Undate(2022)), (Undate(2022, 12, 31), Undate(2022)), (Undate(2022, 6, 15), Undate(2022, 6)), - # TODO: support partially known dates that are unambiguously in range - # (Undate("199X"), Undate("19XX")), ] @pytest.mark.parametrize("date1,date2", testdata_contains) @@ -227,26 +225,23 @@ def test_contains(self, date1, date2): (Undate(1980), Undate(2020)), (Undate(1980), Undate(2020, 6)), (Undate(1980, 6), Undate(2020, 6)), + # partially known dates that are similar but same precision, + # so one does not contain the other + (Undate("199X"), Undate("19XX")), + # - specific month to unknown month + (Undate(1980, 6), Undate(1980, "XX")), 
+ # some of these might overlap, but we don't have enough + # information to determine + # - unknown month to unknown month + (Undate(1980, "XX"), Undate(1980, "XX")), + # - partially unknown month to unknown month + (Undate(1801, "1X"), Undate(1801, "XX")), ] @pytest.mark.parametrize("date1,date2", testdata_not_contains) def test_not_contains(self, date1, date2): assert date1 not in date2 - def test_contains_ambiguous(self): - # date not in range due to precision - # TODO: can we return an unknown instead of false? - # or should this raise a not implemented error? - - # these are cases where dates *might* overlap, - # but we don't have enough information to determine - # - specific month to unknown month - assert Undate(1980, 6) not in Undate(1980, "XX") - # - unknown month to unknown month - assert Undate(1980, "XX") not in Undate(1980, "XX") - assert Undate(1980, 6) not in Undate(1980, "XX") - assert Undate(1801, "1X") not in Undate(1801, "XX") - def test_sorting(self): # sorting should be possible based on gt/lt # test simple cases for sorting From e017a495f785a95320beb493fe3959cba7cd7137 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 27 Oct 2023 15:39:22 -0400 Subject: [PATCH 4/6] Improve comparison/contains support for datetime.date --- src/undate/undate.py | 38 ++++++++++++++++++++++++-------------- tests/test_undate.py | 30 +++++++++++++++++++++++------- 2 files changed, 47 insertions(+), 21 deletions(-) diff --git a/src/undate/undate.py b/src/undate/undate.py index aeed091..3e3d514 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -182,15 +182,9 @@ def __repr__(self) -> str: def __eq__(self, other: Union["Undate", datetime.date]) -> bool: # Note: assumes label differences don't matter for comparing dates - # support comparison with datetime date ONLY for full day precision + # only a day-precision fully known undate can be equal to a datetime.date if isinstance(other, datetime.date): - if self.precision == DatePrecision.DAY: - return 
self.earliest == other - else: - raise NotImplementedError( - "Equality comparision with datetime.date not supported for %s precision" - % self.precision - ) + return self.earliest == other and self.latest == other # check for apparent equality looks_equal = ( @@ -209,8 +203,10 @@ def __eq__(self, other: Union["Undate", datetime.date]) -> bool: return False return looks_equal - def __lt__(self, other: "Undate") -> bool: - # TODO: support datetime.date (?) + def __lt__(self, other: Union["Undate", datetime.date]) -> bool: + # support datetime.date by converting to undate + if isinstance(other, datetime.date): + other = Undate.from_datetime_date(other) # if this date ends before the other date starts, # return true (this date is earlier, so it is less) @@ -233,17 +229,26 @@ def __lt__(self, other: "Undate") -> bool: # for any other case (i.e., self == other), return false return False - def __le__(self, other: "Undate") -> bool: + def __gt__(self, other: Union["Undate", datetime.date]) -> bool: + # define gt ourselves so we can support > comparison with datetime.date, + # but rely on existing less than implementation. + # strictly greater than must rule out equals + return not (self < other or self == other) + + def __le__(self, other: Union["Undate", datetime.date]) -> bool: return self == other or self < other - def __contains__(self, other: "Undate") -> bool: + def __contains__(self, other: Union["Undate", datetime.date]) -> bool: # if the two dates are strictly equal, don't consider # either one as containing the other + + # support comparison with datetime by converting to undate + if isinstance(other, datetime.date): + other = Undate.from_datetime_date(other) + if self == other: return False - # TODO: support datetime.date ? 
- return ( self.earliest <= other.earliest and self.latest >= other.latest @@ -251,6 +256,11 @@ def __contains__(self, other: "Undate") -> bool: and self.precision > other.precision ) + @staticmethod + def from_datetime_date(dt_date): + """Initialize an :class:`Undate` object from a :class:`datetime.date`""" + return Undate(dt_date.year, dt_date.month, dt_date.day) + @property def known_year(self) -> bool: return self.is_known("year") diff --git a/tests/test_undate.py b/tests/test_undate.py index 210419f..723d1a0 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -126,6 +126,11 @@ def test_invalid_date(self): with pytest.raises(ValueError): Undate(1990, 22) + def test_from_datetime_date(self): + undate_from_date = Undate.from_datetime_date(date(2001, 3, 5)) + assert isinstance(undate_from_date, Undate) + assert undate_from_date == Undate(2001, 3, 5) + def test_eq(self): assert Undate(2022) == Undate(2022) assert Undate(2022, 10) == Undate(2022, 10) @@ -136,14 +141,11 @@ def test_eq_datetime_date(self): # support comparisons with datetime objects for full day-precision assert Undate(2022, 10, 1) == date(2022, 10, 1) assert Undate(2022, 10, 1) != date(2022, 10, 2) - assert Undate(2022, 10, 1) != date(2021, 10, 1) + assert Undate(1980, 10, 1) != date(2022, 10, 1) - # error on attempt to compare when precision is not known to the day - with pytest.raises( - NotImplementedError, - match="Equality comparision with datetime.date not supported for YEAR precision", - ): - assert Undate(2022) == date(2022, 10, 1) + # other date precisions are not equal + assert Undate(2022) != date(2022, 10, 1) + assert Undate(2022, 10) != date(2022, 10, 1) def test_not_eq(self): assert Undate(2022) != Undate(2023) @@ -169,6 +171,9 @@ def test_not_eq(self): # partially known digits where comparison is possible (Undate("19XX"), Undate("20XX")), (Undate(1900, "0X"), Undate(1900, "1X")), + # compare with datetime.date objects + (Undate("19XX"), date(2020, 1, 1)), + (Undate(1991, 
1), date(1992, 3, 4)), ] @pytest.mark.parametrize("earlier,later", testdata_lt_gt) @@ -183,6 +188,8 @@ def test_lt(self, earlier, later): (Undate(1601), Undate(1601)), (Undate(1991, 1), Undate(1991, 1)), (Undate(1492, 5, 3), Undate(1492, 5, 3)), + # compare with datetime.date also + (Undate(1492, 5, 3), date(1492, 5, 3)), ] ) @@ -190,6 +197,9 @@ def test_lt_when_eq(self): # strict less than / greater should return false when equal assert not Undate(1900) > Undate(1900) assert not Undate(1900) < Undate(1900) + # same for datetime.date + assert not Undate(1903, 1, 5) < date(1903, 1, 5) + assert not Undate(1903, 1, 5) > date(1903, 1, 5) @pytest.mark.parametrize("earlier,later", testdata_lte_gte) def test_lte(self, earlier, later): @@ -214,6 +224,9 @@ def test_lt_notimplemented(self): (Undate(2022, 1, 1), Undate(2022)), (Undate(2022, 12, 31), Undate(2022)), (Undate(2022, 6, 15), Undate(2022, 6)), + # support contains with datetime.date + (date(2022, 6, 1), Undate(2022)), + (date(2022, 6, 1), Undate(2022, 6)), ] @pytest.mark.parametrize("date1,date2", testdata_contains) @@ -225,6 +238,9 @@ def test_contains(self, date1, date2): (Undate(1980), Undate(2020)), (Undate(1980), Undate(2020, 6)), (Undate(1980, 6), Undate(2020, 6)), + # support contains with datetime.date + (date(1980, 6, 1), Undate(2022)), + (date(3001, 6, 1), Undate(2022, 6)), # partially known dates that are similar but same precision, # so one does not contain the other (Undate("199X"), Undate("19XX")), From 1094d0118eec3cbb64cddcef12d0bcfa80ed8676 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 27 Oct 2023 16:06:01 -0400 Subject: [PATCH 5/6] Clear cache before testing that formatters are only loaded once --- tests/test_dateformat/test_base.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/test_dateformat/test_base.py b/tests/test_dateformat/test_base.py index 63568f0..3687a37 100644 --- a/tests/test_dateformat/test_base.py +++ b/tests/test_dateformat/test_base.py @@ 
-31,9 +31,12 @@ def test_parse_to_string(self): BaseDateFormat().to_string(1991) -@pytest.mark.first def test_import_formatters_import_only_once(caplog): - # run first so we can confirm it runs once + # clear the cache, since any instantiation of an Undate + # object anywhere in the test suite will populate it + BaseDateFormat.import_formatters.cache_clear() + + # run first, and confirm it runs and loads formatters with caplog.at_level(logging.DEBUG): import_count = BaseDateFormat.import_formatters() # should import at least one thing (iso8601) From fcdd4c2abe8b4bef45383ac097d399d7f91b5f88 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 27 Oct 2023 17:00:31 -0400 Subject: [PATCH 6/6] Add a note about not implemented exceptions for unknown comparisons --- src/undate/undate.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/undate/undate.py b/src/undate/undate.py index 3e3d514..db356f5 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -225,6 +225,12 @@ def __lt__(self, other: Union["Undate", datetime.date]) -> bool: raise NotImplementedError( "Can't compare when one date falls within the other" ) + # NOTE: unsupported comparisons are supposed to return NotImplemented + # However, doing that in this case results in a confusing TypeError! + # TypeError: '<' not supported between instances of 'Undate' and 'Undate' + # How to handle when the comparison is ambiguous / indeterminate? + # we may need a tribool / ternary type (true, false, unknown), + # but not sure what python builtin methods will do with it (unknown = false?) # for any other case (i.e., self == other), return false return False