diff --git a/src/undate/undate.py b/src/undate/undate.py index e816adb..db356f5 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -1,6 +1,6 @@ import datetime from calendar import monthrange -from enum import Enum, auto +from enum import IntEnum import re # Pre 3.10 requires Union for multiple types, e.g. Union[int, None] instead of int | None @@ -15,16 +15,22 @@ ONE_DAY = datetime.timedelta(days=1) -class DatePrecision(Enum): +class DatePrecision(IntEnum): """date precision, to indicate date precision independent from how much of the date is known.""" - #: year - YEAR = auto() - #: month - MONTH = auto() + # numbers should be set to allow logical greater than / less than + # comparison, e.g. year precision > month + #: day - DAY = auto() + DAY = 1 + #: month + MONTH = 2 + #: year + YEAR = 3 + + def __str__(self): + return f"{self.name}" class Undate: @@ -173,18 +179,93 @@ def __repr__(self) -> str: return "" % (self.label, self) return "" % self - def __eq__(self, other: "Undate") -> bool: - # question: should label be taken into account when checking equality? - # for now, assuming label differences don't matter for comparing dates - return ( + def __eq__(self, other: Union["Undate", datetime.date]) -> bool: + # Note: assumes label differences don't matter for comparing dates + + # only a day-precision fully known undate can be equal to a datetime.date + if isinstance(other, datetime.date): + return self.earliest == other and self.latest == other + + # check for apparent equality + looks_equal = ( self.earliest == other.earliest and self.latest == other.latest - # NOTE: assumes that partially known values can only be written - # in one format (i.e. X for missing digits). 
- # If we support other formats, will need to normalize to common - # internal format for comparison and self.initial_values == other.initial_values ) + # if everything looks the same, check for any unknowns in initial values + # the same unknown date should NOT be considered equal + + # NOTE: assumes that partially known values can only be written + # in one format (i.e. X for missing digits). + # If we support other formats, will need to normalize to common + # internal format for comparison + if looks_equal and any("X" in str(val) for val in self.initial_values.values()): + return False + return looks_equal + + def __lt__(self, other: Union["Undate", datetime.date]) -> bool: + # support datetime.date by converting to undate + if isinstance(other, datetime.date): + other = Undate.from_datetime_date(other) + + # if this date ends before the other date starts, + # return true (this date is earlier, so it is less) + if self.latest < other.earliest: + return True + + # if the other one ends before this one starts, + # return false (this date is later, so it is not less) + if other.latest < self.earliest: + return False + + # if it does not, check if one is included within the other + # (e.g., single date within the same year) + # comparison for those cases is not currently supported + elif other in self or self in other: + raise NotImplementedError( + "Can't compare when one date falls within the other" + ) + # NOTE: unsupported comparisons are supposed to return NotImplemented + # However, doing that in this case results in a confusing TypeError! + # TypeError: '<' not supported between instances of 'Undate' and 'Undate' + # How to handle when the comparison is ambiguous / indeterminate? + # we may need a tribool / ternary type (true, false, unknown), + # but not sure what python builtin methods will do with it (unknown = false?) 
+ + # for any other case (i.e., self == other), return false + return False + + def __gt__(self, other: Union["Undate", datetime.date]) -> bool: + # define gt ourselves so we can support > comparison with datetime.date, + # but rely on existing less than implementation. + # strictly greater than must rule out equals + return not (self < other or self == other) + + def __le__(self, other: Union["Undate", datetime.date]) -> bool: + return self == other or self < other + + def __contains__(self, other: Union["Undate", datetime.date]) -> bool: + # if the two dates are strictly equal, don't consider + # either one as containing the other + + # support comparison with datetime by converting to undate + if isinstance(other, datetime.date): + other = Undate.from_datetime_date(other) + + if self == other: + return False + + return ( + self.earliest <= other.earliest + and self.latest >= other.latest + # is precision sufficient for comparing partially known dates? + and self.precision > other.precision + ) + + @staticmethod + def from_datetime_date(dt_date): + """Initialize an :class:`Undate` object from a :class:`datetime.date`""" + return Undate(dt_date.year, dt_date.month, dt_date.day) @property def known_year(self) -> bool: diff --git a/tests/test_dateformat/test_base.py b/tests/test_dateformat/test_base.py index 63568f0..3687a37 100644 --- a/tests/test_dateformat/test_base.py +++ b/tests/test_dateformat/test_base.py @@ -31,9 +31,12 @@ def test_parse_to_string(self): BaseDateFormat().to_string(1991) -@pytest.mark.first def test_import_formatters_import_only_once(caplog): - # run first so we can confirm it runs once + # clear the cache, since any instantiation of an Undate + # object anywhere in the test suite will populate it + BaseDateFormat.import_formatters.cache_clear() + + # run first, and confirm it runs and loads formatters with caplog.at_level(logging.DEBUG): import_count = BaseDateFormat.import_formatters() # should import at least one thing (iso8601) diff --git 
a/tests/test_undate.py b/tests/test_undate.py index d02c3d2..723d1a0 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -1,8 +1,13 @@ -from datetime import timedelta +from datetime import timedelta, date import pytest -from undate.undate import Undate, UndateInterval +from undate.undate import Undate, UndateInterval, DatePrecision + + +class TestDatePrecision: + def test_str(self): + assert str(DatePrecision.YEAR) == "YEAR" class TestUndate: @@ -121,12 +126,27 @@ def test_invalid_date(self): with pytest.raises(ValueError): Undate(1990, 22) + def test_from_datetime_date(self): + undate_from_date = Undate.from_datetime_date(date(2001, 3, 5)) + assert isinstance(undate_from_date, Undate) + assert undate_from_date == Undate(2001, 3, 5) + def test_eq(self): assert Undate(2022) == Undate(2022) assert Undate(2022, 10) == Undate(2022, 10) assert Undate(2022, 10, 1) == Undate(2022, 10, 1) assert Undate(month=2, day=7) == Undate(month=2, day=7) + def test_eq_datetime_date(self): + # support comparisons with datetime objects for full day-precision + assert Undate(2022, 10, 1) == date(2022, 10, 1) + assert Undate(2022, 10, 1) != date(2022, 10, 2) + assert Undate(1980, 10, 1) != date(2022, 10, 1) + + # other date precisions are not equal + assert Undate(2022) != date(2022, 10, 1) + assert Undate(2022, 10) != date(2022, 10, 1) + def test_not_eq(self): assert Undate(2022) != Undate(2023) assert Undate(2022, 10) != Undate(2022, 11) @@ -135,6 +155,140 @@ def test_not_eq(self): assert Undate(2022) != Undate(2022, 10) assert Undate(2022, 10) != Undate(2022, 10, 1) + # partially unknown dates should NOT be considered equal + assert Undate("19XX") != Undate("19XX") + assert Undate(1980, "XX") != Undate(1980, "XX") + + testdata_lt_gt = [ + # dates to test for gt/lt comparison: earlier date, later date + # - simple cases: same precision where one date is clearly earlier + (Undate(2022), Undate(2023)), + (Undate(1991, 1), Undate(1991, 5)), + (Undate(1856, 3, 3), Undate(1856, 
3, 21)), + # - mixed precision where one date is clearly earlier + (Undate(1991, 1), Undate(2000)), + (Undate(1856, 3, 3), Undate(1901)), + # partially known digits where comparison is possible + (Undate("19XX"), Undate("20XX")), + (Undate(1900, "0X"), Undate(1900, "1X")), + # compare with datetime.date objects + (Undate("19XX"), date(2020, 1, 1)), + (Undate(1991, 1), date(1992, 3, 4)), + ] + + @pytest.mark.parametrize("earlier,later", testdata_lt_gt) + def test_lt(self, earlier, later): + assert earlier < later + assert later > earlier + + testdata_lte_gte = testdata_lt_gt.copy() + # add a few exactly equal cases + testdata_lte_gte.extend( + [ + (Undate(1601), Undate(1601)), + (Undate(1991, 1), Undate(1991, 1)), + (Undate(1492, 5, 3), Undate(1492, 5, 3)), + # compare with datetime.date also + (Undate(1492, 5, 3), date(1492, 5, 3)), + ] + ) + + def test_lt_when_eq(self): + # strict less than / greater should return false when equal + assert not Undate(1900) > Undate(1900) + assert not Undate(1900) < Undate(1900) + # same for datetime.date + assert not Undate(1903, 1, 5) < date(1903, 1, 5) + assert not Undate(1903, 1, 5) > date(1903, 1, 5) + + @pytest.mark.parametrize("earlier,later", testdata_lte_gte) + def test_lte(self, earlier, later): + assert earlier <= later + assert later >= earlier + + def test_lt_notimplemented(self): + # how to compare mixed precision where dates overlap? + # if the second date falls *within* earliest/latest, + # then it is not clearly less; not implemented? 
+ with pytest.raises(NotImplementedError, match="date falls within the other"): + assert Undate(2022) < Undate(2022, 5) + + # same if we attempt to compare in the other direction + with pytest.raises(NotImplementedError, match="date falls within the other"): + assert Undate(2022, 5) < Undate(2022) + + testdata_contains = [ + # first date falls within the range of the other + # dates within range: middle, start, end, varying precision + (Undate(2022, 6), Undate(2022)), + (Undate(2022, 1, 1), Undate(2022)), + (Undate(2022, 12, 31), Undate(2022)), + (Undate(2022, 6, 15), Undate(2022, 6)), + # support contains with datetime.date + (date(2022, 6, 1), Undate(2022)), + (date(2022, 6, 1), Undate(2022, 6)), + ] + + @pytest.mark.parametrize("date1,date2", testdata_contains) + def test_contains(self, date1, date2): + assert date1 in date2 + + testdata_not_contains = [ + # dates not in range + (Undate(1980), Undate(2020)), + (Undate(1980), Undate(2020, 6)), + (Undate(1980, 6), Undate(2020, 6)), + # support contains with datetime.date + (date(1980, 6, 1), Undate(2022)), + (date(3001, 6, 1), Undate(2022, 6)), + # partially known dates that are similar but same precision, + # so one does not contain the other + (Undate("199X"), Undate("19XX")), + # - specific month to unknown month + (Undate(1980, 6), Undate(1980, "XX")), + # some of these might overlap, but we don't have enough + # information to determine + # - unknown month to unknown month + (Undate(1980, "XX"), Undate(1980, "XX")), + # - partially unknown month to unknown month + (Undate(1801, "1X"), Undate(1801, "XX")), + ] + + @pytest.mark.parametrize("date1,date2", testdata_not_contains) + def test_not_contains(self, date1, date2): + assert date1 not in date2 + + def test_sorting(self): + # sorting should be possible based on gt/lt + # test simple cases for sorting + d1980 = Undate(1980) + d2002_10 = Undate(2002, 10) + d2002_12 = Undate(2002, 12) + d2012_05_01 = Undate(2012, 5, 1) + + assert sorted([d2012_05_01, d2002_12, 
d2002_10, d1980]) == [ + d1980, + d2002_10, + d2002_12, + d2012_05_01, + ] + + # what about semi-ambiguous cases? + d1991_XX = Undate(1991, "XX") + d1992_01_XX = Undate(1992, 1, "XX") + assert sorted([d1992_01_XX, d1991_XX, d1980]) == [d1980, d1991_XX, d1992_01_XX] + + # what about things we can't compare? + d1991 = Undate(1991) + d1991_02 = Undate(1991, 2) + # for now, this will raise a not implemented error + with pytest.raises(NotImplementedError): + sorted([d1991_02, d1991, d1991_XX]) + + # TODO: partially known year? + # someyear = Undate("1XXX") + # assert sorted([d1991, someyear]) == [someyear, d1991] + def test_duration(self): day_duration = Undate(2022, 11, 7).duration() assert isinstance(day_duration, timedelta)