From 0ea001e94120c6e7cfb8fcbd7fb573bf169f51cb Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Wed, 5 Mar 2025 19:10:59 -0500 Subject: [PATCH 1/3] Implement UnInt and UnDelta for uncertain date durations Based on previous experiments with uncertainties and portion libraries and feedback from @taylor-arnold --- src/undate/converters/calendars/gregorian.py | 5 +- src/undate/date.py | 137 +++++++++++++++++- src/undate/undate.py | 47 +++++-- tests/test_date.py | 140 ++++++++++++++++++- tests/test_undate.py | 23 ++- 5 files changed, 335 insertions(+), 17 deletions(-) diff --git a/src/undate/converters/calendars/gregorian.py b/src/undate/converters/calendars/gregorian.py index 5a1d2dc..c0e0a19 100644 --- a/src/undate/converters/calendars/gregorian.py +++ b/src/undate/converters/calendars/gregorian.py @@ -13,8 +13,10 @@ class GregorianDateConverter(BaseCalendarConverter): #: calendar calendar_name: str = "Gregorian" - #: known non-leap year + #: arbitrary known non-leap year NON_LEAP_YEAR: int = 2022 + #: arbitrary known leap year + LEAP_YEAR: int = 2024 def min_month(self) -> int: """First month for the Gregorian calendar.""" @@ -38,6 +40,7 @@ def max_day(self, year: int, month: int) -> int: _, max_day = monthrange(year, month) else: # if year and month are unknown, return maximum possible + # TODO: should this return an IntervalRange? max_day = 31 return max_day diff --git a/src/undate/date.py b/src/undate/date.py index 27f6efa..10d9776 100644 --- a/src/undate/date.py +++ b/src/undate/date.py @@ -1,7 +1,9 @@ from enum import IntEnum +from dataclasses import dataclass, replace +import operator # Pre 3.10 requires Union for multiple types, e.g. 
Union[int, None] instead of int | None -from typing import Optional, Union +from typing import Optional, Union, Iterable import numpy as np @@ -29,6 +31,139 @@ def days(self) -> int: return int(self.astype("datetime64[D]").astype("int")) +@dataclass +class UnInt: + lower: int + upper: int + + def __post_init__(self): + # validate that lower value is less than upper + if not self.lower < self.upper: + raise ValueError( + f"Lower value ({self.lower}) must be less than upper ({self.upper})" + ) + + def __iter__(self) -> Iterable: + # yield all integers in range from lower to upper, inclusive + yield from range(self.lower, self.upper + 1) + + def __gt__(self, other: object) -> bool: + match other: + case int(): + return self.upper > other + case UnInt(): + return self.upper > other.lower + case _: + return NotImplemented + + def __lt__(self, other: object) -> bool: + match other: + case int(): + return self.upper < other + case UnInt(): + return self.upper < other.lower + case _: + return NotImplemented + + def __contains__(self, other: object) -> bool: + match other: + case int(): + return other >= self.lower and other <= self.upper + case UnInt(): + return other.lower >= self.lower and other.upper <= self.upper + case _: + # unsupported type: return false + return False + + def _replace_with(self, other_lower, other_upper, op): + """Create and return a new instance of UnInt using the specified + operator (e.g. 
add, subtract) and other values to modify the values in + the current UnInt instance.""" + return replace( + self, lower=op(self.lower, other_lower), upper=op(self.upper, other_upper) + ) + + def __add__(self, other: object) -> bool: + match other: + case int(): + # increase both values by the added amount + add_values = (other, other) + case UnInt(): + # subtract the upper and lower values by the other lower and upper + # to include the largest range of possible values + # (when calculating with uncertain values, the uncertainty increases) + add_values = (other.lower, other.upper) + case _: + return NotImplemented + + return self._replace_with(*add_values, operator.add) + + def __sub__(self, other): + match other: + case int(): + # decrease both values by the subtracted amount + sub_values = (other, other) + case UnInt(): + # to determine the largest range of possible values, + # subtract the other upper value from current lower + # and other lower value from current upper + sub_values = (other.upper, other.lower) + case _: + return NotImplemented + + return self._replace_with(*sub_values, operator.sub) + + +@dataclass +class UnDelta: + """ + An uncertain timedelta, for durations where the number of days is uncertain. + Initialize with a list of possible durations in days as integers, which are used + to calculate a value for duration in :attr:`days` as an + instance of :class:`UnInt`. + """ + + # NOTE: we will probably need other timedelta-like logic here besides days... 
+ + #: possible durations days, as an instance of :class:`UnInt` + days: UnInt + + def __init__(self, *days: int): + if len(days) < 2: + raise ValueError( + "Must specify at least two values for an uncertain duration" + ) + self.days = UnInt(min(days), max(days)) + + def __repr__(self): + # customize string representation for simpler notation; default + # specifies full UnInt initialization with upper and lower keywords + return f"{self.__class__.__name__}(days=[{self.days.lower},{self.days.upper}])" + + # TODO: what does equality for an uncertain range mean? + # is an uncertain range ever equal to another uncertain range? + + def __eq__(self, other: object) -> bool: + # is an uncertain duration ever *equal* another, even if the values are the same? + if other is self: + return True + return False + + def __lt__(self, other: object) -> bool: + match other: + case Timedelta() | UnDelta(): + return self.days < other.days + case _: + return NotImplemented + + def __gt__(self, other: object) -> bool: + match other: + case Timedelta() | UnDelta(): + return self.days > other.days + case _: + return NotImplemented + + #: timedelta for single day ONE_DAY = Timedelta(1) # ~ equivalent to datetime.timedelta(days=1) #: timedelta for a single year (non-leap year) diff --git a/src/undate/undate.py b/src/undate/undate.py index be4454a..74e27d1 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -20,7 +20,7 @@ from typing import Dict, Optional, Union from undate.converters.base import BaseDateConverter -from undate.date import ONE_DAY, ONE_MONTH_MAX, Date, DatePrecision, Timedelta +from undate.date import ONE_DAY, ONE_MONTH_MAX, Date, DatePrecision, Timedelta, UnDelta class Calendar(StrEnum): @@ -439,13 +439,14 @@ def _get_date_part(self, part: str) -> Optional[str]: value = self.initial_values.get(part) return str(value) if value else None - def duration(self) -> Timedelta: + def duration(self) -> Timedelta | UnDelta: """What is the duration of this date? 
Calculate based on earliest and latest date within range, taking into account the precision of the date even if not all parts of the date are known. Note that durations are inclusive (i.e., a closed interval) and include both the earliest and latest - date rather than the difference between them.""" + date rather than the difference between them. Returns a :class:`undate.date.Timedelta` when + possible, and an :class:`undate.date.UnDelta` when the duration is uncertain.""" # if precision is a single day, duration is one day # no matter when it is or what else is known @@ -456,20 +457,48 @@ def duration(self) -> Timedelta: # calculate month duration within a single year (not min/max) if self.precision == DatePrecision.MONTH: latest = self.latest + # if year is unknown, calculate month duration in + # leap year and non-leap year, in case length varies if not self.known_year: - # if year is unknown, calculate month duration in - # a single year - latest = Date(self.earliest.year, self.latest.month, self.latest.day) + # TODO: should leap-year specific logic shift to the calendars, + # since it works differently depending on the calendar? + possible_years = [ + self.calendar_converter.LEAP_YEAR, + self.calendar_converter.NON_LEAP_YEAR, + ] + # TODO: what about partially known years like 191X ? 
+ else: + # otherwise, get possible durations for all possible months + # for a known year + possible_years = [self.earliest.year] + + # for every possible month and year, get max days for that month, + possible_max_days = set() + # appease mypy, which says month values could be None here + if self.earliest.month is not None and self.latest.month is not None: + for possible_month in range(self.earliest.month, self.latest.month + 1): + for year in possible_years: + possible_max_days.add( + self.calendar_converter.max_day(year, possible_month) + ) + + # if there is more than one possible value for month length, + # whether due to leap year / non-leap year or ambiguous month, + # return an uncertain delta + if len(possible_max_days) > 1: + return UnDelta(*possible_max_days) + + # otherwise, calculate timedelta normally + max_day = list(possible_max_days)[0] + latest = Date(self.earliest.year, self.earliest.month, max_day) - # latest = datetime.date( - # self.earliest.year, self.latest.month, self.latest.day - # ) delta = latest - self.earliest + ONE_DAY # month duration can't ever be more than 31 days # (could we ever know if it's smaller?) # if granularity == month but not known month, duration = 31 if delta.astype(int) > 31: + # FIXME: this depends on calendar! 
return ONE_MONTH_MAX return delta diff --git a/tests/test_date.py b/tests/test_date.py index 5ff017d..2044b4f 100644 --- a/tests/test_date.py +++ b/tests/test_date.py @@ -1,5 +1,16 @@ import numpy as np -from undate.date import ONE_YEAR, Date, DatePrecision, Timedelta +import pytest + +from undate.date import ( + ONE_DAY, + ONE_YEAR, + ONE_MONTH_MAX, + Date, + DatePrecision, + Timedelta, + UnDelta, + UnInt, +) class TestDatePrecision: @@ -77,3 +88,130 @@ def test_init_from_np_timedelta64(self): def test_days(self): assert Timedelta(10).days == 10 + + +class TestUnInt: + def test_init(self): + february_days = UnInt(28, 29) # 28 or 29 + assert february_days.lower == 28 + assert february_days.upper == 29 + + # also supports keyword args + anymonth_days = UnInt(lower=28, upper=31) + assert anymonth_days.lower == 28 + assert anymonth_days.upper == 31 + + def test_init_validation(self): + with pytest.raises( + ValueError, match=r"Lower value \(10\) must be less than upper \(4\)" + ): + UnInt(10, 4) + + def test_contains(self): + anymonth_days = UnInt(lower=28, upper=31) + # integer + assert 28 in anymonth_days + assert 29 in anymonth_days + assert 31 in anymonth_days + assert 32 not in anymonth_days + # unint + assert UnInt(28, 29) in anymonth_days + + # other types are assumed not in range + assert "twenty-eight" not in anymonth_days + + def test_iterable(self): + anymonth_days = UnInt(lower=28, upper=31) + assert list(anymonth_days) == [28, 29, 30, 31] + + def test_add(self): + february_days = UnInt(28, 29) + # add integer + assert february_days + 1 == UnInt(29, 30) + # add UnInt - minimum is 28 + 1, maximum is 29 + 2 + assert february_days + UnInt(1, 2) == UnInt(29, 31) + # other types are not supported + with pytest.raises(TypeError, match="unsupported operand"): + february_days + "two" + + def test_subtract(self): + february_days = UnInt(28, 29) + # subtract integer + assert february_days - 10 == UnInt(18, 19) + # subtract UnInt - minimum is lower - largest value, 
maximum is upper - smallest value + # difference between number of days in any month and the month of February? + # [28,31] - [28,29] = [-1, 3] + anymonth_days = UnInt(lower=28, upper=31) + assert anymonth_days - february_days == UnInt(-1, 3) + # what if we go the other direction? + assert february_days - anymonth_days == UnInt(-3, 1) + # other types are not supported + with pytest.raises(TypeError, match="unsupported operand"): + february_days - "two" + + +class TestUnDelta: + def test_init(self): + # February in an unknown year in Gregorian calendar could be 28 or 29 days + february_days = UnInt(28, 29) # 28 or 29 + udelt = UnDelta(28, 29) + assert isinstance(udelt.days, UnInt) + assert udelt.days.lower == 28 + assert udelt.days.upper == 29 + + # NOTE: default portion interval comparison may not be what we want here, + # since this is an unknown value within the range... + # (maybe handled in undelta class comparison methods) + assert udelt.days == february_days + + # do the right thing with more than one value, out of order + unknown_month_duration = UnDelta(30, 31, 28) + assert isinstance(unknown_month_duration.days, UnInt) + assert unknown_month_duration.days.lower == 28 + assert unknown_month_duration.days.upper == 31 + + def test_init_validation(self): + with pytest.raises(ValueError, match="Must specify at least two values"): + UnDelta(10) + + def test_repr(self): + # customized string representation + assert repr(UnDelta(28, 29)) == "UnDelta(days=[28,29])" + + def test_eq(self): + # uncertain deltas are not equivalent + udelt1 = UnDelta(30, 31) + udelt2 = UnDelta(30, 31) + # not equal to equivalent undelta range + assert udelt1 != udelt2 + # equal to self + assert udelt1 is udelt1 + + def test_lt(self): + week_or_tenday = UnDelta(7, 10) + # compare undelta with undelta + month = UnDelta(28, 31) + # a week or ten-day is unambiguously less than a month + assert week_or_tenday < month + # compare undelta with Timedelta + # NOTE: currently requires this 
direction, until we update Timedelta + assert not week_or_tenday < ONE_DAY + # an uncertain month is unambiguously less than a year + assert month < ONE_YEAR + # an uncertain month may or may not be less than one month max + assert not month < ONE_MONTH_MAX + + def test_gt(self): + week_or_tenday = UnDelta(7, 10) + # compare undelta with undelta + month = UnDelta(28, 31) + # a month is unambiguously longer than week or ten-day + assert month > week_or_tenday + # compare undelta with Timedelta + # NOTE: currently requires this direction, until we update Timedelta + # to support the reverse comparison + assert week_or_tenday > ONE_DAY + # an uncertain month is not greater than a year + assert not month > ONE_YEAR + # an uncertain month may or may not be greater than one month max + assert not month > ONE_MONTH_MAX diff --git a/tests/test_undate.py b/tests/test_undate.py index 18e03b0..6dc90b9 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -4,7 +4,7 @@ from undate import Undate, UndateInterval, Calendar from undate.converters.base import BaseCalendarConverter -from undate.date import Date, DatePrecision, Timedelta +from undate.date import Date, DatePrecision, Timedelta, UnDelta, UnInt class TestUndate: @@ -404,10 +404,23 @@ def test_partiallyknown_duration(self): # month in unknown year assert Undate(month=6).duration().days == 30 # partially known month - assert Undate(year=1900, month="1X").duration().days == 31 - # what about february? 
- # could vary with leap years, but assume non-leapyear - assert Undate(month=2).duration().days == 28 + # 1X = October, November, or December = 30 or 31 days + # should return a Undelta object + unknown_month_duration = Undate(year=1900, month="1X").duration() + assert isinstance(unknown_month_duration, UnDelta) + assert unknown_month_duration.days == UnInt(30, 31) + + # completely unknown month should also return a Undelta object + unknown_month_duration = Undate(year=1900, month="XX").duration() + assert isinstance(unknown_month_duration, UnDelta) + # possible range is 28 to 31 days + assert unknown_month_duration.days == UnInt(28, 31) + + # the number of days in February of an unknown year is uncertain, since + # it could vary with leap years; either 28 or 29 days + feb_duration = Undate(month=2).duration() + assert isinstance(feb_duration, UnDelta) + assert feb_duration.days == UnInt(28, 29) def test_known_year(self): assert Undate(2022).known_year is True From 4e5b243c7e48dc7c437bfe8440ed04886891b002 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 9 May 2025 09:58:44 -0400 Subject: [PATCH 2/3] Add a docstring describing UnInt --- src/undate/date.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/undate/date.py b/src/undate/date.py index 10d9776..450360d 100644 --- a/src/undate/date.py +++ b/src/undate/date.py @@ -33,6 +33,13 @@ def days(self) -> int: @dataclass class UnInt: + """An uncertain integer intended for use with uncertain durations (:class:`UnDelta`), + to convey a range of possible integer values between an upper + and lower bound (inclusive). Supports comparison, addition and subtraction, + checking if a value is included in the range, and iterating over numbers + included in the range. 
+ """ + lower: int upper: int From 20a9f8df5e5583a660b63129af16c72994bfb9eb Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 9 May 2025 14:25:34 -0400 Subject: [PATCH 3/3] Fix comparison methods and add tests; incorporate @coderabbitai comments --- src/undate/date.py | 22 ++++++++++------------ tests/test_date.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 12 deletions(-) diff --git a/src/undate/date.py b/src/undate/date.py index 450360d..2a9fa98 100644 --- a/src/undate/date.py +++ b/src/undate/date.py @@ -57,9 +57,9 @@ def __iter__(self) -> Iterable: def __gt__(self, other: object) -> bool: match other: case int(): - return self.upper > other + return self.lower > other case UnInt(): - return self.upper > other.lower + return self.lower > other.upper case _: return NotImplemented @@ -90,14 +90,14 @@ def _replace_with(self, other_lower, other_upper, op): self, lower=op(self.lower, other_lower), upper=op(self.upper, other_upper) ) - def __add__(self, other: object) -> bool: + def __add__(self, other: object) -> "UnInt": match other: case int(): # increase both values by the added amount add_values = (other, other) case UnInt(): - # subtract the upper and lower values by the other lower and upper - # to include the largest range of possible values + # add other lower value to current lower and other upper + # to current upper to include the largest range of possible values # (when calculating with uncertain values, the uncertainty increases) add_values = (other.lower, other.upper) case _: @@ -105,7 +105,7 @@ def __add__(self, other: object) -> bool: return self._replace_with(*add_values, operator.add) - def __sub__(self, other): + def __sub__(self, other) -> "UnInt": match other: case int(): # decrease both values by the subtracted amount @@ -147,14 +147,12 @@ def __repr__(self): # specifies full UnInt initialization with upper and lower keywords return f"{self.__class__.__name__}(days=[{self.days.lower},{self.days.upper}])" - # TODO: what 
does equality for an uncertain range mean? - # is an uncertain range ever equal to another uncertain range? - def __eq__(self, other: object) -> bool: # is an uncertain duration ever *equal* another, even if the values are the same? - if other is self: - return True - return False + # for now, make the assumption that we only want identity equality + # and not value equality; perhaps in future we can revisit + # or add functions to check value equality / equivalence / similarity + return other is self def __lt__(self, other: object) -> bool: match other: diff --git a/tests/test_date.py b/tests/test_date.py index 2044b4f..fb38283 100644 --- a/tests/test_date.py +++ b/tests/test_date.py @@ -120,6 +120,34 @@ def test_contains(self): # other types are assumed not in range assert "twenty-eight" not in anymonth_days + def test_gt(self): + ten_twelve = UnInt(10, 12) + # compare with integer + assert 13 > ten_twelve + assert not 12 > ten_twelve + assert not 9 > ten_twelve + # compare with unint + assert UnInt(13, 23) > ten_twelve + assert not UnInt(12, 24) > ten_twelve + assert not UnInt(2, 4) > ten_twelve + # unsupported type + with pytest.raises(TypeError): + ten_twelve > "three" + + def test_lt(self): + ten_twelve = UnInt(10, 12) + # compare with integer + assert 9 < ten_twelve + assert not 12 < ten_twelve + assert not 13 < ten_twelve + # compare with unint + assert UnInt(2, 4) < ten_twelve + assert not UnInt(12, 24) < ten_twelve + assert not UnInt(13, 23) < ten_twelve + # unsupported type + with pytest.raises(TypeError): + ten_twelve < "three" + def test_iterable(self): anymonth_days = UnInt(lower=28, upper=31) assert list(anymonth_days) == [28, 29, 30, 31]