From be9d92c85928b88d3cd3fa6aeed232d9fef84c58 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 25 Apr 2025 14:53:01 -0400 Subject: [PATCH 1/5] Add portions to dependencies for variable/ambiguous durations --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2dc6515..0abb5e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ readme = "README.md" license = { text = "Apache-2" } requires-python = ">= 3.10" dynamic = ["version"] -dependencies = ["lark[interegular]", "numpy", "convertdate", "strenum; python_version < '3.11'"] +dependencies = ["lark[interegular]", "numpy", "convertdate", "strenum; python_version < '3.11'", "portion"] authors = [ { name = "Rebecca Sutton Koeser" }, { name = "Cole Crawford" }, From d564516f8375ed2c9b2fa85db7eee5c400ed5ab6 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Wed, 5 Mar 2025 19:10:59 -0500 Subject: [PATCH 2/5] Preliminary implementation of Udelta (uncertain duration) with portion --- src/undate/date.py | 28 ++++++++++++++++++++++++++++ src/undate/undate.py | 4 ++++ tests/test_date.py | 33 ++++++++++++++++++++++++++++++++- tests/test_undate.py | 11 ++++++----- 4 files changed, 70 insertions(+), 6 deletions(-) diff --git a/src/undate/date.py b/src/undate/date.py index 27f6efa..0373dc0 100644 --- a/src/undate/date.py +++ b/src/undate/date.py @@ -1,9 +1,11 @@ from enum import IntEnum +from dataclasses import dataclass # Pre 3.10 requires Union for multiple types, e.g. Union[int, None] instead of int | None from typing import Optional, Union import numpy as np +import portion class Timedelta(np.ndarray): @@ -29,6 +31,32 @@ def days(self) -> int: return int(self.astype("datetime64[D]").astype("int")) +class IntegerRange(portion.AbstractDiscreteInterval): + """Range of integer values. Implemented as a closed integer interval, + subclass of :class:`portion.AbstractDiscreteInterval` with a + step of 1. 
+ + Initialize by passing in the lower (min) and upper (max) values + included in the range. + + """ + + _step = 1 + + def __init__(self, lower: int, upper: int): + # base init method takes one or more intervals; we want a single closed interval + if not lower < upper: + raise ValueError(f"Lower value {lower} must be less than upper {upper}") + super().__init__(portion.closed(lower, upper)) + + +@dataclass +class Udelta: + days: IntegerRange + # def __init__(self, deltadays: ufloat): + # self.days = deltadays + + #: timedelta for single day ONE_DAY = Timedelta(1) # ~ equivalent to datetime.timedelta(days=1) #: timedelta for a single year (non-leap year) diff --git a/src/undate/undate.py b/src/undate/undate.py index be4454a..217236f 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -461,6 +461,10 @@ def duration(self) -> Timedelta: # a single year latest = Date(self.earliest.year, self.latest.month, self.latest.day) + # TODO: calculate duration for a leap year and a non-leap year, + # then return a udelta if they vary + # TODO: how does this logic work for other calendars? 
+ # latest = datetime.date( # self.earliest.year, self.latest.month, self.latest.day # ) diff --git a/tests/test_date.py b/tests/test_date.py index 5ff017d..cb3b3d4 100644 --- a/tests/test_date.py +++ b/tests/test_date.py @@ -1,5 +1,8 @@ import numpy as np -from undate.date import ONE_YEAR, Date, DatePrecision, Timedelta +import portion +import pytest + +from undate.date import ONE_YEAR, Date, DatePrecision, Timedelta, Udelta, IntegerRange class TestDatePrecision: @@ -77,3 +80,31 @@ def test_init_from_np_timedelta64(self): def test_days(self): assert Timedelta(10).days == 10 + + +class TestIntegerRange: + def test_init(self): + february_days = IntegerRange(28, 29) # 28 or 29 + assert february_days.lower == 28 + assert february_days.upper == 29 + assert february_days.left == portion.CLOSED + assert february_days.right == portion.CLOSED + assert 28 in february_days + assert 29 in february_days + assert 30 not in february_days + + def test_init_validation(self): + with pytest.raises(ValueError): + IntegerRange(10, 4) + + # TODO: test/implement comparisons + # NOTE: this results in a deprecation warning; + # implement conversion to singleton in the class? 
+ # assert 30 > february_days + + +class TestUdelta: + def test_init(self): + february_days = IntegerRange(28, 29) # 28 or 29 + udelt = Udelta(february_days) + assert udelt.days == february_days diff --git a/tests/test_undate.py b/tests/test_undate.py index 18e03b0..8e4355d 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -4,7 +4,7 @@ from undate import Undate, UndateInterval, Calendar from undate.converters.base import BaseCalendarConverter -from undate.date import Date, DatePrecision, Timedelta +from undate.date import Date, DatePrecision, Timedelta, IntegerRange class TestUndate: @@ -404,10 +404,11 @@ def test_partiallyknown_duration(self): # month in unknown year assert Undate(month=6).duration().days == 30 # partially known month - assert Undate(year=1900, month="1X").duration().days == 31 - # what about february? - # could vary with leap years, but assume non-leapyear - assert Undate(month=2).duration().days == 28 + # 1X = October, November, or December = 30 or 31 days + assert Undate(year=1900, month="1X").duration().days == IntegerRange(30, 31) + # what about February? 
+ # could vary with leap years; either 28 or 29 days + assert Undate(month=2).duration().days == IntegerRange(28, 29) def test_known_year(self): assert Undate(2022).known_year is True From 710f7471b750048b7f5914d965c847e9732e1075 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 13 Mar 2025 17:57:56 -0400 Subject: [PATCH 3/5] Use Udelta and interval range for uncertain durations --- src/undate/converters/calendars/gregorian.py | 5 +- src/undate/date.py | 15 +++++- src/undate/undate.py | 51 +++++++++++++++----- tests/test_date.py | 16 +++++- tests/test_undate.py | 22 +++++++-- 5 files changed, 87 insertions(+), 22 deletions(-) diff --git a/src/undate/converters/calendars/gregorian.py b/src/undate/converters/calendars/gregorian.py index 5a1d2dc..4843e24 100644 --- a/src/undate/converters/calendars/gregorian.py +++ b/src/undate/converters/calendars/gregorian.py @@ -13,8 +13,10 @@ class GregorianDateConverter(BaseCalendarConverter): #: calendar calendar_name: str = "Gregorian" - #: known non-leap year + #: arbitrary known non-leap year NON_LEAP_YEAR: int = 2022 + #: arbitrary known leap year + LEAP_YEAR: int = 2024 def min_month(self) -> int: """First month for the Gregorian calendar.""" @@ -38,6 +40,7 @@ def max_day(self, year: int, month: int) -> int: _, max_day = monthrange(year, month) else: # if year and month are unknown, return maximum possible + # TODO: should this return a ufloat? max_day = 31 return max_day diff --git a/src/undate/date.py b/src/undate/date.py index 0373dc0..3533a3f 100644 --- a/src/undate/date.py +++ b/src/undate/date.py @@ -52,9 +52,20 @@ def __init__(self, lower: int, upper: int): @dataclass class Udelta: + """ + An uncertain timedelta, for durations where the number of days is uncertain. + Initialize with a list of possible durations in days as integers, which are used + to calculate a value for duration in :attr:`days` as an + instance of :class:`IntegerRange`. 
+ """ + + # NOTE: we will probably need other timedelta-like logic here besides days... + + #: number of days, as an instance of :class:`IntegerRange` days: IntegerRange - # def __init__(self, deltadays: ufloat): - # self.days = deltadays + + def __init__(self, *days: int): + self.days = IntegerRange(min(days), max(days)) #: timedelta for single day diff --git a/src/undate/undate.py b/src/undate/undate.py index 217236f..d3e9653 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -20,7 +20,7 @@ from typing import Dict, Optional, Union from undate.converters.base import BaseDateConverter -from undate.date import ONE_DAY, ONE_MONTH_MAX, Date, DatePrecision, Timedelta +from undate.date import ONE_DAY, ONE_MONTH_MAX, Date, DatePrecision, Timedelta, Udelta class Calendar(StrEnum): @@ -439,13 +439,14 @@ def _get_date_part(self, part: str) -> Optional[str]: value = self.initial_values.get(part) return str(value) if value else None - def duration(self) -> Timedelta: + def duration(self) -> Timedelta | Udelta: """What is the duration of this date? Calculate based on earliest and latest date within range, taking into account the precision of the date even if not all parts of the date are known. Note that durations are inclusive (i.e., a closed interval) and include both the earliest and latest - date rather than the difference between them.""" + date rather than the difference between them. 
Returns a :class:`undate.date.Timedelta` when + possible, and an :class:`undate.date.Udelta` when the duration is uncertain.""" # if precision is a single day, duration is one day # no matter when it is or what else is known @@ -456,24 +457,48 @@ def duration(self) -> Timedelta: # calculate month duration within a single year (not min/max) if self.precision == DatePrecision.MONTH: latest = self.latest + # if year is unknown, calculate month duration in + # leap year and non-leap year, in case length varies if not self.known_year: - # if year is unknown, calculate month duration in - # a single year - latest = Date(self.earliest.year, self.latest.month, self.latest.day) + # TODO: should leap-year specific logic shift to the calendars, + # since it works differently depending on the calendar? + possible_years = [ + self.calendar_converter.LEAP_YEAR, + self.calendar_converter.NON_LEAP_YEAR, + ] + # TODO: what about partially known years like 191X ? + else: + # otherwise, get possible durations for all possible months + # for a known year + possible_years = [self.earliest.year] + + # for every possible month and year, get max days for that month, + possible_max_days = set() + # appease mypy, which says month values could be None here + if self.earliest.month is not None and self.latest.month is not None: + for possible_month in range(self.earliest.month, self.latest.month + 1): + for year in possible_years: + possible_max_days.add( + self.calendar_converter.max_day(year, possible_month) + ) + + # if there is more than one possible value for month length, + # whether due to leap year / non-leap year or ambiguous month, + # return an uncertain delta + if len(possible_max_days) > 1: + return Udelta(*possible_max_days) + + # otherwise, calculate timedelta normally + max_day = list(possible_max_days)[0] + latest = Date(self.earliest.year, self.earliest.month, max_day) - # TODO: calculate duration for a leap year and a non-leap year, - # then return a udelta if they vary - # 
TODO: how does this logic work for other calendars? - - # latest = datetime.date( - # self.earliest.year, self.latest.month, self.latest.day - # ) delta = latest - self.earliest + ONE_DAY # month duration can't ever be more than 31 days # (could we ever know if it's smaller?) # if granularity == month but not known month, duration = 31 if delta.astype(int) > 31: + # FIXME: this depends on calendar! return ONE_MONTH_MAX return delta diff --git a/tests/test_date.py b/tests/test_date.py index cb3b3d4..a4b654c 100644 --- a/tests/test_date.py +++ b/tests/test_date.py @@ -105,6 +105,20 @@ def test_init_validation(self): class TestUdelta: def test_init(self): + # February in an unknown year in Gregorian calendar could be 28 or 29 days february_days = IntegerRange(28, 29) # 28 or 29 - udelt = Udelta(february_days) + udelt = Udelta(28, 29) + assert isinstance(udelt.days, IntegerRange) + assert udelt.days.lower == 28 + assert udelt.days.upper == 29 + + # NOTE: default portion interval comparison may not be what we want here, + # since this is an unknown value within the range... 
+ # (maybe handled in udelta class comparison methods) assert udelt.days == february_days + + # do the right thing with more than one value, out of order + unknown_month_duration = Udelta(30, 31, 28) + assert isinstance(unknown_month_duration.days, IntegerRange) + assert unknown_month_duration.days.lower == 28 + assert unknown_month_duration.days.upper == 31 diff --git a/tests/test_undate.py b/tests/test_undate.py index 8e4355d..ac9d46c 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -4,7 +4,7 @@ from undate import Undate, UndateInterval, Calendar from undate.converters.base import BaseCalendarConverter -from undate.date import Date, DatePrecision, Timedelta, IntegerRange +from undate.date import Date, DatePrecision, Timedelta, Udelta, IntegerRange class TestUndate: @@ -405,10 +405,22 @@ def test_partiallyknown_duration(self): assert Undate(month=6).duration().days == 30 # partially known month # 1X = October, November, or December = 30 or 31 days - assert Undate(year=1900, month="1X").duration().days == IntegerRange(30, 31) - # what about February? 
- # could vary with leap years; either 28 or 29 days - assert Undate(month=2).duration().days == IntegerRange(28, 29) + # should return a Udelta object + unknown_month_duration = Undate(year=1900, month="1X").duration() + assert isinstance(unknown_month_duration, Udelta) + assert unknown_month_duration.days == IntegerRange(30, 31) + + # completely unknown month should also return a Udelta object + unknown_month_duration = Undate(year=1900, month="XX").duration() + assert isinstance(unknown_month_duration, Udelta) + # possible range is 28 to 31 days + assert unknown_month_duration.days == IntegerRange(28, 31) + + # the number of days in February of an unknown year is uncertain, since + # it could vary with leap years; either 28 or 29 days + feb_duration = Undate(month=2).duration() + assert isinstance(feb_duration, Udelta) + assert feb_duration.days == IntegerRange(28, 29) def test_known_year(self): assert Undate(2022).known_year is True From 9af31c0f90ec45b481c4af725507485b299d2801 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 25 Apr 2025 16:59:24 -0400 Subject: [PATCH 4/5] Add comparison methods for Udelta class --- src/undate/date.py | 31 ++++++++++++++++++++++- tests/test_date.py | 61 ++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 89 insertions(+), 3 deletions(-) diff --git a/src/undate/date.py b/src/undate/date.py index 3533a3f..db9820b 100644 --- a/src/undate/date.py +++ b/src/undate/date.py @@ -46,7 +46,7 @@ class IntegerRange(portion.AbstractDiscreteInterval): def __init__(self, lower: int, upper: int): # base init method takes one or more intervals; we want a single closed interval if not lower < upper: - raise ValueError(f"Lower value {lower} must be less than upper {upper}") + raise ValueError(f"Lower value ({lower}) must be less than upper ({upper})") super().__init__(portion.closed(lower, upper)) @@ -65,8 +65,37 @@ class Udelta: days: IntegerRange def __init__(self, *days: int): + if len(days) < 2: + raise ValueError( + "Must 
specify at least two values for an uncertain duration" + ) self.days = IntegerRange(min(days), max(days)) + # TODO: what does equality for an uncertain range mean? + # is an uncertain range ever equal to another uncertain range? + + def __eq__(self, other: object) -> bool: + # is an uncertain duration ever *equal* another, even if the values are the same? + if other is self: + return True + return False + + def __lt__(self, other: object) -> bool: + if isinstance(other, Timedelta): + return self.days < portion.singleton(other.days) + elif isinstance(other, Udelta): + return self.days < other.days + + return NotImplemented + + def __gt__(self, other: object) -> bool: + if isinstance(other, Timedelta): + return self.days > portion.singleton(other.days) + elif isinstance(other, Udelta): + return self.days > other.days + + return NotImplemented + #: timedelta for single day ONE_DAY = Timedelta(1) # ~ equivalent to datetime.timedelta(days=1) diff --git a/tests/test_date.py b/tests/test_date.py index a4b654c..e1e6d14 100644 --- a/tests/test_date.py +++ b/tests/test_date.py @@ -2,7 +2,16 @@ import portion import pytest -from undate.date import ONE_YEAR, Date, DatePrecision, Timedelta, Udelta, IntegerRange +from undate.date import ( + ONE_DAY, + ONE_YEAR, + ONE_MONTH_MAX, + Date, + DatePrecision, + Timedelta, + Udelta, + IntegerRange, +) class TestDatePrecision: @@ -94,7 +103,9 @@ def test_init(self): assert 30 not in february_days def test_init_validation(self): - with pytest.raises(ValueError): + with pytest.raises( + ValueError, match=r"Lower value \(10\) must be less than upper \(4\)" + ): IntegerRange(10, 4) # TODO: test/implement comparisons @@ -122,3 +133,49 @@ def test_init(self): assert isinstance(unknown_month_duration.days, IntegerRange) assert unknown_month_duration.days.lower == 28 assert unknown_month_duration.days.upper == 31 + + def test_init_validation(self): + with pytest.raises(ValueError, match="Must specify at least two values"): + Udelta(10) + + def 
test_repr(self): + # default dataclass repr + assert repr(Udelta(28, 29)) == "Udelta(days=[28,29])" + + def test_eq(self): + # uncertain deltas are not equivalent + udelt1 = Udelta(30, 31) + udelt2 = Udelta(30, 31) + # not equal to equivalent udelta range + assert udelt1 != udelt2 + # equal to self + assert udelt1 is udelt1 + + def test_lt(self): + week_or_tenday = Udelta(7, 10) + # compare udelta with udelta + month = Udelta(28, 31) + # a week or ten-day is unambiguously less than a month + assert week_or_tenday < month + # compare udelta with Timedelta + # NOTE: currently requires this direction, until we update Timedelta + assert not week_or_tenday < ONE_DAY + # an uncertain month is unambiguously less than a year + assert month < ONE_YEAR + # an uncertain month may or may not be less than one month max + assert not month < ONE_MONTH_MAX + + def test_gt(self): + week_or_tenday = Udelta(7, 10) + # compare udelta with udelta + month = Udelta(28, 31) + # a month is unambiguously longer than week or ten-day + assert month > week_or_tenday + # compare udelta with Timedelta + # NOTE: currently requires this direction, until we update Timedelta + # to support the reverse comparison + assert week_or_tenday > ONE_DAY + # an uncertain month is not greater than a year + assert not month > ONE_YEAR + # an uncertain month may or may not be greater than one month max + assert not month > ONE_MONTH_MAX From 2207fc6dcd79abf78b831e9e54d12b22069a928c Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 25 Apr 2025 17:31:17 -0400 Subject: [PATCH 5/5] Preliminary interval range add/subtract [doesn't work for all cases] --- src/undate/converters/calendars/gregorian.py | 2 +- src/undate/date.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/undate/converters/calendars/gregorian.py b/src/undate/converters/calendars/gregorian.py index 4843e24..c0e0a19 100644 --- a/src/undate/converters/calendars/gregorian.py +++ 
b/src/undate/converters/calendars/gregorian.py
@@ -40,7 +40,7 @@ def max_day(self, year: int, month: int) -> int:
             _, max_day = monthrange(year, month)
         else:
             # if year and month are unknown, return maximum possible
-            # TODO: should this return a ufloat?
+            # TODO: should this return an IntegerRange?
             max_day = 31
 
         return max_day
diff --git a/src/undate/date.py b/src/undate/date.py
index db9820b..7ac9d81 100644
--- a/src/undate/date.py
+++ b/src/undate/date.py
@@ -49,6 +49,24 @@ def __init__(self, lower: int, upper: int):
             raise ValueError(f"Lower value ({lower}) must be less than upper ({upper})")
         super().__init__(portion.closed(lower, upper))
 
+    def __sub__(self, other):
+        """Shift or widen the range by subtraction (not portion's set difference)."""
+        if isinstance(other, int):
+            return IntegerRange(self.lower - other, self.upper - other)
+        if isinstance(other, IntegerRange):
+            # widest possible span: smallest minus largest .. largest minus smallest
+            return IntegerRange(self.lower - other.upper, self.upper - other.lower)
+        return NotImplemented
+
+    def __add__(self, other):
+        """Shift or widen the range by addition; returns a new IntegerRange."""
+        if isinstance(other, int):
+            return IntegerRange(self.lower + other, self.upper + other)
+        if isinstance(other, IntegerRange):
+            # widest possible span: sum of the lower bounds .. sum of the upper bounds
+            return IntegerRange(self.lower + other.lower, self.upper + other.upper)
+        return NotImplemented
+
 
 @dataclass
 class Udelta: