From a43b775248cb625bc97ec9e5e4f62d1a06645ed2 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Wed, 5 Mar 2025 19:10:59 -0500 Subject: [PATCH 1/4] Preliminary work to use uncertainties ufloat for uncertain values --- pyproject.toml | 2 +- src/undate/date.py | 9 +++++++++ src/undate/undate.py | 4 ++++ tests/test_date.py | 11 ++++++++++- tests/test_undate.py | 8 +++++--- 5 files changed, 29 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f1ad9a7..68accce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ readme = "README.md" license = { text = "Apache-2" } requires-python = ">= 3.9" dynamic = ["version"] -dependencies = ["lark[interegular]", "numpy", "convertdate", "strenum; python_version < '3.11'"] +dependencies = ["lark[interegular]", "numpy", "convertdate", "strenum; python_version < '3.11'", "uncertainties"] authors = [ { name = "Rebecca Sutton Koeser" }, { name = "Cole Crawford" }, diff --git a/src/undate/date.py b/src/undate/date.py index 27f6efa..b184ed3 100644 --- a/src/undate/date.py +++ b/src/undate/date.py @@ -1,9 +1,11 @@ from enum import IntEnum +from dataclasses import dataclass # Pre 3.10 requires Union for multiple types, e.g. 
Union[int, None] instead of int | None from typing import Optional, Union import numpy as np +from uncertainties import ufloat class Timedelta(np.ndarray): @@ -29,6 +31,13 @@ def days(self) -> int: return int(self.astype("datetime64[D]").astype("int")) +@dataclass +class Udelta: + days: ufloat + # def __init__(self, deltadays: ufloat): + # self.days = deltadays + + #: timedelta for single day ONE_DAY = Timedelta(1) # ~ equivalent to datetime.timedelta(days=1) #: timedelta for a single year (non-leap year) diff --git a/src/undate/undate.py b/src/undate/undate.py index 2008914..398718c 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -442,6 +442,10 @@ def duration(self) -> Timedelta: # a single year latest = Date(self.earliest.year, self.latest.month, self.latest.day) + # TODO: calculate duration for a leap year and a non-leap year, + # then return a udelta if they vary + # TODO: how does this logic work for other calendars? + # latest = datetime.date( # self.earliest.year, self.latest.month, self.latest.day # ) diff --git a/tests/test_date.py b/tests/test_date.py index 5ff017d..9d1c095 100644 --- a/tests/test_date.py +++ b/tests/test_date.py @@ -1,5 +1,7 @@ import numpy as np -from undate.date import ONE_YEAR, Date, DatePrecision, Timedelta +from uncertainties import ufloat + +from undate.date import ONE_YEAR, Date, DatePrecision, Timedelta, Udelta class TestDatePrecision: @@ -77,3 +79,10 @@ def test_init_from_np_timedelta64(self): def test_days(self): assert Timedelta(10).days == 10 + + +class TestUdelta: + def test_init(self): + february_days = ufloat(28.5, 0.5) # 28 or 29 + udelt = Udelta(february_days) + assert udelt.days == february_days diff --git a/tests/test_undate.py b/tests/test_undate.py index 8f8a5c8..e0e51f9 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -1,6 +1,7 @@ from datetime import date import pytest +from uncertainties import ufloat from undate import Undate, UndateInterval, Calendar from undate.converters.base 
import BaseCalendarConverter @@ -383,10 +384,11 @@ def test_partiallyknown_duration(self): # month in unknown year assert Undate(month=6).duration().days == 30 # partially known month - assert Undate(year=1900, month="1X").duration().days == 31 + # 1X = October, November, or December = 30 or 31 days + assert Undate(year=1900, month="1X").duration().days == ufloat(30.5, 0.5) # what about february? - # could vary with leap years, but assume non-leapyear - assert Undate(month=2).duration().days == 28 + # could vary with leap years; either 28 or 29 days + assert Undate(month=2).duration().days == ufloat(28.5, 0.5) def test_known_year(self): assert Undate(2022).known_year is True From da82917e7ecf27b56090ca035e463e64ef312dba Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 13 Mar 2025 17:57:56 -0400 Subject: [PATCH 2/4] Use Udelta and ufloat for uncertain durations --- src/undate/converters/calendars/gregorian.py | 5 +- src/undate/date.py | 18 ++++++- src/undate/undate.py | 51 +++++++++++++++----- tests/test_date.py | 9 +++- tests/test_undate.py | 26 +++++++--- 5 files changed, 85 insertions(+), 24 deletions(-) diff --git a/src/undate/converters/calendars/gregorian.py b/src/undate/converters/calendars/gregorian.py index 5a1d2dc..4843e24 100644 --- a/src/undate/converters/calendars/gregorian.py +++ b/src/undate/converters/calendars/gregorian.py @@ -13,8 +13,10 @@ class GregorianDateConverter(BaseCalendarConverter): #: calendar calendar_name: str = "Gregorian" - #: known non-leap year + #: arbitrary known non-leap year NON_LEAP_YEAR: int = 2022 + #: arbitrary known leap year + LEAP_YEAR: int = 2024 def min_month(self) -> int: """First month for the Gregorian calendar.""" @@ -38,6 +40,7 @@ def max_day(self, year: int, month: int) -> int: _, max_day = monthrange(year, month) else: # if year and month are unknown, return maximum possible + # TODO: should this return a ufloat? 
max_day = 31 return max_day diff --git a/src/undate/date.py b/src/undate/date.py index b184ed3..11194d1 100644 --- a/src/undate/date.py +++ b/src/undate/date.py @@ -33,9 +33,23 @@ def days(self) -> int: @dataclass class Udelta: + """An uncertain timedelta, for durations where the number of days is uncertain. + Initialize with a list of possible day durations as integers, which are used + to calculate a value for duration in :attr:`days` as an + instance of :class:`uncertainties.ufloat`. + """ + + # NOTE: we will probably need other timedelta-like logic here besides days... + + #: number of days, as an instance of :class:`uncertainties.ufloat` days: ufloat - # def __init__(self, deltadays: ufloat): - # self.days = deltadays + + def __init__(self, *days: int): + min_days = min(days) + max_days = max(days) + half_diff = (max_days - min_days) / 2 + midpoint = min_days + half_diff + self.days = ufloat(midpoint, half_diff) #: timedelta for single day diff --git a/src/undate/undate.py b/src/undate/undate.py index 398718c..b457d1b 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -18,7 +18,7 @@ from typing import Dict, Optional, Union from undate.converters.base import BaseDateConverter -from undate.date import ONE_DAY, ONE_MONTH_MAX, Date, DatePrecision, Timedelta +from undate.date import ONE_DAY, ONE_MONTH_MAX, Date, DatePrecision, Timedelta, Udelta class Calendar(StrEnum): @@ -420,13 +420,14 @@ def _get_date_part(self, part: str) -> Optional[str]: value = self.initial_values.get(part) return str(value) if value else None - def duration(self) -> Timedelta: + def duration(self) -> Timedelta | Udelta: """What is the duration of this date? Calculate based on earliest and latest date within range, taking into account the precision of the date even if not all parts of the date are known. 
Note that durations are inclusive (i.e., a closed interval) and include both the earliest and latest - date rather than the difference between them.""" + date rather than the difference between them. Returns a :class:`undate.date.Timedelta` when + possible, and an :class:`undate.date.Udelta` when the duration is uncertain.""" # if precision is a single day, duration is one day # no matter when it is or what else is known @@ -437,24 +438,48 @@ def duration(self) -> Timedelta: # calculate month duration within a single year (not min/max) if self.precision == DatePrecision.MONTH: latest = self.latest + # if year is unknown, calculate month duration in + # leap year and non-leap year, in case length varies if not self.known_year: - # if year is unknown, calculate month duration in - # a single year - latest = Date(self.earliest.year, self.latest.month, self.latest.day) + # TODO: should leap-year specific logic shift to the calendars, + # since it works differently depending on the calendar? + possible_years = [ + self.calendar_converter.LEAP_YEAR, + self.calendar_converter.NON_LEAP_YEAR, + ] + # TODO: what about partially known years like 191X ? 
+ else: + # otherwise, get possible durations for all possible months + # for a known year + possible_years = [self.earliest.year] + + # for every possible month and year, get max days for that month, + possible_max_days = set() + # appease mypy, which says month values could be None here + if self.earliest.month is not None and self.latest.month is not None: + for possible_month in range(self.earliest.month, self.latest.month + 1): + for year in possible_years: + possible_max_days.add( + self.calendar_converter.max_day(year, possible_month) + ) + + # if there is more than one possible value for month length, + # whether due to leap year / non-leap year or ambiguous month, + # return an uncertain delta + if len(possible_max_days) > 1: + return Udelta(*possible_max_days) + + # otherwise, calculate timedelta normally + max_day = list(possible_max_days)[0] + latest = Date(self.earliest.year, self.earliest.month, max_day) - # TODO: calculate duration for a leap year and a non-leap year, - # then return a udelta if they vary - # TODO: how does this logic work for other calendars? - - # latest = datetime.date( - # self.earliest.year, self.latest.month, self.latest.day - # ) delta = latest - self.earliest + ONE_DAY # month duration can't ever be more than 31 days # (could we ever know if it's smaller?) # if granularity == month but not known month, duration = 31 if delta.astype(int) > 31: + # FIXME: this depends on calendar! 
return ONE_MONTH_MAX return delta diff --git a/tests/test_date.py b/tests/test_date.py index 9d1c095..3dcf2ae 100644 --- a/tests/test_date.py +++ b/tests/test_date.py @@ -83,6 +83,11 @@ def test_days(self): class TestUdelta: def test_init(self): + # february in an unknown year in Gregorian calendar could be 28 or 29 days february_days = ufloat(28.5, 0.5) # 28 or 29 - udelt = Udelta(february_days) - assert udelt.days == february_days + udelt = Udelta(28, 29) + # two ufloat values don't actually compare as equal, due to the variance + assert udelt != february_days + # so inspect the expected values + assert udelt.days.nominal_value == 28.5 + assert udelt.days.std_dev == 0.5 diff --git a/tests/test_undate.py b/tests/test_undate.py index e0e51f9..ef03115 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -1,11 +1,10 @@ from datetime import date import pytest -from uncertainties import ufloat from undate import Undate, UndateInterval, Calendar from undate.converters.base import BaseCalendarConverter -from undate.date import DatePrecision, Timedelta +from undate.date import DatePrecision, Timedelta, Udelta class TestUndate: @@ -385,10 +384,25 @@ def test_partiallyknown_duration(self): assert Undate(month=6).duration().days == 30 # partially known month # 1X = October, November, or December = 30 or 31 days - assert Undate(year=1900, month="1X").duration().days == ufloat(30.5, 0.5) - # what about february? 
- # could vary with leap years; either 28 or 29 days - assert Undate(month=2).duration().days == ufloat(28.5, 0.5) + # should return a Udelta object + unknown_month_duration = Undate(year=1900, month="1X").duration() + assert isinstance(unknown_month_duration, Udelta) + assert unknown_month_duration.days.nominal_value == 30.5 + assert unknown_month_duration.days.std_dev == 0.5 + + # completely unknown month should also return a Udelta object + unknown_month_duration = Undate(year=1900, month="XX").duration() + assert isinstance(unknown_month_duration, Udelta) + # possible range is 28 to 31 days + assert unknown_month_duration.days.nominal_value == 29.5 + assert unknown_month_duration.days.std_dev == 1.5 + + # the number of days in February of an unknown year is uncertain, since + # it could vary with leap years; either 28 or 29 days + feb_duration = Undate(month=2).duration() + assert isinstance(feb_duration, Udelta) + assert feb_duration.days.nominal_value == 28.5 + assert feb_duration.days.std_dev == 0.5 def test_known_year(self): assert Undate(2022).known_year is True From 0be9a768a8fe4467506ba84839a308560417a52b Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 13 Mar 2025 18:08:16 -0400 Subject: [PATCH 3/4] Add comment to skip type checking on untyped uncertainties library --- src/undate/date.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/undate/date.py b/src/undate/date.py index 11194d1..7b68577 100644 --- a/src/undate/date.py +++ b/src/undate/date.py @@ -5,7 +5,7 @@ from typing import Optional, Union import numpy as np -from uncertainties import ufloat +from uncertainties import ufloat # type: ignore class Timedelta(np.ndarray): From 34c31ebf26817cf513c257c380b8f1475b68d611 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 13 Mar 2025 22:28:10 -0400 Subject: [PATCH 4/4] Add todo for uncertain interval durations --- src/undate/interval.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/undate/interval.py 
b/src/undate/interval.py index 33ec200..eecb0ae 100644 --- a/src/undate/interval.py +++ b/src/undate/interval.py @@ -104,6 +104,11 @@ def duration(self) -> Timedelta: elif not self.latest.known_year and not self.earliest.known_year: # under what circumstances can we assume that if both years # are unknown the dates are in the same year or sequential? + + # TODO: for Gregorian calendars, if this interval spans end + # of February we should return a udelta object since the interval + # may or may not include February 29 + duration = self.latest.earliest - self.earliest.earliest # if we get a negative, we've wrapped from end of one year # to the beginning of the next;