Skip to content

Implement UnInt and UnDelta for uncertain date durations #129

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jun 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/undate/converters/calendars/gregorian.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@ class GregorianDateConverter(BaseCalendarConverter):
#: calendar
calendar_name: str = "Gregorian"

#: known non-leap year
#: arbitrary known non-leap year
NON_LEAP_YEAR: int = 2022
#: arbitrary known leap year
LEAP_YEAR: int = 2024

def min_month(self) -> int:
"""First month for the Gregorian calendar."""
Expand All @@ -38,6 +40,7 @@ def max_day(self, year: int, month: int) -> int:
_, max_day = monthrange(year, month)
else:
# if year and month are unknown, return maximum possible
# TODO: should this return an IntervalRange?
max_day = 31

return max_day
Expand Down
142 changes: 141 additions & 1 deletion src/undate/date.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from enum import IntEnum
from dataclasses import dataclass, replace
import operator

# Pre 3.10 requires Union for multiple types, e.g. Union[int, None] instead of int | None
from typing import Optional, Union
from typing import Optional, Union, Iterable

import numpy as np

Expand Down Expand Up @@ -29,6 +31,144 @@ def days(self) -> int:
return int(self.astype("datetime64[D]").astype("int"))


@dataclass
class UnInt:
    """Uncertain integer, expressed as a closed range of possible values.

    Intended for use with uncertain durations (:class:`UnDelta`). The actual
    value may be any integer from :attr:`lower` through :attr:`upper`, with
    both bounds included. Instances support comparison, addition and
    subtraction, membership checks, and iteration over every integer in
    the range.
    """

    lower: int
    upper: int

    def __post_init__(self):
        # a range is only valid when lower is strictly below upper;
        # equal bounds (a single exact value) are rejected as well
        bounds_ok = self.lower < self.upper
        if bounds_ok:
            return
        raise ValueError(
            f"Lower value ({self.lower}) must be less than upper ({self.upper})"
        )
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to allow UnInts for exact values, meaning to allow an UnInt(1,1)? I'm thinking of cases where maybe someone would iterate over an UnInt making the error margin smaller and smaller until it reaches an exact number. Not sure if that's a use case or if this would need to be allowed here, but I thought I'd raise the question :)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for asking, this is why I need someone else thinking about it with me!

I think in that case we would want to return an actual integer, but without a use case it's hard for me to think about how we would implement something like that...

Right now the only way the code implements an UnInt is through the uncertain time delta UnDelta object - there are more places I need to integrate it, so maybe as we use it more we'll have more clarity on what functionality we might need.


def __iter__(self) -> Iterable:
# yield all integers in range from lower to upper, inclusive
yield from range(self.lower, self.upper + 1)

def __gt__(self, other: object) -> bool:
match other:
case int():
return self.lower > other
case UnInt():
return self.lower > other.upper
case _:
return NotImplemented

def __lt__(self, other: object) -> bool:
match other:
case int():
return self.upper < other
case UnInt():
return self.upper < other.lower
case _:
return NotImplemented

def __contains__(self, other: object) -> bool:
match other:
case int():
return other >= self.lower and other <= self.upper
case UnInt():
return other.lower >= self.lower and other.upper <= self.upper
case _:
# unsupported type: return false
return False

def _replace_with(self, other_lower, other_upper, op):
"""Create and return a new instance of UnInt using the specified
operator (e.g. add, subtract) and other values to modify the values in
the current UnInt instance."""
return replace(
self, lower=op(self.lower, other_lower), upper=op(self.upper, other_upper)
)

def __add__(self, other: object) -> "UnInt":
match other:
case int():
# increase both values by the added amount
add_values = (other, other)
case UnInt():
# add other lower value to current lower and other upper
# to current upper to include the largest range of possible values
# (when calculating with uncertain values, the uncertainty increases)
add_values = (other.lower, other.upper)
case _:
return NotImplemented

return self._replace_with(*add_values, operator.add)

def __sub__(self, other) -> "UnInt":
match other:
case int():
# decrease both values by the subtracted amount
sub_values = (other, other)
case UnInt():
# to determine the largest range of possible values,
# subtract the other upper value from current lower
# and other lower value from current upper
sub_values = (other.upper, other.lower)
case _:
return NotImplemented

return self._replace_with(*sub_values, operator.sub)


@dataclass
class UnDelta:
    """
    Uncertain timedelta: a duration whose length in days is not known exactly.
    Initialize with two or more possible durations in days, passed as
    positional integers; the smallest and largest of them become the bounds
    of :attr:`days`, an instance of :class:`UnInt`.
    """

    # NOTE: we will probably need other timedelta-like logic here besides days...

    #: possible durations in days, as an instance of :class:`UnInt`
    days: UnInt

    def __init__(self, *days: int):
        # a single value would be an exact duration, not an uncertain one
        if len(days) < 2:
            raise ValueError(
                "Must specify at least two values for an uncertain duration"
            )
        shortest, longest = min(days), max(days)
        self.days = UnInt(shortest, longest)

    def __repr__(self):
        # compact notation showing only the bounds; the dataclass default
        # would spell out the full UnInt initialization with keywords
        return f"{self.__class__.__name__}(days=[{self.days.lower},{self.days.upper}])"

    def __eq__(self, other: object) -> bool:
        # It is unclear whether two uncertain durations are ever *equal*,
        # even when their bounds match. For now equality means identity
        # only; value equality / equivalence / similarity checks could be
        # added later.
        return self is other

    def __lt__(self, other: object) -> bool:
        # only Timedelta and UnDelta are supported; both expose .days
        if not isinstance(other, (Timedelta, UnDelta)):
            return NotImplemented
        return self.days < other.days

    def __gt__(self, other: object) -> bool:
        # only Timedelta and UnDelta are supported; both expose .days
        if not isinstance(other, (Timedelta, UnDelta)):
            return NotImplemented
        return self.days > other.days


#: timedelta for single day
ONE_DAY = Timedelta(1) # ~ equivalent to datetime.timedelta(days=1)
#: timedelta for a single year (non-leap year)
Expand Down
65 changes: 46 additions & 19 deletions src/undate/undate.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from typing import Dict, Optional, Union

from undate.converters.base import BaseDateConverter
from undate.date import ONE_DAY, ONE_MONTH_MAX, Date, DatePrecision, Timedelta
from undate.date import ONE_DAY, Date, DatePrecision, Timedelta, UnDelta


class Calendar(StrEnum):
Expand Down Expand Up @@ -439,13 +439,14 @@ def _get_date_part(self, part: str) -> Optional[str]:
value = self.initial_values.get(part)
return str(value) if value else None

def duration(self) -> Timedelta:
def duration(self) -> Timedelta | UnDelta:
"""What is the duration of this date?
Calculate based on earliest and latest date within range,
taking into account the precision of the date even if not all
parts of the date are known. Note that durations are inclusive
(i.e., a closed interval) and include both the earliest and latest
date rather than the difference between them."""
date rather than the difference between them. Returns a :class:`undate.date.Timedelta` when
possible, and an :class:`undate.date.UnDelta` when the duration is uncertain."""

# if precision is a single day, duration is one day
# no matter when it is or what else is known
Expand All @@ -456,25 +457,51 @@ def duration(self) -> Timedelta:
# calculate month duration within a single year (not min/max)
if self.precision == DatePrecision.MONTH:
latest = self.latest
# if year is unknown, calculate month duration in
# leap year and non-leap year, in case length varies
if not self.known_year:
# if year is unknown, calculate month duration in
# a single year
latest = Date(self.earliest.year, self.latest.month, self.latest.day)

# latest = datetime.date(
# self.earliest.year, self.latest.month, self.latest.day
# )
delta = latest - self.earliest + ONE_DAY
# month duration can't ever be more than 31 days
# (could we ever know if it's smaller?)

# if granularity == month but not known month, duration = 31
if delta.astype(int) > 31:
return ONE_MONTH_MAX
return delta
# TODO: should leap-year specific logic shift to the calendars,
# since it works differently depending on the calendar?
possible_years = [
self.calendar_converter.LEAP_YEAR,
self.calendar_converter.NON_LEAP_YEAR,
]
# TODO: handle partially known years like 191X,
# switch to representative years (depends on calendar)
# (to be implemented as part of ambiguous year duration)
else:
# otherwise, get possible durations for all possible months
# for a known year
possible_years = [self.earliest.year]

# for every possible month and year, get max days for that month,
possible_max_days = set()
# appease mypy, which says month values could be None here;
# Date object allows optional month, but earliest/latest initialization
# should always be day-precision dates
if self.earliest.month is not None and self.latest.month is not None:
for possible_month in range(self.earliest.month, self.latest.month + 1):
for year in possible_years:
possible_max_days.add(
self.calendar_converter.max_day(year, possible_month)
)

# if there is more than one possible value for month length,
# whether due to leap year / non-leap year or ambiguous month,
# return an uncertain delta
if len(possible_max_days) > 1:
return UnDelta(*possible_max_days)

# otherwise, calculate timedelta normally based on maximum day
max_day = list(possible_max_days)[0]
latest = Date(self.earliest.year, self.earliest.month, max_day)

return latest - self.earliest + ONE_DAY

# TODO: handle year precision + unknown/partially known year
# (will be handled in separate branch)

# otherwise, calculate based on earliest/latest range

# subtract earliest from latest and add a day to count start day
return self.latest - self.earliest + ONE_DAY

Expand Down
Loading