Skip to content

Try portion library for uncertain durations #128

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ readme = "README.md"
license = { text = "Apache-2" }
requires-python = ">= 3.10"
dynamic = ["version"]
dependencies = ["lark[interegular]", "numpy", "convertdate", "strenum; python_version < '3.11'"]
dependencies = ["lark[interegular]", "numpy", "convertdate", "strenum; python_version < '3.11'", "portion"]
authors = [
{ name = "Rebecca Sutton Koeser" },
{ name = "Cole Crawford" },
Expand Down
5 changes: 4 additions & 1 deletion src/undate/converters/calendars/gregorian.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@ class GregorianDateConverter(BaseCalendarConverter):
#: calendar
calendar_name: str = "Gregorian"

#: known non-leap year
#: arbitrary known non-leap year
NON_LEAP_YEAR: int = 2022
#: arbitrary known leap year
LEAP_YEAR: int = 2024

def min_month(self) -> int:
"""First month for the Gregorian calendar."""
Expand All @@ -38,6 +40,7 @@ def max_day(self, year: int, month: int) -> int:
_, max_day = monthrange(year, month)
else:
# if year and month are unknown, return maximum possible
# TODO: should this return an IntegerRange?
max_day = 31

return max_day
Expand Down
86 changes: 86 additions & 0 deletions src/undate/date.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from enum import IntEnum
from dataclasses import dataclass

# Pre 3.10 requires Union for multiple types, e.g. Union[int, None] instead of int | None
from typing import Optional, Union

import numpy as np
import portion


class Timedelta(np.ndarray):
Expand All @@ -29,6 +31,90 @@ def days(self) -> int:
return int(self.astype("datetime64[D]").astype("int"))


class IntegerRange(portion.AbstractDiscreteInterval):
    """Range of integer values. Implemented as a closed integer interval,
    subclass of :class:`portion.AbstractDiscreteInterval` with a
    step of 1.

    Initialize by passing in the lower (min) and upper (max) values
    included in the range.

    Addition and subtraction are overridden to do arithmetic on the
    endpoints of the range instead of portion's set-based operations.

    :raises ValueError: if ``lower`` is not strictly less than ``upper``
    """

    _step = 1

    def __init__(self, lower: int, upper: int):
        # a range must span at least two distinct values
        if not lower < upper:
            raise ValueError(f"Lower value ({lower}) must be less than upper ({upper})")
        # base init method takes one or more intervals; we want a single closed interval
        super().__init__(portion.closed(lower, upper))

    def __sub__(self, other):
        """Decrease the values in this range by an integer or by another range.

        Returns a new :class:`IntegerRange`, or ``NotImplemented`` for
        unsupported operand types.
        """
        # portion default subtraction is to remove from the set, but
        # we want to decrease the values
        if isinstance(other, int):
            return IntegerRange(self.lower - other, self.upper - other)
        if isinstance(other, IntegerRange):
            # the smallest possible result is our lower value minus the
            # other's upper value; the largest is our upper minus the
            # other's lower. Subtracting matching endpoints could yield
            # lower >= upper, which the constructor rejects.
            return IntegerRange(self.lower - other.upper, self.upper - other.lower)
        # let Python raise TypeError instead of silently returning None
        return NotImplemented

    def __add__(self, other):
        """Increase the values in this range by an integer or by another range.

        Returns a new :class:`IntegerRange`, or ``NotImplemented`` for
        unsupported operand types.
        """
        # we want to increase the values, not combine sets
        if isinstance(other, int):
            return IntegerRange(self.lower + other, self.upper + other)
        if isinstance(other, IntegerRange):
            # the new range is the smallest possible value to the highest:
            # sum of the lower values to sum of the upper values
            return IntegerRange(self.lower + other.lower, self.upper + other.upper)
        # let Python raise TypeError instead of silently returning None
        return NotImplemented


@dataclass
class Udelta:
    """
    Uncertain timedelta: a duration whose length in days is not known exactly.
    Construct it from two or more candidate durations (integer days); the
    smallest and largest candidates become the bounds of :attr:`days`,
    which is stored as an :class:`IntegerRange`.
    """

    # NOTE: we will probably need other timedelta-like logic here besides days...

    #: possible number of days, as an instance of :class:`IntegerRange`
    days: IntegerRange

    def __init__(self, *days: int):
        # a single value would be a certain duration, not an uncertain one
        if len(days) < 2:
            raise ValueError(
                "Must specify at least two values for an uncertain duration"
            )
        lowest, highest = min(days), max(days)
        self.days = IntegerRange(lowest, highest)

    # TODO: what does equality for an uncertain range mean?
    # is an uncertain range ever equal to another uncertain range?

    def __eq__(self, other: object) -> bool:
        # an uncertain duration is only considered equal to itself, even
        # when another uncertain duration has the same possible values
        return other is self

    @staticmethod
    def _days_interval(other: object):
        """Return the days of ``other`` as an interval that can be compared
        with :attr:`days`, or None when ``other`` is not a supported type."""
        if isinstance(other, Udelta):
            return other.days
        if isinstance(other, Timedelta):
            # wrap the exact number of days in a single-value interval
            return portion.singleton(other.days)
        return None

    def __lt__(self, other: object) -> bool:
        # compare intervals of days; defer to the other operand if unsupported
        other_days = self._days_interval(other)
        if other_days is None:
            return NotImplemented
        return self.days < other_days

    def __gt__(self, other: object) -> bool:
        # compare intervals of days; defer to the other operand if unsupported
        other_days = self._days_interval(other)
        if other_days is None:
            return NotImplemented
        return self.days > other_days


#: timedelta for single day
ONE_DAY = Timedelta(1) # ~ equivalent to datetime.timedelta(days=1)
#: timedelta for a single year (non-leap year)
Expand Down
47 changes: 38 additions & 9 deletions src/undate/undate.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from typing import Dict, Optional, Union

from undate.converters.base import BaseDateConverter
from undate.date import ONE_DAY, ONE_MONTH_MAX, Date, DatePrecision, Timedelta
from undate.date import ONE_DAY, ONE_MONTH_MAX, Date, DatePrecision, Timedelta, Udelta


class Calendar(StrEnum):
Expand Down Expand Up @@ -439,13 +439,14 @@ def _get_date_part(self, part: str) -> Optional[str]:
value = self.initial_values.get(part)
return str(value) if value else None

def duration(self) -> Timedelta:
def duration(self) -> Timedelta | Udelta:
"""What is the duration of this date?
Calculate based on earliest and latest date within range,
taking into account the precision of the date even if not all
parts of the date are known. Note that durations are inclusive
(i.e., a closed interval) and include both the earliest and latest
date rather than the difference between them."""
date rather than the difference between them. Returns a :class:`undate.date.Timedelta` when
possible, and an :class:`undate.date.Udelta` when the duration is uncertain."""

# if precision is a single day, duration is one day
# no matter when it is or what else is known
Expand All @@ -456,20 +457,48 @@ def duration(self) -> Timedelta:
# calculate month duration within a single year (not min/max)
if self.precision == DatePrecision.MONTH:
latest = self.latest
# if year is unknown, calculate month duration in
# leap year and non-leap year, in case length varies
if not self.known_year:
# if year is unknown, calculate month duration in
# a single year
latest = Date(self.earliest.year, self.latest.month, self.latest.day)
# TODO: should leap-year specific logic shift to the calendars,
# since it works differently depending on the calendar?
possible_years = [
self.calendar_converter.LEAP_YEAR,
self.calendar_converter.NON_LEAP_YEAR,
]
# TODO: what about partially known years like 191X ?
else:
# otherwise, get possible durations for all possible months
# for a known year
possible_years = [self.earliest.year]

# for every possible month and year, get max days for that month,
possible_max_days = set()
# appease mypy, which says month values could be None here
if self.earliest.month is not None and self.latest.month is not None:
for possible_month in range(self.earliest.month, self.latest.month + 1):
for year in possible_years:
possible_max_days.add(
self.calendar_converter.max_day(year, possible_month)
)

# if there is more than one possible value for month length,
# whether due to leap year / non-leap year or ambiguous month,
# return an uncertain delta
if len(possible_max_days) > 1:
return Udelta(*possible_max_days)

# otherwise, calculate timedelta normally
max_day = list(possible_max_days)[0]
latest = Date(self.earliest.year, self.earliest.month, max_day)

# latest = datetime.date(
# self.earliest.year, self.latest.month, self.latest.day
# )
delta = latest - self.earliest + ONE_DAY
# month duration can't ever be more than 31 days
# (could we ever know if it's smaller?)

# if granularity == month but not known month, duration = 31
if delta.astype(int) > 31:
# FIXME: this depends on calendar!
return ONE_MONTH_MAX
return delta

Expand Down
104 changes: 103 additions & 1 deletion tests/test_date.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
import numpy as np
from undate.date import ONE_YEAR, Date, DatePrecision, Timedelta
import portion
import pytest

from undate.date import (
ONE_DAY,
ONE_YEAR,
ONE_MONTH_MAX,
Date,
DatePrecision,
Timedelta,
Udelta,
IntegerRange,
)


class TestDatePrecision:
Expand Down Expand Up @@ -77,3 +89,93 @@ def test_init_from_np_timedelta64(self):

def test_days(self):
assert Timedelta(10).days == 10


class TestIntegerRange:
    """Tests for :class:`undate.date.IntegerRange`."""

    def test_init(self):
        # February has either 28 or 29 days
        feb = IntegerRange(28, 29)
        # endpoints are the values passed in
        assert (feb.lower, feb.upper) == (28, 29)
        # the interval is closed on both sides
        assert feb.left == portion.CLOSED
        assert feb.right == portion.CLOSED
        # both endpoints are members; the next integer is not
        assert all(day in feb for day in (28, 29))
        assert 30 not in feb

    def test_init_validation(self):
        # lower bound greater than upper bound is rejected
        expected_error = r"Lower value \(10\) must be less than upper \(4\)"
        with pytest.raises(ValueError, match=expected_error):
            IntegerRange(10, 4)

    # TODO: test/implement comparisons
    # NOTE: this results in a deprecation warning;
    # implement conversion to singleton in the class?
    # assert 30 > february_days


class TestUdelta:
    """Tests for :class:`undate.date.Udelta`, the uncertain timedelta."""

    def test_init(self):
        # February in an unknown year in Gregorian calendar could be 28 or 29 days
        february_days = IntegerRange(28, 29)  # 28 or 29
        udelt = Udelta(28, 29)
        assert isinstance(udelt.days, IntegerRange)
        assert udelt.days.lower == 28
        assert udelt.days.upper == 29

        # NOTE: default portion interval comparison may not be what we want here,
        # since this is an unknown value within the range...
        # (maybe handled in udelta class comparison methods)
        assert udelt.days == february_days

        # do the right thing with more than one value, out of order
        unknown_month_duration = Udelta(30, 31, 28)
        assert isinstance(unknown_month_duration.days, IntegerRange)
        assert unknown_month_duration.days.lower == 28
        assert unknown_month_duration.days.upper == 31

    def test_init_validation(self):
        # a single value is not an uncertain duration
        with pytest.raises(ValueError, match="Must specify at least two values"):
            Udelta(10)

    def test_repr(self):
        # default dataclass repr
        assert repr(Udelta(28, 29)) == "Udelta(days=[28,29])"

    def test_eq(self):
        # uncertain deltas are not equivalent
        udelt1 = Udelta(30, 31)
        udelt2 = Udelta(30, 31)
        # not equal to equivalent udelta range
        assert udelt1 != udelt2
        # equal to self; compare with == so that Udelta.__eq__ is exercised
        # (an identity check with `is` passes regardless of __eq__)
        assert udelt1 == udelt1

    def test_lt(self):
        week_or_tenday = Udelta(7, 10)
        # compare udelta with udelta
        month = Udelta(28, 31)
        # a week or ten-day is unambiguously less than a month
        assert week_or_tenday < month
        # compare udelta with Timedelta
        # NOTE: currently requires this direction, until we update Timedelta
        # to support the reverse comparison
        assert not week_or_tenday < ONE_DAY
        # an uncertain month is unambiguously less than a year
        assert month < ONE_YEAR
        # an uncertain month may or may not be less than one month max
        assert not month < ONE_MONTH_MAX

    def test_gt(self):
        week_or_tenday = Udelta(7, 10)
        # compare udelta with udelta
        month = Udelta(28, 31)
        # a month is unambiguously longer than week or ten-day
        assert month > week_or_tenday
        # compare udelta with Timedelta
        # NOTE: currently requires this direction, until we update Timedelta
        # to support the reverse comparison
        assert week_or_tenday > ONE_DAY
        # an uncertain month is not greater than a year
        assert not month > ONE_YEAR
        # an uncertain month may or may not be greater than one month max
        assert not month > ONE_MONTH_MAX
23 changes: 18 additions & 5 deletions tests/test_undate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from undate import Undate, UndateInterval, Calendar
from undate.converters.base import BaseCalendarConverter
from undate.date import Date, DatePrecision, Timedelta
from undate.date import Date, DatePrecision, Timedelta, Udelta, IntegerRange


class TestUndate:
Expand Down Expand Up @@ -404,10 +404,23 @@ def test_partiallyknown_duration(self):
# month in unknown year
assert Undate(month=6).duration().days == 30
# partially known month
assert Undate(year=1900, month="1X").duration().days == 31
# what about february?
# could vary with leap years, but assume non-leapyear
assert Undate(month=2).duration().days == 28
# 1X = October, November, or December = 30 or 31 days
# should return a Udelta object
unknown_month_duration = Undate(year=1900, month="1X").duration()
assert isinstance(unknown_month_duration, Udelta)
assert unknown_month_duration.days == IntegerRange(30, 31)

# completely unknown month should also return a Udelta object
unknown_month_duration = Undate(year=1900, month="XX").duration()
assert isinstance(unknown_month_duration, Udelta)
# possible range is 28 to 31 days
assert unknown_month_duration.days == IntegerRange(28, 31)

# the number of days in February of an unknown year is uncertain, since
# it could vary with leap years; either 28 or 29 days
feb_duration = Undate(month=2).duration()
assert isinstance(feb_duration, Udelta)
assert feb_duration.days == IntegerRange(28, 29)

def test_known_year(self):
assert Undate(2022).known_year is True
Expand Down
Loading