Skip to content

Use uncertainties.ufloat for uncertain durations #116

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ readme = "README.md"
license = { text = "Apache-2" }
requires-python = ">= 3.9"
dynamic = ["version"]
dependencies = ["lark[interegular]", "numpy", "convertdate", "strenum; python_version < '3.11'"]
dependencies = ["lark[interegular]", "numpy", "convertdate", "strenum; python_version < '3.11'", "uncertainties"]
authors = [
{ name = "Rebecca Sutton Koeser" },
{ name = "Cole Crawford" },
Expand Down
5 changes: 4 additions & 1 deletion src/undate/converters/calendars/gregorian.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@ class GregorianDateConverter(BaseCalendarConverter):
#: calendar
calendar_name: str = "Gregorian"

#: known non-leap year
#: arbitrary known non-leap year
NON_LEAP_YEAR: int = 2022
#: arbitrary known leap year
LEAP_YEAR: int = 2024

def min_month(self) -> int:
"""First month for the Gregorian calendar."""
Expand All @@ -38,6 +40,7 @@ def max_day(self, year: int, month: int) -> int:
_, max_day = monthrange(year, month)
else:
# if year and month are unknown, return maximum possible
# TODO: should this return a ufloat?
max_day = 31

return max_day
Expand Down
23 changes: 23 additions & 0 deletions src/undate/date.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from enum import IntEnum
from dataclasses import dataclass

# Pre 3.10 requires Union for multiple types, e.g. Union[int, None] instead of int | None
from typing import Optional, Union

import numpy as np
from uncertainties import ufloat # type: ignore


class Timedelta(np.ndarray):
Expand All @@ -29,6 +31,27 @@ def days(self) -> int:
return int(self.astype("datetime64[D]").astype("int"))


@dataclass
class Udelta:
    """An uncertain timedelta, for durations where the number of days is uncertain.

    Initialize with one or more possible day durations, passed as separate
    positional integer arguments (e.g. ``Udelta(28, 29)``). The smallest and
    largest values are used to calculate the duration in :attr:`days` as a
    value created with :func:`uncertainties.ufloat`: the nominal value is the
    midpoint of the range, and the standard deviation is half of the
    difference between the largest and smallest values.

    :raises ValueError: if no possible durations are given
    """

    # NOTE: we will probably need other timedelta-like logic here besides days...

    #: number of days, as a value created with :func:`uncertainties.ufloat`
    days: ufloat

    def __init__(self, *days: int):
        # fail with a clear message rather than the generic error
        # raised by min() on an empty sequence
        if not days:
            raise ValueError(
                "Udelta requires at least one possible duration in days"
            )
        min_days = min(days)
        max_days = max(days)
        # half the spread between the extremes is used as the uncertainty
        half_diff = (max_days - min_days) / 2
        midpoint = min_days + half_diff
        self.days = ufloat(midpoint, half_diff)


#: timedelta for single day
ONE_DAY = Timedelta(1) # ~ equivalent to datetime.timedelta(days=1)
#: timedelta for a single year (non-leap year)
Expand Down
5 changes: 5 additions & 0 deletions src/undate/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,11 @@ def duration(self) -> Timedelta:
elif not self.latest.known_year and not self.earliest.known_year:
# under what circumstances can we assume that if both years
# are unknown the dates are in the same year or sequential?

# TODO: for Gregorian calendars, if this interval spans end
# of February we should return a udelta object since the interval
# may or may not include February 29

duration = self.latest.earliest - self.earliest.earliest
# if we get a negative, we've wrapped from end of one year
# to the beginning of the next;
Expand Down
47 changes: 38 additions & 9 deletions src/undate/undate.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from typing import Dict, Optional, Union

from undate.converters.base import BaseDateConverter
from undate.date import ONE_DAY, ONE_MONTH_MAX, Date, DatePrecision, Timedelta
from undate.date import ONE_DAY, ONE_MONTH_MAX, Date, DatePrecision, Timedelta, Udelta


class Calendar(StrEnum):
Expand Down Expand Up @@ -420,13 +420,14 @@ def _get_date_part(self, part: str) -> Optional[str]:
value = self.initial_values.get(part)
return str(value) if value else None

def duration(self) -> Timedelta:
def duration(self) -> Timedelta | Udelta:
"""What is the duration of this date?
Calculate based on earliest and latest date within range,
taking into account the precision of the date even if not all
parts of the date are known. Note that durations are inclusive
(i.e., a closed interval) and include both the earliest and latest
date rather than the difference between them."""
date rather than the difference between them. Returns a :class:`undate.date.Timedelta` when
possible, and an :class:`undate.date.Udelta` when the duration is uncertain."""

# if precision is a single day, duration is one day
# no matter when it is or what else is known
Expand All @@ -437,20 +438,48 @@ def duration(self) -> Timedelta:
# calculate month duration within a single year (not min/max)
if self.precision == DatePrecision.MONTH:
latest = self.latest
# if year is unknown, calculate month duration in
# leap year and non-leap year, in case length varies
if not self.known_year:
# if year is unknown, calculate month duration in
# a single year
latest = Date(self.earliest.year, self.latest.month, self.latest.day)
# TODO: should leap-year specific logic shift to the calendars,
# since it works differently depending on the calendar?
possible_years = [
self.calendar_converter.LEAP_YEAR,
self.calendar_converter.NON_LEAP_YEAR,
]
# TODO: what about partially known years like 191X ?
else:
# otherwise, get possible durations for all possible months
# for a known year
possible_years = [self.earliest.year]

# for every possible month and year, get max days for that month,
possible_max_days = set()
# appease mypy, which says month values could be None here
if self.earliest.month is not None and self.latest.month is not None:
for possible_month in range(self.earliest.month, self.latest.month + 1):
for year in possible_years:
possible_max_days.add(
self.calendar_converter.max_day(year, possible_month)
)

# if there is more than one possible value for month length,
# whether due to leap year / non-leap year or ambiguous month,
# return an uncertain delta
if len(possible_max_days) > 1:
return Udelta(*possible_max_days)

# otherwise, calculate timedelta normally
max_day = list(possible_max_days)[0]
latest = Date(self.earliest.year, self.earliest.month, max_day)

# latest = datetime.date(
# self.earliest.year, self.latest.month, self.latest.day
# )
delta = latest - self.earliest + ONE_DAY
# month duration can't ever be more than 31 days
# (could we ever know if it's smaller?)

# if granularity == month but not known month, duration = 31
if delta.astype(int) > 31:
# FIXME: this depends on calendar!
return ONE_MONTH_MAX
return delta

Expand Down
16 changes: 15 additions & 1 deletion tests/test_date.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import numpy as np
from undate.date import ONE_YEAR, Date, DatePrecision, Timedelta
from uncertainties import ufloat

from undate.date import ONE_YEAR, Date, DatePrecision, Timedelta, Udelta


class TestDatePrecision:
Expand Down Expand Up @@ -77,3 +79,15 @@ def test_init_from_np_timedelta64(self):

def test_days(self):
assert Timedelta(10).days == 10


class TestUdelta:
    """Tests for :class:`undate.date.Udelta`."""

    def test_init(self):
        """Possible durations are converted to a midpoint with an uncertainty."""
        # February in an unknown year in Gregorian calendar could be 28 or 29 days
        february_days = ufloat(28.5, 0.5)  # 28 or 29
        udelt = Udelta(28, 29)
        # two separately created ufloat values don't actually compare as
        # equal, due to the variance; compare the ufloat stored on the
        # Udelta (not the Udelta itself, which is a different type and
        # would be unequal regardless of its value)
        assert udelt.days != february_days
        # so inspect the expected values
        assert udelt.days.nominal_value == 28.5
        assert udelt.days.std_dev == 0.5
26 changes: 21 additions & 5 deletions tests/test_undate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from undate import Undate, UndateInterval, Calendar
from undate.converters.base import BaseCalendarConverter
from undate.date import DatePrecision, Timedelta
from undate.date import DatePrecision, Timedelta, Udelta


class TestUndate:
Expand Down Expand Up @@ -383,10 +383,26 @@ def test_partiallyknown_duration(self):
# month in unknown year
assert Undate(month=6).duration().days == 30
# partially known month
assert Undate(year=1900, month="1X").duration().days == 31
# what about february?
# could vary with leap years, but assume non-leapyear
assert Undate(month=2).duration().days == 28
# 1X = October, November, or December = 30 or 31 days
# should return a Udelta object
unknown_month_duration = Undate(year=1900, month="1X").duration()
assert isinstance(unknown_month_duration, Udelta)
assert unknown_month_duration.days.nominal_value == 30.5
assert unknown_month_duration.days.std_dev == 0.5

# completely unknown month should also return a Udelta object
unknown_month_duration = Undate(year=1900, month="XX").duration()
assert isinstance(unknown_month_duration, Udelta)
# possible range is 28 to 31 days
assert unknown_month_duration.days.nominal_value == 29.5
assert unknown_month_duration.days.std_dev == 1.5

# the number of days in February of an unknown year is uncertain, since
# it could vary with leap years; either 28 or 29 days
feb_duration = Undate(month=2).duration()
assert isinstance(feb_duration, Udelta)
assert feb_duration.days.nominal_value == 28.5
assert feb_duration.days.std_dev == 0.5

def test_known_year(self):
assert Undate(2022).known_year is True
Expand Down