Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

preliminary date comparison methods #65

Merged
merged 6 commits into from
Apr 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 96 additions & 15 deletions src/undate/undate.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import datetime
from calendar import monthrange
from enum import Enum, auto
from enum import IntEnum
import re

# Pre 3.10 requires Union for multiple types, e.g. Union[int, None] instead of int | None
Expand All @@ -15,16 +15,22 @@
ONE_DAY = datetime.timedelta(days=1)


class DatePrecision(IntEnum):
    """Date precision, to indicate date precision independent from how much
    of the date is known.

    Members are integers so that precisions can be compared with the
    usual greater than / less than operators; a coarser precision
    compares greater than a finer one (``YEAR > MONTH > DAY``).
    """

    # numbers should be set to allow logical greater than / less than
    # comparison, e.g. year precision > month

    #: day
    DAY = 1
    #: month
    MONTH = 2
    #: year
    YEAR = 3

    def __str__(self) -> str:
        """Return the member name (e.g. ``"YEAR"``)."""
        return self.name


class Undate:
Expand Down Expand Up @@ -173,18 +179,93 @@ def __repr__(self) -> str:
return "<Undate '%s' (%s)>" % (self.label, self)
return "<Undate %s>" % self

def __eq__(self, other: "Undate") -> bool:
# question: should label be taken into account when checking equality?
# for now, assuming label differences don't matter for comparing dates
return (
def __eq__(self, other: object) -> bool:
    """Check whether this date is equal to another date.

    Label differences are not taken into account when comparing dates.

    :param other: an :class:`Undate` or a :class:`datetime.date`
    :returns: ``True`` if the dates are equal, ``False`` otherwise;
        ``NotImplemented`` when ``other`` is of an unsupported type, so
        that Python can fall back to its default comparison handling.
    """
    # only a day-precision fully known undate can be equal to a datetime.date
    if isinstance(other, datetime.date):
        return self.earliest == other and self.latest == other

    # anything else has no earliest/latest to compare against; signal
    # "unsupported" rather than failing with an AttributeError
    if not isinstance(other, Undate):
        return NotImplemented

    # check for apparent equality
    looks_equal = (
        self.earliest == other.earliest
        and self.latest == other.latest
        and self.initial_values == other.initial_values
    )
    # if everything looks the same, check for any unknowns in initial values
    # the same unknown date should NOT be considered equal

    # NOTE: assumes that partially known values can only be written
    # in one format (i.e. X for missing digits).
    # If we support other formats, will need to normalize to common
    # internal format for comparison
    if looks_equal and any("X" in str(val) for val in self.initial_values.values()):
        return False
    return looks_equal

def __lt__(self, other: Union["Undate", datetime.date]) -> bool:
    """Check whether this date is strictly earlier than another date.

    :param other: an :class:`Undate` or a :class:`datetime.date`
    :returns: ``True`` if this date ends before the other one starts,
        ``False`` if it starts after the other one ends or in any
        remaining case (e.g. the two dates are equal);
        ``NotImplemented`` when ``other`` is of an unsupported type.
    :raises NotImplementedError: if one date falls within the other,
        since that comparison is ambiguous and not currently supported.
    """
    # support datetime.date by converting to undate
    if isinstance(other, datetime.date):
        other = Undate.from_datetime_date(other)
    elif not isinstance(other, Undate):
        # unsupported operand type: let Python try the reflected
        # operation instead of failing with an AttributeError
        return NotImplemented

    # if this date ends before the other date starts,
    # return true (this date is earlier, so it is less)
    if self.latest < other.earliest:
        return True

    # if the other one ends before this one starts,
    # return false (this date is later, so it is not less)
    if other.latest < self.earliest:
        return False

    # if it does not, check if one is included within the other
    # (e.g., single date within the same year)
    # comparison for those cases is not currently supported
    if other in self or self in other:
        raise NotImplementedError(
            "Can't compare when one date falls within the other"
        )
    # NOTE: unsupported comparisons are supposed to return NotImplemented
    # However, doing that in this case results in a confusing TypeError!
    #   TypeError: '<' not supported between instances of 'Undate' and 'Undate'
    # How to handle when the comparison is ambiguous / indeterminate?
    # we may need a tribool / ternary type (true, false, unknown),
    # but not sure what python builtin methods will do with it (unknown = false?)

    # for any other case (i.e., self == other), return false
    return False

def __gt__(self, other: Union["Undate", datetime.date]) -> bool:
# define gt ourselves so we can support > comparison with datetime.date,
# but rely on existing less than implementation.
# strictly greater than must rule out equals
return not (self < other or self == other)

def __le__(self, other: Union["Undate", datetime.date]) -> bool:
return self == other or self < other

def __contains__(self, other: Union["Undate", datetime.date]) -> bool:
# if the two dates are strictly equal, don't consider
# either one as containing the other

# support comparison with datetime by converting to undate
if isinstance(other, datetime.date):
other = Undate.from_datetime_date(other)

if self == other:
return False

return (
self.earliest <= other.earliest
and self.latest >= other.latest
# is precision sufficient for comparing partially known dates?
and self.precision > other.precision
)

@classmethod
def from_datetime_date(cls, dt_date: datetime.date) -> "Undate":
    """Initialize an :class:`Undate` object from a :class:`datetime.date`

    The year, month, and day of ``dt_date`` are passed to the
    constructor of the class this is called on, so subclasses get an
    instance of their own type.
    """
    return cls(dt_date.year, dt_date.month, dt_date.day)

@property
def known_year(self) -> bool:
Expand Down
7 changes: 5 additions & 2 deletions tests/test_dateformat/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,12 @@ def test_parse_to_string(self):
BaseDateFormat().to_string(1991)


@pytest.mark.first
def test_import_formatters_import_only_once(caplog):
# run first so we can confirm it runs once
# clear the cache, since any instantiation of an Undate
# object anywhere in the test suite will populate it
BaseDateFormat.import_formatters.cache_clear()

# run first, and confirm it runs and loads formatters
with caplog.at_level(logging.DEBUG):
import_count = BaseDateFormat.import_formatters()
# should import at least one thing (iso8601)
Expand Down
158 changes: 156 additions & 2 deletions tests/test_undate.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
from datetime import timedelta
from datetime import timedelta, date

import pytest

from undate.undate import Undate, UndateInterval
from undate.undate import Undate, UndateInterval, DatePrecision


class TestDatePrecision:
    """Tests for the DatePrecision enum."""

    def test_str(self):
        """String conversion yields the bare member name."""
        year_precision = DatePrecision.YEAR
        assert str(year_precision) == "YEAR"


class TestUndate:
Expand Down Expand Up @@ -121,12 +126,27 @@ def test_invalid_date(self):
with pytest.raises(ValueError):
Undate(1990, 22)

def test_from_datetime_date(self):
    """An Undate built from a datetime.date equals the same full date."""
    source_day = date(2001, 3, 5)
    converted = Undate.from_datetime_date(source_day)
    assert isinstance(converted, Undate)
    assert converted == Undate(2001, 3, 5)

def test_eq(self):
    """Fully known dates built from the same values compare equal."""
    # (positional args, keyword args) for each date to check
    same_inputs = [
        ((2022,), {}),
        ((2022, 10), {}),
        ((2022, 10, 1), {}),
        ((), {"month": 2, "day": 7}),
    ]
    for args, kwargs in same_inputs:
        # two separate instances created from identical values
        assert Undate(*args, **kwargs) == Undate(*args, **kwargs)

def test_eq_datetime_date(self):
    """Equality with datetime.date holds only for matching full dates."""
    oct_first = date(2022, 10, 1)

    # support comparisons with datetime objects for full day-precision
    assert Undate(2022, 10, 1) == oct_first
    assert Undate(2022, 10, 1) != date(2022, 10, 2)
    assert Undate(1980, 10, 1) != oct_first

    # other date precisions are not equal
    for less_precise in (Undate(2022), Undate(2022, 10)):
        assert less_precise != oct_first

def test_not_eq(self):
assert Undate(2022) != Undate(2023)
assert Undate(2022, 10) != Undate(2022, 11)
Expand All @@ -135,6 +155,140 @@ def test_not_eq(self):
assert Undate(2022) != Undate(2022, 10)
assert Undate(2022, 10) != Undate(2022, 10, 1)

# partially unknown dates should NOT be considered equal
assert Undate("19XX") != Undate("19XX")
assert Undate(1980, "XX") != Undate(1980, "XX")

# Pairs of (earlier date, later date) used to parametrize the
# less than / greater than comparison tests.
testdata_lt_gt = [
    # dates to test for gt/lt comparison: earlier date, later date
    # - simple cases: same precision where one date is clearly earlier
    (Undate(2022), Undate(2023)),
    (Undate(1991, 1), Undate(1991, 5)),
    (Undate(1856, 3, 3), Undate(1856, 3, 21)),
    # - mixed precision where one date is clearly earlier
    (Undate(1991, 1), Undate(2000)),
    (Undate(1856, 3, 3), Undate(1901)),
    # - partially known digits where comparison is possible
    (Undate("19XX"), Undate("20XX")),
    (Undate(1900, "0X"), Undate(1900, "1X")),
    # - compare with datetime.date objects
    (Undate("19XX"), date(2020, 1, 1)),
    (Undate(1991, 1), date(1992, 3, 4)),
]

@pytest.mark.parametrize("earlier,later", testdata_lt_gt)
def test_lt(self, earlier, later):
    """Strict ordering holds in both directions for each pair."""
    # the earlier value is less than the later one...
    assert earlier < later
    # ...and the later value is greater than the earlier one
    assert later > earlier

# Cases for <= / >=: every strictly ordered pair, plus pairs that are
# exactly equal.
testdata_lte_gte = [
    *testdata_lt_gt,
    # a few exactly equal cases
    (Undate(1601), Undate(1601)),
    (Undate(1991, 1), Undate(1991, 1)),
    (Undate(1492, 5, 3), Undate(1492, 5, 3)),
    # compare with datetime.date also
    (Undate(1492, 5, 3), date(1492, 5, 3)),
]

def test_lt_when_eq(self):
    """Strict less than / greater than are false for equal dates."""
    # two distinct instances of the same year
    first, second = Undate(1900), Undate(1900)
    assert not first > second
    assert not first < second

    # same for datetime.date
    undate_day, plain_day = Undate(1903, 1, 5), date(1903, 1, 5)
    assert not undate_day < plain_day
    assert not undate_day > plain_day

@pytest.mark.parametrize("earlier,later", testdata_lte_gte)
def test_lte(self, earlier, later):
    """Non-strict ordering holds in both directions for each pair."""
    # the earlier (or equal) value is less than or equal to the later one...
    assert earlier <= later
    # ...and the later (or equal) value is greater than or equal to the earlier
    assert later >= earlier

def test_lt_notimplemented(self):
    """Comparing dates where one falls within the other is unsupported."""
    # how to compare mixed precision where dates overlap?
    # if one date falls *within* the earliest/latest of the other,
    # then it is not clearly less; check both directions
    year_args, month_args = (2022,), (2022, 5)
    for left_args, right_args in (
        (year_args, month_args),
        (month_args, year_args),
    ):
        with pytest.raises(
            NotImplementedError, match="date falls within the other"
        ):
            assert Undate(*left_args) < Undate(*right_args)

# Pairs of (date1, date2) used to parametrize the containment test,
# which asserts that date1 is in date2.
testdata_contains = [
    # first date falls within the range of the other
    # dates within range: middle, start, end, varying precision
    (Undate(2022, 6), Undate(2022)),
    (Undate(2022, 1, 1), Undate(2022)),
    (Undate(2022, 12, 31), Undate(2022)),
    (Undate(2022, 6, 15), Undate(2022, 6)),
    # support contains with datetime.date
    (date(2022, 6, 1), Undate(2022)),
    (date(2022, 6, 1), Undate(2022, 6)),
]

@pytest.mark.parametrize("date1,date2", testdata_contains)
def test_contains(self, date1, date2):
    """The first date of each pair is contained in the second."""
    assert date1 in date2

# Pairs of (date1, date2) used to parametrize the negative containment
# test, which asserts that date1 is not in date2.
testdata_not_contains = [
    # dates not in range
    (Undate(1980), Undate(2020)),
    (Undate(1980), Undate(2020, 6)),
    (Undate(1980, 6), Undate(2020, 6)),
    # support contains with datetime.date
    (date(1980, 6, 1), Undate(2022)),
    (date(3001, 6, 1), Undate(2022, 6)),
    # partially known dates that are similar but have the same precision,
    # so one does not contain the other
    (Undate("199X"), Undate("19XX")),
    # - specific month to unknown month
    (Undate(1980, 6), Undate(1980, "XX")),
    # some of these might overlap, but we don't have enough
    # information to determine
    # - unknown month to unknown month
    (Undate(1980, "XX"), Undate(1980, "XX")),
    # - partially unknown month to unknown month
    (Undate(1801, "1X"), Undate(1801, "XX")),
]

@pytest.mark.parametrize("date1,date2", testdata_not_contains)
def test_not_contains(self, date1, date2):
    """The first date of each pair is not contained in the second."""
    assert date1 not in date2

def test_sorting(self):
    """Sorting works through the gt/lt comparisons where they are defined."""
    # simple cases: dates that are clearly ordered
    d1980 = Undate(1980)
    d2002_10 = Undate(2002, 10)
    d2002_12 = Undate(2002, 12)
    d2012_05_01 = Undate(2012, 5, 1)

    chronological = [d1980, d2002_10, d2002_12, d2012_05_01]
    # start from the reverse order and expect chronological order back
    assert sorted(reversed(chronological)) == chronological

    # what about semi-ambiguous cases?
    d1991_XX = Undate(1991, "XX")
    d1992_01_XX = Undate(1992, 1, "XX")
    partially_known = [d1992_01_XX, d1991_XX, d1980]
    assert sorted(partially_known) == [d1980, d1991_XX, d1992_01_XX]

    # what about things we can't compare?
    d1991 = Undate(1991)
    d1991_02 = Undate(1991, 2)
    overlapping = [d1991_02, d1991, d1991_XX]
    # for now, this will raise a not implemented error
    with pytest.raises(NotImplementedError):
        sorted(overlapping)

    # TODO: partially known year?
    # someyear = Undate("1XXX")
    # assert sorted([d1991, someyear]) == [someyear, d1991]

def test_duration(self):
day_duration = Undate(2022, 11, 7).duration()
assert isinstance(day_duration, timedelta)
Expand Down