From 640705c9597657c4a497870fafaf5b89641994aa Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 29 Aug 2024 18:17:04 -0400 Subject: [PATCH 1/4] Require formatter to_string to support undate and undate interval --- src/undate/dateformat/base.py | 4 ++-- src/undate/dateformat/edtf/formatter.py | 12 +++++++++++- src/undate/dateformat/iso8601.py | 15 ++++++++++++++- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/undate/dateformat/base.py b/src/undate/dateformat/base.py index f4435f4..7349f7f 100644 --- a/src/undate/dateformat/base.py +++ b/src/undate/dateformat/base.py @@ -15,8 +15,8 @@ import importlib import logging import pkgutil -from typing import Dict, Type -from functools import lru_cache # functools.cache not available until 3.9 +from typing import Dict, Type, Union +from functools import lru_cache logger = logging.getLogger(__name__) diff --git a/src/undate/dateformat/edtf/formatter.py b/src/undate/dateformat/edtf/formatter.py index dee193f..e9a88f6 100644 --- a/src/undate/dateformat/edtf/formatter.py +++ b/src/undate/dateformat/edtf/formatter.py @@ -35,7 +35,17 @@ def _convert_missing_digits( return value.replace(old_missing_digit, EDTF_UNSPECIFIED_DIGIT) return None - def to_string(self, undate: Undate) -> str: + def to_string(self, undate: Union[Undate, UndateInterval]) -> str: + if isinstance(undate, Undate): + return self._undate_to_string(undate) + elif isinstance(undate, UndateInterval): + # NOTE: what is the difference between an open interval and unknown start/end? + # spec distinguishes between these, open is ".." but unknown is "" + start = self._undate_to_string(undate.earliest) if undate.earliest else ".." + end = self._undate_to_string(undate.latest) if undate.latest else ".." + return f"{start}/{end}" + + def _undate_to_string(self, undate: Undate) -> str: # in theory it's possible to use the parser and reconstruct using a tree, # but that seems much more complicated and would be harder to read parts = [] diff --git a/src/undate/dateformat/iso8601.py b/src/undate/dateformat/iso8601.py index 0b3a3b5..ff8cb7e 100644 --- a/src/undate/dateformat/iso8601.py +++ b/src/undate/dateformat/iso8601.py @@ -49,12 +49,25 @@ def _parse_single_date(self, value: str) -> Undate: # Argument of type "int | None" cannot be assigned to parameter "formatter" of type "BaseDateFormat | None" in function "__init__" return Undate(*date_parts) # type: ignore - def to_string(self, undate: Undate) -> str: + def to_string(self, undate: Union[Undate, UndateInterval]) -> str: + if isinstance(undate, Undate): + return self._undate_to_string(undate) + elif isinstance(undate, UndateInterval): + # strictly speaking I don't think ISO8601 supports open-ended ranges + # should we add an exception for dates that can't be represented by a particular format? + # (we'll likely need it for uncertain/approx, which ISO8601 doesn't handle') + start = self._undate_to_string(undate.earliest) if undate.earliest else "" + end = self._undate_to_string(undate.latest) if undate.latest else "" + return f"{start}/{end}" + + def _undate_to_string(self, undate: Undate) -> str: # serialize to iso format for simplicity, for now date_parts: List[Union[str, None]] = [] # for each part of the date that is known, generate the string format # then combine # TODO: should error if we have year and day but no month + # TODO: may want to refactor and take advantage of the year/month/day properties + # added for use in EDTF formatter code for date_portion, iso_format in self.iso_format.items(): if undate.is_known(date_portion): # NOTE: datetime strftime for %Y for 3-digit year From a8802d6fce30adf3cac8869028338850049a8b30 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 29 Aug 2024 18:18:02 -0400 Subject: [PATCH 2/4] Add string format and parse methods to undate and undate interval --- src/undate/undate.py | 30 ++++++++++++++++++++++++ tests/test_undate.py | 54 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) diff --git a/src/undate/undate.py b/src/undate/undate.py index 86b3f6b..3a506d7 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -163,6 +163,27 @@ def __repr__(self) -> str: return "" % (self.label, self) return "" % self + @classmethod + def parse(cls, date_string, format) -> Union["Undate", "UndateInterval"]: + """parse a string to an undate or undate interval using the specified format; + for now, only supports named formatters""" + formatter_cls = BaseDateFormat.available_formatters().get(format, None) + if formatter_cls: + # NOTE: some parsers may return intervals; is that ok here? + return formatter_cls().parse(date_string) + + raise ValueError(f"Unsupported format '{format}'") + + def format(self, format) -> str: + """format this undate as a string using the specified format; + for now, only supports named formatters""" + formatter_cls = BaseDateFormat.available_formatters().get(format, None) + if formatter_cls: + # NOTE: some parsers may return intervals; is that ok here? + return formatter_cls().to_string(self) + + raise ValueError(f"Unsupported format '{format}'") + def _comparison_type(self, other: object) -> "Undate": """Common logic for type handling in comparison methods. Converts to Undate object if possible, otherwise raises @@ -424,6 +445,15 @@ def __str__(self) -> str: # using EDTF syntax for open ranges return "%s/%s" % (self.earliest or "..", self.latest or "") + def format(self, format) -> str: + """format this undate interval as a string using the specified format; + for now, only supports named formatters""" + formatter_cls = BaseDateFormat.available_formatters().get(format, None) + if formatter_cls: + return formatter_cls().to_string(self) + + raise ValueError(f"Unsupported format '{format}'") + def __repr__(self) -> str: if self.label: return "" % (self.label, self) diff --git a/tests/test_undate.py b/tests/test_undate.py index 08d4104..5a18131 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -379,6 +379,46 @@ def test_is_known_day(self): assert Undate(month=1, day="X5").is_known("day") is False assert Undate(month=1, day="XX").is_known("day") is False + def test_parse(self): + assert Undate.parse("1984", "EDTF") == Undate(1984) + assert Undate.parse("1984-04", "EDTF") == Undate(1984, 4) + assert Undate.parse("1984-04", "EDTF") == Undate(1984, 4) + assert Undate.parse("2000/2001", "EDTF") == UndateInterval( + Undate(2000), Undate(2001) + ) + + assert Undate.parse("1984", "ISO8601") == Undate(1984) + assert Undate.parse("1984-04", "ISO8601") == Undate(1984, 4) + assert Undate.parse("--12-31", "ISO8601") == Undate(month=12, day=31) + + # unsupported format + with pytest.raises(ValueError, match="Unsupported format"): + Undate.parse("1984", "foobar") + with pytest.raises(ValueError, match="Unsupported format"): + Undate.parse("1984", "%Y-%m") + + def test_format(self): + # EDTF format + assert Undate(1984).format("EDTF") == "1984" + assert Undate(1984, 4).format("EDTF") == "1984-04" + assert Undate(1984, 4, 15).format("EDTF") == "1984-04-15" + assert Undate("19XX").format("EDTF") == "19XX" + assert Undate(1984, "XX").format("EDTF") == "1984-XX" + assert Undate(1984, 4, "XX").format("EDTF") == "1984-04-XX" + assert Undate(month=12, day=31).format("EDTF") == "XXXX-12-31" + + # ISO8601 format + assert Undate(1984).format("ISO8601") == "1984" + assert Undate(1984, 4).format("ISO8601") == "1984-04" + assert Undate(1984, 4, 15).format("ISO8601") == "1984-04-15" + assert Undate(month=12, day=31).format("ISO8601") == "--12-31" + + # unsupported format + with pytest.raises(ValueError, match="Unsupported format"): + Undate(1984).format("foobar") + with pytest.raises(ValueError, match="Unsupported format"): + Undate(1984).format("%Y-%m") + class TestUndateInterval: def test_str(self): @@ -392,6 +432,20 @@ def test_str(self): == "2022-11-01/2023-11-07" ) + def test_format(self): + interval = UndateInterval(Undate(2000), Undate(2001)) + assert interval.format("EDTF") == "2000/2001" + assert interval.format("ISO8601") == "2000/2001" + + # Open-ended intervals + open_start = UndateInterval(latest=Undate(2000)) + assert open_start.format("EDTF") == "../2000" + assert open_start.format("ISO8601") == "/2000" + + open_end = UndateInterval(earliest=Undate(2000)) + assert open_end.format("EDTF") == "2000/.." + assert open_end.format("ISO8601") == "2000/" + def test_repr(self): assert ( repr(UndateInterval(Undate(2022), Undate(2023))) From 25ba4add0dd0dbccb3d5c2c1e7e54e5280aa6344 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 7 Nov 2024 17:37:28 -0500 Subject: [PATCH 3/4] Cleanup unused assertion flagged by kypso-bot --- tests/test_dateformat/test_edtf.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_dateformat/test_edtf.py b/tests/test_dateformat/test_edtf.py index 13d3e5a..32ec014 100644 --- a/tests/test_dateformat/test_edtf.py +++ b/tests/test_dateformat/test_edtf.py @@ -1,5 +1,4 @@ import pytest - from undate.dateformat.edtf import EDTFDateFormat from undate.undate import Undate, UndateInterval @@ -25,7 +24,7 @@ def test_parse_singledate_unequal(self): def test_parse_invalid(self): with pytest.raises(ValueError): - assert EDTFDateFormat().parse("1991-5") == Undate(1991, 5) + EDTFDateFormat().parse("1991-5") def test_parse_range(self): assert EDTFDateFormat().parse("1800/1900") == UndateInterval( From 540bd1085224ff729163ca0f4bcc716edcc6b1f7 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 7 Nov 2024 17:57:41 -0500 Subject: [PATCH 4/4] Add tests for lines missing coverage --- src/undate/undate.py | 7 +++++-- tests/test_undate.py | 19 ++++++++++++++++++- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/undate/undate.py b/src/undate/undate.py index bef5540..c7335c9 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -210,12 +210,14 @@ def _comparison_type(self, other: object) -> "Undate": def __eq__(self, other: object) -> bool: # Note: assumes label differences don't matter for comparing dates - other = self._comparison_type(other) - # only a day-precision fully known undate can be equal to a datetime.date if isinstance(other, datetime.date): return self.earliest == other and self.latest == other + other = self._comparison_type(other) + if other is NotImplemented: + return NotImplemented + # check for apparent equality looks_equal = ( self.earliest == other.earliest @@ -346,6 +348,7 @@ def day(self) -> Optional[str]: if day: return f"{day:>02}" # if value is unset but date precision is day, return unknown day + # (may not be possible to have day precision with day part set in normal use) elif self.precision == DatePrecision.DAY: return self.MISSING_DIGIT * 2 return None diff --git a/tests/test_undate.py b/tests/test_undate.py index 100d96a..65360d3 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -2,7 +2,7 @@ from datetime import date import pytest -from undate.date import Timedelta +from undate.date import DatePrecision, Timedelta from undate.undate import Undate, UndateInterval @@ -143,6 +143,11 @@ def test_year_property(self): # unset year assert Undate(month=12, day=31).year == "XXXX" + # force method to hit conditional for date precision + some_century = Undate() + some_century.precision = DatePrecision.CENTURY + assert some_century.year is None + def test_month_property(self): # one, two digit month assert Undate(2023, 1).month == "01" @@ -172,12 +177,20 @@ def test_day_property(self): # Day without year or month assert Undate(day=15).day == "15" + # force str based on date precision without day part set + someday = Undate(2023) + someday.precision = DatePrecision.DAY + assert someday.day == "XX" + def test_eq(self): assert Undate(2022) == Undate(2022) assert Undate(2022, 10) == Undate(2022, 10) assert Undate(2022, 10, 1) == Undate(2022, 10, 1) assert Undate(month=2, day=7) == Undate(month=2, day=7) + # something we can't convert for comparison should return NotImplemented + assert Undate(2022).__eq__("not a date") == NotImplemented + def test_eq_datetime_date(self): # support comparisons with datetime objects for full day-precision assert Undate(2022, 10, 1) == date(2022, 10, 1) @@ -535,3 +548,7 @@ def test_duration(self): # duration is not supported for open-ended intervals assert UndateInterval(Undate(2000), None).duration() == NotImplemented + + # one year set and the other not currently raises not implemented error + with pytest.raises(NotImplementedError): + UndateInterval(Undate(2000), Undate()).duration()