Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

preliminary str parse and format methods #90

Merged
merged 5 commits into from
Nov 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion src/undate/dateformat/edtf/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,17 @@ def _convert_missing_digits(
return value.replace(old_missing_digit, EDTF_UNSPECIFIED_DIGIT)
return None

def to_string(self, undate: Union[Undate, UndateInterval]) -> str:
    """convert an undate or undate interval to an EDTF string

    A single :class:`Undate` is serialized by :meth:`_undate_to_string`.
    An :class:`UndateInterval` is serialized as ``start/end``; an endpoint
    that is not set is written as ``..``.

    :param undate: the date or date interval to serialize
    :returns: string representation of the date or interval
    :raises TypeError: if ``undate`` is neither an Undate nor an UndateInterval
    """
    if isinstance(undate, Undate):
        return self._undate_to_string(undate)

    if isinstance(undate, UndateInterval):
        # NOTE: what is the difference between an open interval and unknown start/end?
        # spec distinguishes between these, open is ".." but unknown is ""
        start = self._undate_to_string(undate.earliest) if undate.earliest else ".."
        end = self._undate_to_string(undate.latest) if undate.latest else ".."
        return f"{start}/{end}"

    # fail loudly rather than falling through and implicitly returning None,
    # which would violate the declared ``str`` return type
    raise TypeError(
        f"Cannot format {type(undate).__name__}; expected Undate or UndateInterval"
    )

def _undate_to_string(self, undate: Undate) -> str:
# in theory it's possible to use the parser and reconstruct using a tree,
# but that seems much more complicated and would be harder to read
parts = []
Expand Down
15 changes: 14 additions & 1 deletion src/undate/dateformat/iso8601.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,25 @@ def _parse_single_date(self, value: str) -> Undate:
# Argument of type "int | None" cannot be assigned to parameter "formatter" of type "BaseDateFormat | None" in function "__init__"
return Undate(*date_parts) # type: ignore

def to_string(self, undate: Union[Undate, UndateInterval]) -> str:
    """convert an undate or undate interval to an ISO8601-style string

    A single :class:`Undate` is serialized by :meth:`_undate_to_string`.
    An :class:`UndateInterval` is serialized as ``start/end``; an endpoint
    that is not set is written as an empty string.

    :param undate: the date or date interval to serialize
    :returns: string representation of the date or interval
    :raises TypeError: if ``undate`` is neither an Undate nor an UndateInterval
    """
    if isinstance(undate, Undate):
        return self._undate_to_string(undate)

    if isinstance(undate, UndateInterval):
        # strictly speaking I don't think ISO8601 supports open-ended ranges
        # should we add an exception for dates that can't be represented by a particular format?
        # (we'll likely need it for uncertain/approx, which ISO8601 doesn't handle)
        start = self._undate_to_string(undate.earliest) if undate.earliest else ""
        end = self._undate_to_string(undate.latest) if undate.latest else ""
        return f"{start}/{end}"

    # fail loudly rather than falling through and implicitly returning None,
    # which would violate the declared ``str`` return type
    raise TypeError(
        f"Cannot format {type(undate).__name__}; expected Undate or UndateInterval"
    )

def _undate_to_string(self, undate: Undate) -> str:
# serialize to iso format for simplicity, for now
date_parts: List[Union[str, None]] = []
# for each part of the date that is known, generate the string format
# then combine
# TODO: should error if we have year and day but no month
# TODO: may want to refactor and take advantage of the year/month/day properties
# added for use in EDTF formatter code
for date_portion, iso_format in self.iso_format.items():
if undate.is_known(date_portion):
# NOTE: datetime strftime for %Y for 3-digit year
Expand Down
37 changes: 35 additions & 2 deletions src/undate/undate.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,27 @@ def __repr__(self) -> str:
return "<Undate '%s' (%s)>" % (self.label, self)
return "<Undate %s>" % self

@classmethod
def parse(cls, date_string, format) -> Union["Undate", "UndateInterval"]:
    """parse a date string with a named formatter

    Looks up ``format`` among the formatters reported by
    ``BaseDateFormat.available_formatters()`` and hands ``date_string``
    to a new instance of the matching formatter class.

    :param date_string: the string to parse
    :param format: name of a registered formatter
    :returns: whatever the formatter's ``parse`` returns; annotated as an
        Undate or an UndateInterval, since some parsers return intervals
    :raises ValueError: if no formatter is registered under ``format``
    """
    formatters = BaseDateFormat.available_formatters()
    formatter_cls = formatters.get(format)
    if not formatter_cls:
        raise ValueError(f"Unsupported format '{format}'")

    # NOTE: some parsers may return intervals; is that ok here?
    return formatter_cls().parse(date_string)
Comment on lines +172 to +181
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Review the implementation of the parse method in the Undate class.

The parse method correctly retrieves a formatter class based on the provided format and uses it to parse the date string. However, the method assumes that all formatters will correctly handle the input without further validation or error handling for malformed inputs. Consider adding input validation or more detailed error handling to ensure robustness.

Additionally, the comment on line 172 notes that some parsers may return intervals. It's important to clarify whether this behavior is acceptable or if additional handling is needed when an interval is not expected.

rlskoeser marked this conversation as resolved.
Show resolved Hide resolved

def format(self, format) -> str:
    """serialize this undate with a named formatter

    Looks up ``format`` among the formatters reported by
    ``BaseDateFormat.available_formatters()`` and returns the result of
    calling ``to_string`` on a new instance of the matching formatter class.

    :param format: name of a registered formatter
    :returns: string produced by the formatter for this undate
    :raises ValueError: if no formatter is registered under ``format``
    """
    formatters = BaseDateFormat.available_formatters()
    formatter_cls = formatters.get(format)
    if not formatter_cls:
        raise ValueError(f"Unsupported format '{format}'")

    return formatter_cls().to_string(self)
rlskoeser marked this conversation as resolved.
Show resolved Hide resolved

def _comparison_type(self, other: object) -> "Undate":
"""Common logic for type handling in comparison methods.
Converts to Undate object if possible, otherwise raises
Expand All @@ -189,12 +210,14 @@ def _comparison_type(self, other: object) -> "Undate":
def __eq__(self, other: object) -> bool:
# Note: assumes label differences don't matter for comparing dates

other = self._comparison_type(other)

# only a day-precision fully known undate can be equal to a datetime.date
if isinstance(other, datetime.date):
return self.earliest == other and self.latest == other

other = self._comparison_type(other)
if other is NotImplemented:
return NotImplemented
rlskoeser marked this conversation as resolved.
Show resolved Hide resolved

# check for apparent equality
looks_equal = (
self.earliest == other.earliest
Expand Down Expand Up @@ -325,6 +348,7 @@ def day(self) -> Optional[str]:
if day:
return f"{day:>02}"
# if value is unset but date precision is day, return unknown day
# (may not be possible to have day precision without day part set in normal use)
elif self.precision == DatePrecision.DAY:
return self.MISSING_DIGIT * 2
return None
Expand Down Expand Up @@ -433,6 +457,15 @@ def __str__(self) -> str:
# using EDTF syntax for open ranges
return "%s/%s" % (self.earliest or "..", self.latest or "")

def format(self, format) -> str:
    """serialize this undate interval with a named formatter

    Looks up ``format`` among the formatters reported by
    ``BaseDateFormat.available_formatters()`` and returns the result of
    calling ``to_string`` on a new instance of the matching formatter class.

    :param format: name of a registered formatter
    :returns: string produced by the formatter for this interval
    :raises ValueError: if no formatter is registered under ``format``
    """
    formatters = BaseDateFormat.available_formatters()
    formatter_cls = formatters.get(format)
    if not formatter_cls:
        raise ValueError(f"Unsupported format '{format}'")

    return formatter_cls().to_string(self)

def __repr__(self) -> str:
if self.label:
return "<UndateInterval '%s' (%s)>" % (self.label, self)
Expand Down
3 changes: 1 addition & 2 deletions tests/test_dateformat/test_edtf.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import pytest

from undate.dateformat.edtf import EDTFDateFormat
from undate.undate import Undate, UndateInterval

Expand All @@ -25,7 +24,7 @@ def test_parse_singledate_unequal(self):

def test_parse_invalid(self):
    """parsing "1991-5" with the EDTF formatter must raise ValueError"""
    # only the call itself belongs inside the raises block: once parse()
    # raises, an equality assertion wrapped around it is never evaluated
    with pytest.raises(ValueError):
        EDTFDateFormat().parse("1991-5")
rlskoeser marked this conversation as resolved.
Show resolved Hide resolved

def test_parse_range(self):
assert EDTFDateFormat().parse("1800/1900") == UndateInterval(
Expand Down
73 changes: 72 additions & 1 deletion tests/test_undate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from datetime import date

import pytest
from undate.date import Timedelta
from undate.date import DatePrecision, Timedelta
from undate.undate import Undate, UndateInterval


Expand Down Expand Up @@ -143,6 +143,11 @@ def test_year_property(self):
# unset year
assert Undate(month=12, day=31).year == "XXXX"

# force method to hit conditional for date precision
some_century = Undate()
some_century.precision = DatePrecision.CENTURY
assert some_century.year is None

def test_month_property(self):
# one, two digit month
assert Undate(2023, 1).month == "01"
Expand Down Expand Up @@ -172,12 +177,20 @@ def test_day_property(self):
# Day without year or month
assert Undate(day=15).day == "15"

# force str based on date precision without day part set
someday = Undate(2023)
someday.precision = DatePrecision.DAY
assert someday.day == "XX"

def test_eq(self):
    """undates built from the same date parts compare equal"""
    assert Undate(2022) == Undate(2022)
    assert Undate(2022, 10) == Undate(2022, 10)
    assert Undate(2022, 10, 1) == Undate(2022, 10, 1)
    assert Undate(month=2, day=7) == Undate(month=2, day=7)

    # something we can't convert for comparison should return NotImplemented;
    # NotImplemented is a singleton, so check identity rather than equality
    assert Undate(2022).__eq__("not a date") is NotImplemented

def test_eq_datetime_date(self):
# support comparisons with datetime objects for full day-precision
assert Undate(2022, 10, 1) == date(2022, 10, 1)
Expand Down Expand Up @@ -384,6 +397,46 @@ def test_is_known_day(self):
assert Undate(month=1, day="X5").is_known("day") is False
assert Undate(month=1, day="XX").is_known("day") is False

def test_parse(self):
    """Undate.parse dispatches to the named formatter and rejects unknown names"""
    # EDTF format; a range string is parsed to an interval
    assert Undate.parse("1984", "EDTF") == Undate(1984)
    assert Undate.parse("1984-04", "EDTF") == Undate(1984, 4)
    assert Undate.parse("2000/2001", "EDTF") == UndateInterval(
        Undate(2000), Undate(2001)
    )

    # ISO8601 format
    assert Undate.parse("1984", "ISO8601") == Undate(1984)
    assert Undate.parse("1984-04", "ISO8601") == Undate(1984, 4)
    assert Undate.parse("--12-31", "ISO8601") == Undate(month=12, day=31)

    # unsupported format
    with pytest.raises(ValueError, match="Unsupported format"):
        Undate.parse("1984", "foobar")
    with pytest.raises(ValueError, match="Unsupported format"):
        Undate.parse("1984", "%Y-%m")

def test_format(self):
    """Undate.format serializes with the named formatter and rejects unknown names"""
    # expected output per named formatter, as (undate, expected string) pairs
    expected_by_format = {
        "EDTF": [
            (Undate(1984), "1984"),
            (Undate(1984, 4), "1984-04"),
            (Undate(1984, 4, 15), "1984-04-15"),
            (Undate("19XX"), "19XX"),
            (Undate(1984, "XX"), "1984-XX"),
            (Undate(1984, 4, "XX"), "1984-04-XX"),
            (Undate(month=12, day=31), "XXXX-12-31"),
        ],
        "ISO8601": [
            (Undate(1984), "1984"),
            (Undate(1984, 4), "1984-04"),
            (Undate(1984, 4, 15), "1984-04-15"),
            (Undate(month=12, day=31), "--12-31"),
        ],
    }
    for format_name, cases in expected_by_format.items():
        for undate, expected in cases:
            assert undate.format(format_name) == expected

    # unsupported format
    for bad_format in ("foobar", "%Y-%m"):
        with pytest.raises(ValueError, match="Unsupported format"):
            Undate(1984).format(bad_format)


class TestUndateInterval:
def test_str(self):
Expand All @@ -397,6 +450,20 @@ def test_str(self):
== "2022-11-01/2023-11-07"
)

def test_format(self):
    """UndateInterval.format serializes closed and open-ended intervals"""
    closed = UndateInterval(Undate(2000), Undate(2001))
    # Open-ended intervals
    open_start = UndateInterval(latest=Undate(2000))
    open_end = UndateInterval(earliest=Undate(2000))

    # (interval, formatter name, expected string)
    cases = [
        (closed, "EDTF", "2000/2001"),
        (closed, "ISO8601", "2000/2001"),
        (open_start, "EDTF", "../2000"),
        (open_start, "ISO8601", "/2000"),
        (open_end, "EDTF", "2000/.."),
        (open_end, "ISO8601", "2000/"),
    ]
    for interval, format_name, expected in cases:
        assert interval.format(format_name) == expected

def test_repr(self):
assert (
repr(UndateInterval(Undate(2022), Undate(2023)))
Expand Down Expand Up @@ -481,3 +548,7 @@ def test_duration(self):

# duration is not supported for open-ended intervals
assert UndateInterval(Undate(2000), None).duration() == NotImplemented

# one year set and the other not currently raises not implemented error
with pytest.raises(NotImplementedError):
UndateInterval(Undate(2000), Undate()).duration()
Loading