Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

preliminary str parse and format methods #90

Merged
merged 5 commits into from
Nov 8, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/undate/dateformat/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
import importlib
import logging
import pkgutil
from typing import Dict, Type
from functools import lru_cache # functools.cache not available until 3.9
from typing import Dict, Type, Union
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove unused import: Union.

The import of Union from the typing module is flagged as unused by the static analysis tool. If it's not needed, consider removing it to keep the code clean and efficient.

-from typing import Dict, Type, Union
+from typing import Dict, Type
Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
from typing import Dict, Type, Union
from typing import Dict, Type
Tools
Ruff

18-18: typing.Union imported but unused

Remove unused import: typing.Union

(F401)

from functools import lru_cache


logger = logging.getLogger(__name__)
Expand Down
12 changes: 11 additions & 1 deletion src/undate/dateformat/edtf/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,17 @@ def _convert_missing_digits(
return value.replace(old_missing_digit, EDTF_UNSPECIFIED_DIGIT)
return None

def to_string(self, undate: Undate) -> str:
def to_string(self, undate: Union[Undate, UndateInterval]) -> str:
    """Serialize an Undate or UndateInterval as an EDTF string.

    A single Undate is delegated to ``_undate_to_string``. An interval is
    rendered as ``start/end``, where a missing endpoint is written as
    ``..``.

    Raises TypeError if ``undate`` is neither an Undate nor an
    UndateInterval; without this the method would silently return None,
    contradicting its ``str`` return annotation.
    """
    if isinstance(undate, Undate):
        return self._undate_to_string(undate)
    if isinstance(undate, UndateInterval):
        # NOTE: what is the difference between an open interval and unknown start/end?
        # spec distinguishes between these, open is ".." but unknown is ""
        start = self._undate_to_string(undate.earliest) if undate.earliest else ".."
        end = self._undate_to_string(undate.latest) if undate.latest else ".."
        return f"{start}/{end}"
    raise TypeError(
        f"Cannot format {type(undate).__name__} as EDTF; "
        "expected Undate or UndateInterval"
    )

def _undate_to_string(self, undate: Undate) -> str:
# in theory it's possible to use the parser and reconstruct using a tree,
# but that seems much more complicated and would be harder to read
parts = []
Expand Down
15 changes: 14 additions & 1 deletion src/undate/dateformat/iso8601.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,25 @@ def _parse_single_date(self, value: str) -> Undate:
# Argument of type "int | None" cannot be assigned to parameter "formatter" of type "BaseDateFormat | None" in function "__init__"
return Undate(*date_parts) # type: ignore

def to_string(self, undate: Undate) -> str:
def to_string(self, undate: Union[Undate, UndateInterval]) -> str:
    """Serialize an Undate or UndateInterval as an ISO8601 string.

    A single Undate is delegated to ``_undate_to_string``. An interval is
    rendered as ``start/end``, where a missing endpoint is written as an
    empty string.

    Raises TypeError if ``undate`` is neither an Undate nor an
    UndateInterval; without this the method would silently return None,
    contradicting its ``str`` return annotation.
    """
    if isinstance(undate, Undate):
        return self._undate_to_string(undate)
    if isinstance(undate, UndateInterval):
        # strictly speaking I don't think ISO8601 supports open-ended ranges
        # should we add an exception for dates that can't be represented by a particular format?
        # (we'll likely need it for uncertain/approx, which ISO8601 doesn't handle)
        start = self._undate_to_string(undate.earliest) if undate.earliest else ""
        end = self._undate_to_string(undate.latest) if undate.latest else ""
        return f"{start}/{end}"
    raise TypeError(
        f"Cannot format {type(undate).__name__} as ISO8601; "
        "expected Undate or UndateInterval"
    )

def _undate_to_string(self, undate: Undate) -> str:
# serialize to iso format for simplicity, for now
date_parts: List[Union[str, None]] = []
# for each part of the date that is known, generate the string format
# then combine
# TODO: should error if we have year and day but no month
# TODO: may want to refactor and take advantage of the year/month/day properties
# added for use in EDTF formatter code
for date_portion, iso_format in self.iso_format.items():
if undate.is_known(date_portion):
# NOTE: datetime strftime for %Y for 3-digit year
Expand Down
30 changes: 30 additions & 0 deletions src/undate/undate.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,27 @@ def __repr__(self) -> str:
return "<Undate '%s' (%s)>" % (self.label, self)
return "<Undate %s>" % self

@classmethod
def parse(cls, date_string, format) -> Union["Undate", "UndateInterval"]:
    """Parse ``date_string`` using the formatter registered under ``format``.

    Only named formatters are supported for now. The result is whatever
    the selected formatter's ``parse`` returns, which may be an interval
    rather than a single date.

    Raises ValueError if no formatter is registered under ``format``.
    """
    parser_cls = BaseDateFormat.available_formatters().get(format)
    if not parser_cls:
        raise ValueError(f"Unsupported format '{format}'")

    # NOTE: some parsers may return intervals; is that ok here?
    return parser_cls().parse(date_string)
Comment on lines +172 to +181
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Review the implementation of the parse method in the Undate class.

The parse method correctly retrieves a formatter class based on the provided format and uses it to parse the date string. However, the method assumes that all formatters will correctly handle the input without further validation or error handling for malformed inputs. Consider adding input validation or more detailed error handling to ensure robustness.

Additionally, the comment on line 172 notes that some parsers may return intervals. It's important to clarify whether this behavior is acceptable or if additional handling is needed when an interval is not expected.

rlskoeser marked this conversation as resolved.
Show resolved Hide resolved

def format(self, format) -> str:
    """Return this undate serialized by the formatter registered under ``format``.

    Only named formatters are supported for now.

    Raises ValueError if no formatter is registered under ``format``.
    """
    serializer_cls = BaseDateFormat.available_formatters().get(format)
    if not serializer_cls:
        raise ValueError(f"Unsupported format '{format}'")

    return serializer_cls().to_string(self)
rlskoeser marked this conversation as resolved.
Show resolved Hide resolved

def _comparison_type(self, other: object) -> "Undate":
"""Common logic for type handling in comparison methods.
Converts to Undate object if possible, otherwise raises
Expand Down Expand Up @@ -424,6 +445,15 @@ def __str__(self) -> str:
# using EDTF syntax for open ranges
return "%s/%s" % (self.earliest or "..", self.latest or "")

def format(self, format) -> str:
    """Return this undate interval serialized by the formatter registered
    under ``format``.

    Only named formatters are supported for now.

    Raises ValueError if no formatter is registered under ``format``.
    """
    serializer_cls = BaseDateFormat.available_formatters().get(format)
    if not serializer_cls:
        raise ValueError(f"Unsupported format '{format}'")

    return serializer_cls().to_string(self)

def __repr__(self) -> str:
if self.label:
return "<UndateInterval '%s' (%s)>" % (self.label, self)
Expand Down
54 changes: 54 additions & 0 deletions tests/test_undate.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,46 @@ def test_is_known_day(self):
assert Undate(month=1, day="X5").is_known("day") is False
assert Undate(month=1, day="XX").is_known("day") is False

def test_parse(self):
    """Undate.parse dispatches to named formatters and rejects unknown names."""
    # EDTF: single dates at year and month precision, plus an interval
    assert Undate.parse("1984", "EDTF") == Undate(1984)
    assert Undate.parse("1984-04", "EDTF") == Undate(1984, 4)
    assert Undate.parse("2000/2001", "EDTF") == UndateInterval(
        Undate(2000), Undate(2001)
    )

    # ISO8601: including a month/day with no year
    assert Undate.parse("1984", "ISO8601") == Undate(1984)
    assert Undate.parse("1984-04", "ISO8601") == Undate(1984, 4)
    assert Undate.parse("--12-31", "ISO8601") == Undate(month=12, day=31)

    # unsupported format
    with pytest.raises(ValueError, match="Unsupported format"):
        Undate.parse("1984", "foobar")
    with pytest.raises(ValueError, match="Unsupported format"):
        Undate.parse("1984", "%Y-%m")

def test_format(self):
    """Undate.format serializes via named formatters and rejects unknown names."""
    # expected serialization of each undate, grouped by formatter name
    expected_by_format = {
        "EDTF": [
            (Undate(1984), "1984"),
            (Undate(1984, 4), "1984-04"),
            (Undate(1984, 4, 15), "1984-04-15"),
            (Undate("19XX"), "19XX"),
            (Undate(1984, "XX"), "1984-XX"),
            (Undate(1984, 4, "XX"), "1984-04-XX"),
            (Undate(month=12, day=31), "XXXX-12-31"),
        ],
        "ISO8601": [
            (Undate(1984), "1984"),
            (Undate(1984, 4), "1984-04"),
            (Undate(1984, 4, 15), "1984-04-15"),
            (Undate(month=12, day=31), "--12-31"),
        ],
    }
    for format_name, cases in expected_by_format.items():
        for undate, expected in cases:
            assert undate.format(format_name) == expected

    # unsupported format
    for bad_format in ("foobar", "%Y-%m"):
        with pytest.raises(ValueError, match="Unsupported format"):
            Undate(1984).format(bad_format)


class TestUndateInterval:
def test_str(self):
Expand All @@ -392,6 +432,20 @@ def test_str(self):
== "2022-11-01/2023-11-07"
)

def test_format(self):
    """UndateInterval.format handles closed and open-ended intervals."""
    closed = UndateInterval(Undate(2000), Undate(2001))
    # Open-ended intervals
    no_start = UndateInterval(latest=Undate(2000))
    no_end = UndateInterval(earliest=Undate(2000))

    expectations = [
        (closed, "EDTF", "2000/2001"),
        (closed, "ISO8601", "2000/2001"),
        (no_start, "EDTF", "../2000"),
        (no_start, "ISO8601", "/2000"),
        (no_end, "EDTF", "2000/.."),
        (no_end, "ISO8601", "2000/"),
    ]
    for interval, format_name, expected in expectations:
        assert interval.format(format_name) == expected

def test_repr(self):
assert (
repr(UndateInterval(Undate(2022), Undate(2023)))
Expand Down
Loading