Skip to content

Commit 418837f

Browse files
committed
Make EDTF parser available as undate formatter; handle 5+ digit years
1 parent 329fa3d commit 418837f

File tree

5 files changed

+74
-10
lines changed

5 files changed

+74
-10
lines changed
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from typing import Dict, List, Union
2+
3+
from lark.exceptions import UnexpectedCharacters
4+
5+
from undate.undate import Undate, UndateInterval
6+
from undate.dateformat.base import BaseDateFormat
7+
from undate.dateformat.edtf.parser import edtf_parser
8+
from undate.dateformat.edtf.transformer import EDTFTransformer
9+
10+
11+
class EDTFDateFormat(BaseDateFormat):
    """Date format that parses EDTF strings into undate objects.

    Input is parsed with ``edtf_parser`` and the resulting parse tree is
    converted with an :class:`EDTFTransformer` instance.
    """

    name: str = "EDTF"

    def __init__(self):
        # one transformer instance is created here and reused by parse()
        self.transformer = EDTFTransformer()

    def parse(self, value: str) -> Union[Undate, UndateInterval]:
        """Parse ``value`` and return the transformed parse tree.

        :param value: date string to parse
        :returns: the result of transforming the parse tree (annotated as
            an ``Undate`` or ``UndateInterval``)
        :raises ValueError: when the parser raises ``UnexpectedCharacters``;
            the original error is attached as the cause
        """
        # parse the input string, then transform to undate object
        try:
            parsetree = edtf_parser.parse(value)
            return self.transformer.transform(parsetree)
        except UnexpectedCharacters as err:
            # chain explicitly so the parser error is kept as __cause__
            raise ValueError(
                "Parsing failed due to UnexpectedCharacters: %s" % err
            ) from err

    # def to_string(self, undate: Undate) -> str:
    # TODO: how do we leverage the parser for this?

src/undate/dateformat/edtf/transformer.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,8 @@ def day_unspecified(self, items):
6363
def date_level1(self, items):
6464
return self.date(items)
6565

66-
def year_fivedigitsplus(self, token):
66+
def year_fivedigitsplus(self, items):
6767
# strip off the leading Y and convert to integer
68-
# TODO: undate is currently limited to 4-digit years
69-
# (datetime max year of 9999)
70-
return tok.update(int(token[:1]))
68+
token = items[0]
69+
year = int(token.value.lstrip("Y"))
70+
return Tree(data="year", children=[year])

src/undate/dateformat/iso8601.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1+
from typing import Dict, List, Union
2+
13
from undate.dateformat.base import BaseDateFormat
24
from undate.undate import Undate, UndateInterval
3-
from typing import Dict, List, Union
45

56

67
class ISO8601DateFormat(BaseDateFormat):

tests/test_dateformat/edtf/test_edtf_transformer.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,10 @@
1313
("1001-03-30", Undate(1001, 3, 30)),
1414
("1000/2000", UndateInterval(Undate(1000), Undate(2000))),
1515
("1000-01/2000-05-01", UndateInterval(Undate(1000, 1), Undate(2000, 5, 1))),
16-
# # level 1
17-
# NOTE: undate currently doesn't most of the level 1 functionality
18-
# NOTE: undate currently doesn't support years beyond 9999 (datetime.MAXYEAR)
19-
# ("Y17000002", Undate(17000002)),
16+
# level 1
17+
("Y17000002", Undate(17000002)),
2018
# "2001-21", # spring 2001
21-
# # qualifiers
19+
# qualifiers TODO - not yet supported by undate
2220
# "1984?",
2321
# "2004-06~",
2422
# "2004-06-11%",

tests/test_dateformat/test_edtf.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import pytest
2+
3+
from undate.dateformat.edtf import EDTFDateFormat
4+
from undate.undate import Undate, UndateInterval
5+
6+
7+
class TestEDTFDateFormat:
    """Tests for parsing date strings with EDTFDateFormat."""

    def test_parse_singledate(self):
        """Single dates parse to equal Undate objects."""
        assert EDTFDateFormat().parse("2002") == Undate(2002)
        assert EDTFDateFormat().parse("1991-05") == Undate(1991, 5)
        assert EDTFDateFormat().parse("1991-05-03") == Undate(1991, 5, 3)
        # unknown dates are not strictly equal, but string comparison should match
        assert str(EDTFDateFormat().parse("201X")) == str(Undate("201X"))
        assert str(EDTFDateFormat().parse("2004-XX")) == str(Undate(2004, "XX"))
        # missing year but month/day known
        # assert EDTFDateFormat().parse("--05-03") == Undate(month=5, day=3)

    def test_parse_singledate_unequal(self):
        """Parsed dates do not compare equal to a different Undate."""
        assert EDTFDateFormat().parse("2002") != Undate(2003)
        assert EDTFDateFormat().parse("1991-05") != Undate(1991, 6)
        assert EDTFDateFormat().parse("1991-05-03") != Undate(1991, 5, 4)
        # missing year but month/day known
        # - does EDTF not support this or is parsing logic incorrect?
        # assert EDTFDateFormat().parse("XXXX-05-03") != Undate(month=5, day=4)

    def test_parse_invalid(self):
        """Input the parser rejects is reported as ValueError."""
        # only the call belongs inside the raises block: a comparison on
        # the result could never run once parse() raises
        with pytest.raises(ValueError):
            EDTFDateFormat().parse("1991-5")

    def test_parse_range(self):
        """A slash-separated pair of years parses to an UndateInterval."""
        assert EDTFDateFormat().parse("1800/1900") == UndateInterval(
            Undate(1800), Undate(1900)
        )

    # def test_to_string(self):
    #     # NOTE: iso8601 to_string currently tested more thoroughly
    #     # in undate str tests; may want to move those tests here
    #     assert EDTFDateFormat().to_string(Undate(900)) == "0900"
    #     assert EDTFDateFormat().to_string(Undate(33)) == "0033"

0 commit comments

Comments
 (0)