Skip to content

Commit

Permalink
Handle level 2 fully-unspecified year
Browse files Browse the repository at this point in the history
  • Loading branch information
rlskoeser committed Oct 24, 2024
1 parent 5e90973 commit 8760c33
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 58 deletions.
81 changes: 30 additions & 51 deletions examples/notebooks/edtf-support.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"id": "9c6b7379-b2a7-4ec1-afa5-2cd9832c8a5d",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -92,7 +92,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"id": "923476ff-344a-4018-a02e-6e5f80ea76a8",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -159,7 +159,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"id": "6ed422de-34a2-4324-b254-f62db00563f7",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -214,7 +214,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"id": "8d98a139-627b-40bd-b1c5-d0028e538a53",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -259,7 +259,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"id": "532470db-851e-4f91-9242-cd93d35054cf",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -324,7 +324,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"id": "a5abd0e4-0b26-49b0-bf78-3e1fe6c046d8",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -429,7 +429,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 7,
"id": "e47f3fff-d35c-4c2e-9568-214763f6511a",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -483,7 +483,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 8,
"id": "39143c1f-932a-450c-9b2d-ffbe3e1416b0",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -537,7 +537,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 9,
"id": "95965f17-0bd5-446f-bc09-9503eaed68e2",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -591,7 +591,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 10,
"id": "c6c2d1a1-39f1-45eb-ac08-1de4fadbe842",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -642,36 +642,16 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 11,
"id": "f24fd31a-176a-40b5-bff4-d72b68f32a18",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1985\n"
]
},
{
"ename": "AssertionError",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[13], line 7\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# format\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(Undate(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1985\u001b[39m))\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(Undate(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1985\u001b[39m)) \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m-1985\u001b[39m\u001b[38;5;124m\"\u001b[39m\n",
"\u001b[0;31mAssertionError\u001b[0m: "
]
}
],
"outputs": [],
"source": [
"# Example 1 ‘-1985’\n",
"# parse\n",
"neg_year = Undate.parse(\"-1985\", \"EDTF\")\n",
"assert neg_year.year == \"-1985\"\n",
"# format\n",
"print(Undate(-1985))\n",
"assert str(Undate(-1985)) == \"-1985\""
]
},
Expand Down Expand Up @@ -710,7 +690,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 13,
"id": "5910caab-eada-4715-b863-9bbbb15b9c5c",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -739,20 +719,20 @@
"# format\n",
"assert str(Undate(\"15XX\", 12, 25)) == \"15XX-12-25\"\n",
"\n",
"# NOT CURRENTLY SUPPORTED\n",
"# Example 3 ‘XXXX-12-XX’\n",
"# parse\n",
"# december = Undate.parse(\"XXXX-12-XX\", \"EDTF\")\n",
"# assert december.year == \"XXXX\"\n",
"# assert december.month == \"12\"\n",
"# assert december.day == \"XX\"\n",
"# assert december.precision == DatePrecision.DAY\n",
"december = Undate.parse(\"XXXX-12-XX\", \"EDTF\")\n",
"assert december.year == \"XXXX\"\n",
"assert december.month == \"12\"\n",
"assert december.day == \"XX\"\n",
"assert december.precision == DatePrecision.DAY\n",
"# TODO: these must be in a different branch...\n",
"# assert december.earliest.year == Undate.MIN_YEAR\n",
"# assert december.latest.year == Undate.MAX_YEAR\n",
"# assert december.earliest.day == 1\n",
"# assert december.latest.day == 31\n",
"assert december.earliest.day == 1\n",
"assert december.latest.day == 31\n",
"# format\n",
"# assert str(Undate(\"XXXX\", 12, \"XX\")) == \"XXXX-12-XX\"\n",
"assert str(Undate(\"XXXX\", 12, \"XX\")) == \"XXXX-12-XX\"\n",
"\n",
"# Example 4 ‘1XXX-XX’\n",
"# parse\n",
Expand All @@ -765,17 +745,16 @@
"# format\n",
"assert str(Undate(\"1XXX\", \"XX\")) == \"1XXX-XX\"\n",
"\n",
"# NOT CURRENTLY SUPPORTED (parse error)\n",
"# Example 5 ‘1XXX-12’\n",
"# parse\n",
"# some_december = Undate.parse(\"1XXX-12\", \"EDTF\")\n",
"# assert some_december.year == \"1XXX\"\n",
"# assert some_december.month == \"12\"\n",
"# assert some_december.precision == DatePrecision.MONTH\n",
"# assert some_december.earliest.year == 1000\n",
"# assert some_december.latest.year == 1999\n",
"# # format\n",
"# assert str(Undate(\"1XXX\", 12)) == \"1XXX-12\"\n",
"some_december = Undate.parse(\"1XXX-12\", \"EDTF\")\n",
"assert some_december.year == \"1XXX\"\n",
"assert some_december.month == \"12\"\n",
"assert some_december.precision == DatePrecision.MONTH\n",
"assert some_december.earliest.year == 1000\n",
"assert some_december.latest.year == 1999\n",
"# format\n",
"assert str(Undate(\"1XXX\", 12)) == \"1XXX-12\"\n",
"\n",
"# Example 6 ‘1984-1X’\n",
"# parse\n",
Expand Down
3 changes: 2 additions & 1 deletion src/undate/dateformat/edtf/edtf.lark
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,9 @@ uncertain_approximate: "%"

// The character 'X' may be used in place of one or more rightmost
// digits to indicate that the value of that digit is unspecified.
// In Level 2, year may be completely unspecified.
unspecified: /X/
?year_unspecified: /\d+/ unspecified+
?year_unspecified: /\d+/ unspecified+ | unspecified ~ 4
?month_unspecified: "0".."1"? unspecified ~ 1..2
//?year_month_unspecified: year_l1 "-" month_unspecified
?day_unspecified: "0".."3"? unspecified ~ 1..2
Expand Down
7 changes: 5 additions & 2 deletions src/undate/undate.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,12 @@ def __init__(
elif year:
self.precision = DatePrecision.YEAR

# TODO: refactor partial date min/max calculations
# special case: treat year = XXXX as unknown/none
if year == "XXXX":
year = None

if year is not None:
# could we / should we use str.isnumeric here?
try:
year = int(year)
# update initial value since it is used to determine
Expand Down Expand Up @@ -110,7 +113,7 @@ def __init__(
# if we have no day or partial day, calculate min / max
min_day = 1
# if we know year and month (or max month), calculate exactly
if year and month:
if year and month and isinstance(year, int):
_, max_day = monthrange(int(year), max_month)
elif year is None and month:
# If we don't have year and month,
Expand Down
6 changes: 6 additions & 0 deletions tests/test_dateformat/edtf/test_edtf_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@
"1985-04/..",
"../1985-04-12",
"/1985-04-12",
# level 2 unspecified digits
"156X-12-25",
"XXXX-12-XX",
"1XXX-12",
"1XXX-XX",
"1984-1X",
]


Expand Down
5 changes: 5 additions & 0 deletions tests/test_dateformat/edtf/test_edtf_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@
("1985-04/..", UndateInterval(Undate(1985, 4), None)),
("../1985-04-12", UndateInterval(None, Undate(1985, 4, 12))),
("/1985-04-12", UndateInterval(None, Undate(1985, 4, 12))),
# level 2 unspecified digits
("156X-12-25", Undate("156X", 12, 25)),
("XXXX-12-XX", Undate("XXXX", 12, "XX")),
("1XXX-XX", Undate("1XXX", "XX")),
("1984-1X", Undate(1984, "1X")),
]


Expand Down
15 changes: 11 additions & 4 deletions tests/test_dateformat/test_edtf.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,18 @@ def test_parse_singledate(self):
assert str(EDTFDateFormat().parse("201X")) == str(Undate("201X"))
assert str(EDTFDateFormat().parse("2004-XX")) == str(Undate(2004, "XX"))
# missing year but month/day known
# assert EDTFDateFormat().parse("--05-03") == Undate(month=5, day=3)
# comparison doesn't work because undate knows unknown dates aren't
# necessarily the same, so use string comparison
assert str(EDTFDateFormat().parse("XXXX-05-03")) == Undate(
month=5, day=3
).format("EDTF")

def test_parse_singledate_unequal(self):
assert EDTFDateFormat().parse("2002") != Undate(2003)
assert EDTFDateFormat().parse("1991-05") != Undate(1991, 6)
assert EDTFDateFormat().parse("1991-05-03") != Undate(1991, 5, 4)
# missing year but month/day known
# - does EDTF not support this or is parsing logic incorrect?
# assert EDTFDateFormat().parse("XXXX-05-03") != Undate(month=5, day=4)
assert EDTFDateFormat().parse("XXXX-05-03") != Undate(month=5, day=4)

def test_parse_invalid(self):
with pytest.raises(ValueError):
Expand All @@ -47,4 +50,8 @@ def test_to_string(self):
assert EDTFDateFormat().to_string(Undate(1991, "0X")) == "1991-0X"
assert EDTFDateFormat().to_string(Undate(1991, None, 3)) == "1991-XX-03"

# TODO: override missing digit and confirm replacement
# level 2 unspecified digits
assert EDTFDateFormat().to_string(Undate("156X", 12, 25)) == "156X-12-25"
assert EDTFDateFormat().to_string(Undate("XXXX", 12, "XX")) == "XXXX-12-XX"
assert EDTFDateFormat().to_string(Undate("1XXX", "XX")) == "1XXX-XX"
assert EDTFDateFormat().to_string(Undate(1984, "1X")) == "1984-1X"

0 comments on commit 8760c33

Please sign in to comment.