Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

EDTF demo/validation notebook #98

Open
wants to merge 12 commits into
base: develop
Choose a base branch
from
Open
799 changes: 799 additions & 0 deletions examples/notebooks/edtf-support.ipynb

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions src/undate/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,6 @@
# package version string
__version__ = "0.3.0.dev0"

from undate.date import DatePrecision
from undate.undate import Undate, UndateInterval

# names exported as the package's public API (e.g. for ``from undate import *``)
__all__ = ["Undate", "UndateInterval", "DatePrecision"]
12 changes: 9 additions & 3 deletions src/undate/converters/edtf/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,14 @@ def _undate_to_string(self, undate: Undate) -> str:
if undate.precision >= DatePrecision.YEAR:
year = self._convert_missing_digits(undate.year, undate.MISSING_DIGIT)
# years with more than 4 digits should be prefixed with Y
if year and len(year) > 4:
year = f"Y{year}"
# (don't count minus sign when checking digits)
if year and len(year.lstrip("-")) > 4:
negative_year = ""
if year.startswith("-"):
negative_year = "-"
year = year[1:]
year = f"{negative_year}Y{year}"

# TODO: handle uncertain / approximate
parts.append(year or EDTF_UNSPECIFIED_DIGIT * 4)

Expand All @@ -97,4 +103,4 @@ def _undate_to_string(self, undate: Undate) -> str:
return "-".join(parts)

# how can we have an empty string? probably shouldn't get here
return ""
raise ValueError("Failed to generate an EDTF string from %r", undate)
7 changes: 4 additions & 3 deletions src/undate/converters/edtf/edtf.lark
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

date: year | year "-" month | year "-" month "-" day

year: INT
year: INT | /\-/ INT
month: /(0[1-9])|(1[0-2])/
// day of month, 01-31; tens digit and ones digit are matched separately so
// that 10 and 20 are accepted (a [0-2][1-9] pattern excludes them)
day: /(0[1-9])|([12][0-9])|(3[01])/

Expand All @@ -34,14 +34,15 @@ uncertain_approximate: "%"

// The character 'X' may be used in place of one or more rightmost
// digits to indicate that the value of that digit is unspecified
// In Level 2, year may be completely unspecified.
unspecified: /X/
?year_unspecified: /\d+/ unspecified+
?year_unspecified: /\d+/ unspecified+ | unspecified ~ 4
?month_unspecified: "0".."1"? unspecified ~ 1..2
//?year_month_unspecified: year_l1 "-" month_unspecified
?day_unspecified: "0".."3"? unspecified ~ 1..2

// 'Y' may be used at the beginning of the date string to signify that the date is a year, when (and only when) the year exceeds four digits, i.e. for years later than 9999 or earlier than -9999.
year_fivedigitsplus: /Y\d{5,}/
year_fivedigitsplus: /-?Y\d{5,}/
?year_l1: year_fivedigitsplus | year | year_unspecified

// The values 21, 22, 23, 24 may be used to signify
Expand Down
20 changes: 19 additions & 1 deletion src/undate/converters/edtf/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,26 @@ def day_unspecified(self, items):
def date_level1(self, items):
return self.date(items)

def year(self, items):
    """Build a ``year`` tree from the parsed year items.

    A negative year arrives as two items (the ``-`` sign followed by the
    year value); otherwise the first item is the year value itself.
    """
    is_negative = len(items) > 1 and items[0] == "-"
    # negate the value that follows the sign, or pass the first item through
    value = -items[1] if is_negative else items[0]
    return Tree(data="year", children=[value])

def year_fivedigitsplus(self, items):
    """Convert a ``Y``-prefixed year token into a ``year`` tree.

    The token text is expected to look like ``Y17000002`` or, for a
    negative year, ``-Y17000002``. The optional leading minus sign and
    the ``Y`` marker are stripped and the remainder is converted to an
    integer, negated when the minus sign was present.

    Raises ``ValueError`` if the remaining text is not a valid integer.
    """
    value = items[0].value
    # the sign comes *before* the Y marker, so it has to be removed first;
    # converting the raw token text directly would fail on "-Y..."
    negative = value.startswith("-")
    if negative:
        value = value[1:]

    # strip off the leading Y and convert to integer
    year = int(value.lstrip("Y"))
    if negative:
        year = -year
    return Tree(data="year", children=[year])
7 changes: 5 additions & 2 deletions src/undate/undate.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,12 @@ def __init__(
elif year:
self.precision = DatePrecision.YEAR

# TODO: refactor partial date min/max calculations
# special case: treat year = XXXX as unknown/none
if year == "XXXX":
year = None

if year is not None:
# could we / should we use str.isnumeric here?
try:
year = int(year)
# update initial value since it is used to determine
Expand Down Expand Up @@ -113,7 +116,7 @@ def __init__(
# if we have no day or partial day, calculate min / max
min_day = 1
# if we know year and month (or max month), calculate exactly
if year and month:
if year and month and isinstance(year, int):
_, max_day = monthrange(int(year), max_month)
elif year is None and month:
# If we don't have year and month,
Expand Down
9 changes: 9 additions & 0 deletions tests/test_converters/edtf/test_edtf_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@
"1000-01/2000-05-01",
# level 1
"Y170000002",
"-Y170000002",
"2001-21", # spring 2001
# negative year
"-1985",
# qualifiers
"1984?",
"2004-06~",
Expand All @@ -28,6 +31,12 @@
"1985-04/..",
"../1985-04-12",
"/1985-04-12",
# level 2 unspecified digits
"156X-12-25",
"XXXX-12-XX",
"1XXX-12",
"1XXX-XX",
"1984-1X",
]


Expand Down
8 changes: 8 additions & 0 deletions tests/test_converters/edtf/test_edtf_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
("1000-01/2000-05-01", UndateInterval(Undate(1000, 1), Undate(2000, 5, 1))),
# level 1
("Y17000002", Undate(17000002)),
("-Y17000002", Undate(-17000002)),
# negative year
("-1985", Undate(-1985)),
# "2001-21", # spring 2001
# qualifiers TODO - not yet supported by undate
# "1984?",
Expand All @@ -30,6 +33,11 @@
("1985-04/..", UndateInterval(Undate(1985, 4), None)),
("../1985-04-12", UndateInterval(None, Undate(1985, 4, 12))),
("/1985-04-12", UndateInterval(None, Undate(1985, 4, 12))),
# level 2 unspecified digits
("156X-12-25", Undate("156X", 12, 25)),
("XXXX-12-XX", Undate("XXXX", 12, "XX")),
("1XXX-XX", Undate("1XXX", "XX")),
("1984-1X", Undate(1984, "1X")),
]


Expand Down
8 changes: 8 additions & 0 deletions tests/test_converters/test_edtf.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest
from undate.converters.edtf import EDTFDateConverter
from undate.date import DatePrecision
from undate.undate import Undate, UndateInterval


Expand Down Expand Up @@ -52,5 +53,12 @@ def test_to_string(self):

assert EDTFDateConverter().to_string(Undate(1991, "0X")) == "1991-0X"
assert EDTFDateConverter().to_string(Undate(1991, None, 3)) == "1991-XX-03"
assert EDTFDateConverter().to_string(Undate(-1984)) == "-1984"

# if converter can't generate a string for the date,
# it should raise a ValueError
empty_undate = Undate()
empty_undate.precision = DatePrecision.DECADE
with pytest.raises(ValueError):
EDTFDateConverter().to_string(empty_undate)
# TODO: override missing digit and confirm replacement
Loading