Skip to content

Commit d26574c

Browse files
committed
Implementing Hebrew Anno Mundi calendar converter based on Hijri
1 parent 5cc19fd commit d26574c

File tree

13 files changed

+447
-19
lines changed

13 files changed

+447
-19
lines changed

src/undate/converters/base.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
formatter methods as desired/appropriate for your converter as well as the
2929
additional methods for ``max_month``, ``max_day``, and conversion ``to_gregorian``
3030
calendar.
31+
- Import your calendar in ``undate/converters/calendars/__init__.py`` and include it in ``__all__``
3132
- Add unit tests for the new calendar logic under ``tests/test_converters/calendars/``
3233
- Add the new calendar to the ``Calendar`` enum of supported calendars in
3334
``undate/undate.py`` and confirm that the `get_converter` method loads your
@@ -136,9 +137,13 @@ class BaseCalendarConverter(BaseDateConverter):
136137
#: Converter name. Subclasses must define a unique name.
137138
name: str = "Base Calendar Converter"
138139

139-
def max_month(self, year: int) -> int:
140-
"""Maximum month for this calendar for this year"""
141-
raise NotImplementedError
140+
def min_month(self) -> int:
141+
"""First month for this calendar. Defaults to 1."""
142+
return 1
143+
144+
def max_month(self) -> int:
145+
"""Last month for this calendar. Defaults to 12."""
146+
return 12
142147

143148
def max_day(self, year: int, month: int) -> int:
144149
"""maximum numeric day for the specified year and month in this calendar"""

src/undate/converters/calendars/gregorian.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,6 @@ class GregorianDateConverter(BaseCalendarConverter):
1515
#: known non-leap year
1616
NON_LEAP_YEAR: int = 2022
1717

18-
def max_month(self, year: int) -> int:
19-
"""Maximum month for this calendar for this year"""
20-
return 12
21-
2218
def max_day(self, year: int, month: int) -> int:
2319
"""maximum numeric day for the specified year and month in this calendar"""
2420
# if month is known, use that to calculate
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from undate.converters.calendars.hijri.converter import HijriDateConverter
2+
3+
__all__ = ["HijriDateConverter"]
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
from typing import Union
2+
3+
from convertdate import hebrew # type: ignore
4+
from lark.exceptions import UnexpectedCharacters
5+
6+
from undate.converters.base import BaseCalendarConverter
7+
from undate.converters.calendars.hebrew.parser import hebrew_parser
8+
from undate.converters.calendars.hebrew.transformer import HebrewDateTransformer
9+
from undate.undate import Undate, UndateInterval
10+
11+
12+
class HebrewDateConverter(BaseCalendarConverter):
13+
"""
14+
Converter for Hebrew Anno Mundi calendar.
15+
16+
Support for parsing Anno Mundi dates and converting to Undate and UndateInterval
17+
objects in the Gregorian calendar.
18+
"""
19+
20+
#: converter name: Hebrew
21+
name: str = "Hebrew"
22+
calendar_name: str = "Anno Mundi"
23+
24+
def __init__(self):
25+
self.transformer = HebrewDateTransformer()
26+
27+
def min_month(self) -> int:
28+
"""first numeric month in this calendar"""
29+
# hebrew calendar civil year starts in Tishri
30+
return hebrew.TISHRI
31+
32+
def max_month(self) -> int:
33+
"""last numeric month for the specified year in this calendar"""
34+
# hebrew calendar civil year starts in Tishri
35+
# Elul is the month before Tishri
36+
return hebrew.ELUL
37+
38+
def max_day(self, year: int, month: int) -> int:
39+
"""maximum numeric day for the specified year and month in this calendar"""
40+
# NOTE: unreleased v2.4.1 of convertdate standardizes month_days to month_length
41+
return hebrew.month_days(year, month)
42+
43+
def to_gregorian(self, year: int, month: int, day: int) -> tuple[int, int, int]:
44+
"""Convert a Hebrew date, specified by year, month, and day,
45+
to the Gregorian equivalent date. Returns a tuple of year, month, day.
46+
"""
47+
return hebrew.to_gregorian(year, month, day)
48+
49+
def parse(self, value: str) -> Union[Undate, UndateInterval]:
50+
"""
51+
Parse a Hebrew date string and return an :class:`~undate.undate.Undate` or
52+
:class:`~undate.undate.UndateInterval`.
53+
The Hebrew date string is preserved in the undate label.
54+
"""
55+
if not value:
56+
raise ValueError("Parsing empty string is not supported")
57+
58+
# parse the input string, then transform to undate object
59+
try:
60+
# parse the string with our Hebrew date parser
61+
parsetree = hebrew_parser.parse(value)
62+
# transform the parse tree into an undate or undate interval
63+
undate_obj = self.transformer.transform(parsetree)
64+
# set the original date as a label, with the calendar name
65+
undate_obj.label = f"{value} {self.calendar_name}"
66+
return undate_obj
67+
except UnexpectedCharacters as err:
68+
raise ValueError(f"Could not parse '{value}' as a Hebrew date") from err
69+
70+
# do we need to support conversion the other direction?
71+
# i.e., generate a Hebrew date from an arbitrary undate or undate interval?
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
%import common.WS
2+
%ignore WS
3+
4+
// only support day month year format for now
5+
// parser requires numeric day and year to be distinguished based on order
6+
hebrew_date: day month year | month year | year
7+
8+
// TODO: handle date ranges?
9+
10+
// TODO: add support for qualifiers?
11+
// PGP dates use qualifiers like "first decade of" (for beginning of month)
12+
// "first third of", seasons (can look for more examples)
13+
14+
year: /\d+/
15+
16+
// months
17+
month: month_1
18+
| month_2
19+
| month_3
20+
| month_4
21+
| month_5
22+
| month_6
23+
| month_7
24+
| month_8
25+
| month_9
26+
| month_10
27+
| month_11
28+
| month_12
29+
| month_13
30+
// months have 29 or 30 days; we do not expect leading zeroes
31+
day: /[1-9]/ | /[12][0-9]/ | /30/
32+
33+
// months, in order; from convertdate list
34+
// with variants from Princeton Geniza Project
35+
// support matching with and without accents
36+
month_1: "Nisan"
37+
// Iyar or Iyyar
38+
month_2: /Iyy?ar/
39+
month_3: "Sivan"
40+
month_4: "Tammuz"
41+
month_5: "Av"
42+
month_6: "Elul"
43+
// Tishrei or Tishri
44+
month_7: /Tishre?i/
45+
month_8: "Heshvan"
46+
month_9: "Kislev"
47+
// Tevet or Teveth
48+
month_10: /[ṬT]eveth?/
49+
month_11: "Shevat"
50+
// Adar I or Adar
51+
month_12: /Adar( I)?/
52+
// Adar II or Adar Bet
53+
month_13: /Adar (II|Bet)/
54+
55+
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import pathlib
2+
3+
from lark import Lark
4+
5+
grammar_path = pathlib.Path(__file__).parent / "hebrew.lark"
6+
7+
with open(grammar_path) as grammar:
8+
# NOTE: LALR parser is faster but can't be used due to ambiguity between years and days
9+
hebrew_parser = Lark(grammar.read(), start="hebrew_date", strict=True)
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
from lark import Transformer, Tree
2+
3+
from undate.undate import Undate, Calendar
4+
5+
6+
class HebrewUndate(Undate):
7+
"""Undate convenience subclass; sets default calendar to Hebrew."""
8+
9+
calendar = Calendar.HEBREW
10+
11+
12+
class HebrewDateTransformer(Transformer):
13+
"""Transform a Hebrew date parse tree and return an Undate or
14+
UndateInterval."""
15+
16+
def hebrew_date(self, items):
17+
parts = {}
18+
for child in items:
19+
if child.data in ["year", "month", "day"]:
20+
# in each case we expect one integer value;
21+
# anonymous tokens convert to their value and cast as int
22+
value = int(child.children[0])
23+
parts[str(child.data)] = value
24+
25+
# initialize and return an undate with Hebrew year, month, day and
26+
# Hebrew calendar
27+
return HebrewUndate(**parts)
28+
29+
# year translation is not needed since we want a tree with name year
30+
# this is equivalent to a no-op
31+
# def year(self, items):
32+
# return Tree(data="year", children=[items[0]])
33+
34+
def month(self, items):
35+
# month has a nested tree for the rule and the value
36+
# the name of the rule (month_1, month_2, etc) gives us the
37+
# number of the month needed for converting the date
38+
tree = items[0]
39+
month_n = tree.data.split("_")[-1]
40+
return Tree(data="month", children=[month_n])
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
from undate.converters.calendars.hijri.converter import HijriDateConverter
2+
from undate.converters.calendars.hebrew.converter import HebrewDateConverter
23

3-
__all__ = ["HijriDateConverter"]
4+
__all__ = ["HijriDateConverter", "HebrewDateConverter"]

src/undate/converters/calendars/hijri/converter.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,6 @@ class HijriDateConverter(BaseCalendarConverter):
2424
def __init__(self):
2525
self.transformer = HijriDateTransformer()
2626

27-
def max_month(self, year: int) -> int:
28-
"""maximum numeric month for the specified year in this calendar"""
29-
return 12
30-
3127
def max_day(self, year: int, month: int) -> int:
3228
"""maximum numeric day for the specified year and month in this calendar"""
3329
return islamic.month_length(year, month)
@@ -41,8 +37,8 @@ def to_gregorian(self, year: int, month: int, day: int) -> tuple[int, int, int]:
4137
def parse(self, value: str) -> Union[Undate, UndateInterval]:
4238
"""
4339
Parse a Hijri date string and return an :class:`~undate.undate.Undate` or
44-
:class:`~undate.undate.UndateInterval` in Gregorian calendar.
45-
The Hijri date string is preserved in the undate label
40+
:class:`~undate.undate.UndateInterval`.
41+
The Hijri date string is preserved in the undate label.
4642
"""
4743
if not value:
4844
raise ValueError("Parsing empty string is not supported")

src/undate/undate.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ class Calendar(StrEnum):
2222

2323
GREGORIAN = auto()
2424
HIJRI = auto()
25+
HEBREW = auto()
2526

2627
@staticmethod
2728
def get_converter(calendar):
@@ -123,10 +124,11 @@ def calculate_earliest_latest(self, year, month, day):
123124
if month == "XX":
124125
month = None
125126

126-
min_month = 1 # is min month ever anything other than 1 ?
127-
# get max month from the calendar, since it depends on the
128-
# calendar and potentially the year (e.g. leap years in Hebrew Anno Mundi)
129-
max_month = self.calendar_converter.max_month(max_year)
127+
# get first and last month from the calendar, since it is not
128+
# always 1 and 12
129+
# TODO need to differentiate between min/max and first/last!
130+
min_month = self.calendar_converter.min_month()
131+
max_month = self.calendar_converter.max_month()
130132
if month is not None:
131133
try:
132134
# treat as an integer if we can
@@ -137,7 +139,9 @@ def calculate_earliest_latest(self, year, month, day):
137139
except ValueError:
138140
# if not, calculate min/max for missing digits
139141
min_month, max_month = self._missing_digit_minmax(
140-
str(month), min_month, max_month
142+
str(month),
143+
1,
144+
12, # min_month, max_month
141145
)
142146
# similar to month above — unknown day, but day-level granularity
143147
if day == "XX":

0 commit comments

Comments
 (0)