Skip to content

Commit

Permalink
Implementing Hebrew Anno Mundi calendar converter based on Hijri
Browse files Browse the repository at this point in the history
  • Loading branch information
rlskoeser committed Nov 27, 2024
1 parent 5cc19fd commit d26574c
Show file tree
Hide file tree
Showing 13 changed files with 447 additions and 19 deletions.
11 changes: 8 additions & 3 deletions src/undate/converters/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
formatter methods as desired/appropriate for your converter as well as the
additional methods for ``max_month``, ``max_day``, and conversion ``to_gregorian``
calendar.
- Import your calendar in ``undate/converters/calendars/__init__.py`` and include in ``__all__``
- Add unit tests for the new calendar logic under ``tests/test_converters/calendars/``
- Add the new calendar to the ``Calendar`` enum of supported calendars in
``undate/undate.py`` and confirm that the `get_converter` method loads your
Expand Down Expand Up @@ -136,9 +137,13 @@ class BaseCalendarConverter(BaseDateConverter):
#: Converter name. Subclasses must define a unique name.
name: str = "Base Calendar Converter"

def max_month(self, year: int) -> int:
"""Maximum month for this calendar for this year"""
raise NotImplementedError
def min_month(self) -> int:
    """Return the number of the first month of the year in this calendar.

    The base implementation returns 1. Subclasses should override this
    for calendars whose year does not begin with month number 1.
    """
    return 1

def max_month(self) -> int:
    """Return the number of the last month of the year in this calendar.

    The base implementation returns 12. Subclasses should override this
    for calendars whose year does not end with month number 12.
    """
    return 12

def max_day(self, year: int, month: int) -> int:
"""maximum numeric day for the specified year and month in this calendar"""
Expand Down
4 changes: 0 additions & 4 deletions src/undate/converters/calendars/gregorian.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,6 @@ class GregorianDateConverter(BaseCalendarConverter):
#: known non-leap year
NON_LEAP_YEAR: int = 2022

def max_month(self, year: int) -> int:
"""Maximum month for this calendar for this year"""
return 12

def max_day(self, year: int, month: int) -> int:
"""maximum numeric day for the specified year and month in this calendar"""
# if month is known, use that to calculate
Expand Down
3 changes: 3 additions & 0 deletions src/undate/converters/calendars/hebrew/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Public API of the Hebrew calendar package: expose the Hebrew converter
# (not the Hijri one, which lives in the sibling ``hijri`` package).
from undate.converters.calendars.hebrew.converter import HebrewDateConverter

__all__ = ["HebrewDateConverter"]
71 changes: 71 additions & 0 deletions src/undate/converters/calendars/hebrew/converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from typing import Union

from convertdate import hebrew  # type: ignore
from lark.exceptions import UnexpectedCharacters, UnexpectedEOF

from undate.converters.base import BaseCalendarConverter
from undate.converters.calendars.hebrew.parser import hebrew_parser
from undate.converters.calendars.hebrew.transformer import HebrewDateTransformer
from undate.undate import Undate, UndateInterval


class HebrewDateConverter(BaseCalendarConverter):
    """
    Converter for the Hebrew Anno Mundi calendar.

    Support for parsing Anno Mundi dates and converting to Undate and
    UndateInterval objects in the Gregorian calendar.
    """

    #: converter name: Hebrew
    name: str = "Hebrew"
    #: calendar name; appended to the parsed string to build the undate label
    calendar_name: str = "Anno Mundi"

    def __init__(self):
        # transformer that turns a Hebrew date parse tree into an undate
        self.transformer = HebrewDateTransformer()

    def min_month(self) -> int:
        """First numeric month of the year in this calendar.

        The Hebrew civil year starts in Tishri, so this returns the month
        number of Tishri rather than 1.
        """
        return hebrew.TISHRI

    def max_month(self) -> int:
        """Last numeric month of the year in this calendar.

        The Hebrew civil year starts in Tishri, so the last month of the
        year is Elul, the month immediately before Tishri. Note that this is
        the *last* month of the year, not the highest month number.
        """
        return hebrew.ELUL

    def max_day(self, year: int, month: int) -> int:
        """maximum numeric day for the specified year and month in this calendar"""
        # NOTE: unreleased v2.4.1 of convertdate standardizes month_days to month_length
        return hebrew.month_days(year, month)

    def to_gregorian(self, year: int, month: int, day: int) -> tuple[int, int, int]:
        """Convert a Hebrew date, specified by year, month, and day,
        to the Gregorian equivalent date. Returns a tuple of year, month, day.
        """
        return hebrew.to_gregorian(year, month, day)

    def parse(self, value: str) -> Union[Undate, UndateInterval]:
        """
        Parse a Hebrew date string and return an :class:`~undate.undate.Undate` or
        :class:`~undate.undate.UndateInterval`.
        The Hebrew date string is preserved in the undate label.

        :raises ValueError: if the string is empty or cannot be parsed as a
            Hebrew date
        """
        if not value:
            raise ValueError("Parsing empty string is not supported")

        # parse the string with our Hebrew date parser; only the parse call
        # is guarded, so errors from later steps are not masked
        try:
            parsetree = hebrew_parser.parse(value)
        except (UnexpectedCharacters, UnexpectedEOF) as err:
            # unexpected characters *or* input that ends too early
            # (e.g. a day and month without a year) are both parse failures
            raise ValueError(f"Could not parse '{value}' as a Hebrew date") from err

        # transform the parse tree into an undate or undate interval
        undate_obj = self.transformer.transform(parsetree)
        # set the original date as a label, with the calendar name
        undate_obj.label = f"{value} {self.calendar_name}"
        return undate_obj

# do we need to support conversion the other direction?
# i.e., generate a Hebrew date from an arbitrary undate or undate interval?
55 changes: 55 additions & 0 deletions src/undate/converters/calendars/hebrew/hebrew.lark
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
%import common.WS
%ignore WS

// Grammar for Hebrew (Anno Mundi) dates written with transliterated
// month names, e.g. day month year, month year, or year alone.

// only support day month year format for now
// parser requires numeric day and year to be distinguished based on order
hebrew_date: day month year | month year | year

// TODO: handle date ranges?

// TODO: add support for qualifiers?
// Princeton Geniza Project (PGP) dates use qualifiers like
// "first decade of" (for beginning of month),
// "first third of", seasons (can look for more examples)

// year is one or more digits
year: /\d+/

// months; each alternative is a numbered rule (month_1 ... month_13)
// so that the rule name carries the numeric month
month: month_1
| month_2
| month_3
| month_4
| month_5
| month_6
| month_7
| month_8
| month_9
| month_10
| month_11
| month_12
| month_13
// months have 29 or 30 days; we do not expect leading zeroes
day: /[1-9]/ | /[12][0-9]/ | /30/

// months, in order; from convertdate list
// with variants from Princeton Geniza Project
// support matching with and without accents
month_1: "Nisan"
// Iyar or Iyyar
month_2: /Iyy?ar/
month_3: "Sivan"
month_4: "Tammuz"
month_5: "Av"
month_6: "Elul"
// Tishrei or Tishri
month_7: /Tishre?i/
month_8: "Heshvan"
month_9: "Kislev"
// Tevet or Teveth, with or without a dot under the initial T (Ṭevet)
month_10: /[ṬT]eveth?/
month_11: "Shevat"
// Adar I or Adar
month_12: /Adar( I)?/
// Adar II or Adar Bet
month_13: /Adar (II|Bet)/


9 changes: 9 additions & 0 deletions src/undate/converters/calendars/hebrew/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import pathlib

from lark import Lark

# the grammar file lives alongside this module
grammar_path = pathlib.Path(__file__).parent / "hebrew.lark"

# The grammar contains non-ASCII characters (e.g. "Ṭ" in a month name), so
# read it explicitly as UTF-8 instead of relying on the platform default.
with open(grammar_path, encoding="utf-8") as grammar:
    # NOTE: LALR parser is faster but can't be used due to ambiguity between
    # numeric years and days, so Lark's default parser is used instead
    hebrew_parser = Lark(grammar.read(), start="hebrew_date", strict=True)
40 changes: 40 additions & 0 deletions src/undate/converters/calendars/hebrew/transformer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from lark import Transformer, Tree

from undate.undate import Undate, Calendar


class HebrewUndate(Undate):
    """Convenience :class:`~undate.undate.Undate` subclass whose default
    calendar is Hebrew."""

    calendar = Calendar.HEBREW


class HebrewDateTransformer(Transformer):
    """Lark transformer that turns a Hebrew date parse tree into an
    undate (:class:`HebrewUndate`)."""

    def hebrew_date(self, items):
        """Build a :class:`HebrewUndate` from the date-part subtrees."""
        # each recognized part is a subtree holding exactly one value,
        # which casts to int; any other child is ignored
        date_fields = ("year", "month", "day")
        parts = {
            str(node.data): int(node.children[0])
            for node in items
            if node.data in date_fields
        }
        # undate with Hebrew year, month, day in the Hebrew calendar
        return HebrewUndate(**parts)

    # No ``year`` method is defined: the parsed year is already a tree
    # named "year", so transforming it would be a no-op.

    def month(self, items):
        """Replace the nested month rule with a ``month`` tree whose single
        child is the month number (as a string)."""
        # the nested rule is named month_1, month_2, etc.; the part after
        # the last underscore is the numeric month used for date conversion
        month_rule = items[0].data
        _, _, month_number = month_rule.rpartition("_")
        return Tree(data="month", children=[month_number])
3 changes: 2 additions & 1 deletion src/undate/converters/calendars/hijri/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from undate.converters.calendars.hijri.converter import HijriDateConverter
from undate.converters.calendars.hebrew.converter import HebrewDateConverter

__all__ = ["HijriDateConverter"]
__all__ = ["HijriDateConverter", "HebrewDateConverter"]
8 changes: 2 additions & 6 deletions src/undate/converters/calendars/hijri/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,6 @@ class HijriDateConverter(BaseCalendarConverter):
def __init__(self):
self.transformer = HijriDateTransformer()

def max_month(self, year: int) -> int:
"""maximum numeric month for the specified year in this calendar"""
return 12

def max_day(self, year: int, month: int) -> int:
"""maximum numeric day for the specified year and month in this calendar"""
return islamic.month_length(year, month)
Expand All @@ -41,8 +37,8 @@ def to_gregorian(self, year: int, month: int, day: int) -> tuple[int, int, int]:
def parse(self, value: str) -> Union[Undate, UndateInterval]:
"""
Parse a Hijri date string and return an :class:`~undate.undate.Undate` or
:class:`~undate.undate.UndateInterval` in Gregorian calendar.
The Hijri date string is preserved in the undate label
:class:`~undate.undate.UndateInterval`.
The Hijri date string is preserved in the undate label.
"""
if not value:
raise ValueError("Parsing empty string is not supported")
Expand Down
14 changes: 9 additions & 5 deletions src/undate/undate.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class Calendar(StrEnum):

GREGORIAN = auto()
HIJRI = auto()
HEBREW = auto()

@staticmethod
def get_converter(calendar):
Expand Down Expand Up @@ -123,10 +124,11 @@ def calculate_earliest_latest(self, year, month, day):
if month == "XX":
month = None

min_month = 1 # is min month ever anything other than 1 ?
# get max month from the calendar, since it depends on the
# calendar and potentially the year (e.g. leap years in Hebrew Anno Mundi)
max_month = self.calendar_converter.max_month(max_year)
# get first and last month from the calendar, since it is not
# always 1 and 12
# TODO need to differentiate between min/max and first/last!
min_month = self.calendar_converter.min_month()
max_month = self.calendar_converter.max_month()
if month is not None:
try:
# treat as an integer if we can
Expand All @@ -137,7 +139,9 @@ def calculate_earliest_latest(self, year, month, day):
except ValueError:
# if not, calculate min/max for missing digits
min_month, max_month = self._missing_digit_minmax(
str(month), min_month, max_month
str(month),
1,
12, # min_month, max_month
)
# similar to month above — unknown day, but day-level granularity
if day == "XX":
Expand Down
Loading

0 comments on commit d26574c

Please sign in to comment.