Skip to content

Commit

Permalink
Rename formatters submodule and classes to converters
Browse files Browse the repository at this point in the history
resolves #100
  • Loading branch information
rlskoeser committed Nov 8, 2024
1 parent 759ec58 commit 5086d1b
Show file tree
Hide file tree
Showing 18 changed files with 166 additions and 165 deletions.
1 change: 1 addition & 0 deletions src/undate/converters/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from undate.converters.base import BaseDateConverter as BaseDateConverter
41 changes: 21 additions & 20 deletions src/undate/dateformat/base.py → src/undate/converters/base.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
"""
Base class for date format parsing and serializing
Base class for converting date between different formats and calendars.
To add support for a new date format:
To add support for a new date format or conversion:
- create a new file under undate/dateformat
- extend BaseDateFormat and implement parse and to_string methods
- create a new file or module under undate/converters
- extend BaseDateConverter and implement parse and to_string methods
as desired/appropriate
It should be loaded automatically and included in the formatters
returned by :meth:`BaseDateFormat.available_formatters`
The new subclass should be loaded automatically and included in the converters
returned by :meth:`BaseDateConverter.available_converters`
"""

Expand All @@ -21,11 +21,12 @@
logger = logging.getLogger(__name__)


class BaseDateFormat:
"""Base class for parsing and formatting dates for specific formats."""
class BaseDateConverter:
"""Base class for parsing, formatting, and converting dates to handle
specific formats and different calendars."""

# Subclasses should define a unique name.
name: str = "Base Formatter"
name: str = "Base Converter"

def parse(self, value: str):
# can't add type hint here because of circular import
Expand All @@ -40,22 +41,22 @@ def to_string(self, undate) -> str:
# cache import class method to ensure we only import once
@classmethod
@cache
def import_formatters(cls) -> int:
"""Import all undate.dateformat formatters
so that they will be included in available formatters
def import_converters(cls) -> int:
"""Import all undate converters
so that they will be included in available converters
even if not explicitly imported. Only import once.
returns the count of modules imported."""

logger.debug("Loading formatters under undate.dateformat")
import undate.dateformat
logger.debug("Loading converters under undate.converters")
import undate.converters

# load packages under this path with current package prefix
formatter_path = undate.dateformat.__path__
formatter_prefix = f"{undate.dateformat.__name__}."
converter_path = undate.converters.__path__
converter_prefix = f"{undate.converters.__name__}."

import_count = 0
for importer, modname, ispkg in pkgutil.iter_modules(
formatter_path, formatter_prefix
converter_path, converter_prefix
):
# import everything except the current file
if not modname.endswith(".base"):
Expand All @@ -65,7 +66,7 @@ def import_formatters(cls) -> int:
return import_count

@classmethod
def available_formatters(cls) -> Dict[str, Type["BaseDateFormat"]]:
# ensure undate formatters are imported
cls.import_formatters()
def available_converters(cls) -> Dict[str, Type["BaseDateConverter"]]:
# ensure undate converters are imported
cls.import_converters()
return {c.name: c for c in cls.__subclasses__()} # type: ignore
1 change: 1 addition & 0 deletions src/undate/converters/edtf/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from undate.converters.edtf.converter import EDTFDateConverter as EDTFDateConverter
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@

from lark.exceptions import UnexpectedCharacters

from undate.converters.base import BaseDateConverter
from undate.converters.edtf.parser import edtf_parser
from undate.converters.edtf.transformer import EDTFTransformer
from undate.date import DatePrecision
from undate.dateformat.base import BaseDateFormat
from undate.dateformat.edtf.parser import edtf_parser
from undate.dateformat.edtf.transformer import EDTFTransformer
from undate.undate import Undate, UndateInterval

EDTF_UNSPECIFIED_DIGIT: str = "X"


class EDTFDateFormat(BaseDateFormat):
class EDTFDateConverter(BaseDateConverter):
name: str = "EDTF"

def __init__(self):
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from typing import Dict, List, Union

from undate.dateformat.base import BaseDateFormat
from undate.converters.base import BaseDateConverter
from undate.undate import Undate, UndateInterval


class ISO8601DateFormat(BaseDateFormat):
class ISO8601DateFormat(BaseDateConverter):
# NOTE: do we care about validation? could use regex
# but maybe be permissive, warn if invalid but we can parse

Expand Down
3 changes: 0 additions & 3 deletions src/undate/dateformat/__init__.py

This file was deleted.

1 change: 0 additions & 1 deletion src/undate/dateformat/edtf/__init__.py

This file was deleted.

42 changes: 22 additions & 20 deletions src/undate/undate.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
# Pre 3.10 requires Union for multiple types, e.g. Union[int, None] instead of int | None
from typing import Dict, Optional, Union

from undate.converters.base import BaseDateConverter
from undate.date import ONE_DAY, ONE_MONTH_MAX, ONE_YEAR, Date, DatePrecision, Timedelta
from undate.dateformat.base import BaseDateFormat


class Undate:
Expand All @@ -22,7 +22,7 @@ class Undate:
#: A string to label a specific undate, e.g. "German Unity Date 2022" for Oct. 3, 2022.
#: Labels are not taken into account when comparing undate objects.
label: Union[str, None] = None
formatter: BaseDateFormat
converter: BaseDateConverter
#: precision of the date (day, month, year, etc.)
precision: DatePrecision

Expand All @@ -41,7 +41,7 @@ def __init__(
year: Optional[Union[int, str]] = None,
month: Optional[Union[int, str]] = None,
day: Optional[Union[int, str]] = None,
formatter: Optional[BaseDateFormat] = None,
converter: Optional[BaseDateConverter] = None,
label: Optional[str] = None,
):
# keep track of initial values and which values are known
Expand Down Expand Up @@ -135,11 +135,13 @@ def __init__(
self.earliest = Date(min_year, min_month, min_day)
self.latest = Date(max_year, max_month, max_day)

if formatter is None:
if converter is None:
# import all subclass definitions; initialize the default
formatter_cls = BaseDateFormat.available_formatters()[self.DEFAULT_FORMAT]
formatter = formatter_cls()
self.formatter = formatter
converter_cls = BaseDateConverter.available_converters()[
self.DEFAULT_FORMAT
]
converter = converter_cls()
self.converter = converter

self.label = label

Expand All @@ -162,7 +164,7 @@ def __str__(self) -> str:
# combine, skipping any values that are None
return "-".join([str(p) for p in parts if p is not None])

return self.formatter.to_string(self)
return self.converter.to_string(self)

def __repr__(self) -> str:
if self.label:
Expand All @@ -172,21 +174,21 @@ def __repr__(self) -> str:
@classmethod
def parse(cls, date_string, format) -> Union["Undate", "UndateInterval"]:
"""parse a string to an undate or undate interval using the specified format;
for now, only supports named formatters"""
formatter_cls = BaseDateFormat.available_formatters().get(format, None)
if formatter_cls:
for now, only supports named converters"""
converter_cls = BaseDateConverter.available_converters().get(format, None)
if converter_cls:
# NOTE: some parsers may return intervals; is that ok here?
return formatter_cls().parse(date_string)
return converter_cls().parse(date_string)

raise ValueError(f"Unsupported format '{format}'")

def format(self, format) -> str:
"""format this undate as a string using the specified format;
for now, only supports named formatters"""
formatter_cls = BaseDateFormat.available_formatters().get(format, None)
if formatter_cls:
for now, only supports named converters"""
converter_cls = BaseDateConverter.available_converters().get(format, None)
if converter_cls:
# NOTE: some parsers may return intervals; is that ok here?
return formatter_cls().to_string(self)
return converter_cls().to_string(self)

raise ValueError(f"Unsupported format '{format}'")

Expand Down Expand Up @@ -459,10 +461,10 @@ def __str__(self) -> str:

def format(self, format) -> str:
"""format this undate interval as a string using the specified format;
for now, only supports named formatters"""
formatter_cls = BaseDateFormat.available_formatters().get(format, None)
if formatter_cls:
return formatter_cls().to_string(self)
for now, only supports named converters"""
converter_cls = BaseDateConverter.available_converters().get(format, None)
if converter_cls:
return converter_cls().to_string(self)

raise ValueError(f"Unsupported format '{format}'")

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pytest
from undate.dateformat.edtf.parser import edtf_parser
from undate.converters.edtf.parser import edtf_parser

# for now, just test that valid dates can be parsed

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest
from undate.dateformat.edtf.parser import edtf_parser
from undate.dateformat.edtf.transformer import EDTFTransformer
from undate.converters.edtf.parser import edtf_parser
from undate.converters.edtf.transformer import EDTFTransformer
from undate.undate import Undate, UndateInterval

# for now, just test that valid dates can be parsed
Expand Down
64 changes: 64 additions & 0 deletions tests/test_converters/test_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import logging

import pytest
from undate.converters.base import BaseDateConverter


class TestBaseDateConverter:
    """Tests for converter discovery and the base class's unimplemented hooks."""

    def test_available_converters(self):
        """available_converters returns a name -> class dict that includes
        converters which were never imported explicitly."""
        available_converters = BaseDateConverter.available_converters()
        assert isinstance(available_converters, dict)

        # NOTE: import _after_ generating available converters
        # so we can confirm it gets loaded
        from undate.converters.iso8601 import ISO8601DateFormat

        assert ISO8601DateFormat.name in available_converters
        # the registry should hold the class object itself
        assert available_converters[ISO8601DateFormat.name] is ISO8601DateFormat

    def test_converters_are_unique(self):
        """Every direct subclass must be registered under a distinct name."""
        # available_converters() is keyed on name, so two subclasses sharing
        # a name collapse into one entry and the two counts stop matching
        assert len(BaseDateConverter.available_converters()) == len(
            BaseDateConverter.__subclasses__()
        ), "Converter names have to be unique."

    def test_parse_not_implemented(self):
        """Calling parse on the base class should raise NotImplementedError."""
        with pytest.raises(NotImplementedError):
            BaseDateConverter().parse("foo bar baz")

    def test_parse_to_string(self):
        """Calling to_string on the base class should raise NotImplementedError."""
        with pytest.raises(NotImplementedError):
            BaseDateConverter().to_string(1991)


def test_import_converters_import_only_once(caplog):
    """import_converters should do its work on the first call only."""
    expected_log = "Loading converters"

    # Instantiating an Undate anywhere in the test suite populates the
    # cache, so reset it here to guarantee a genuine first call.
    BaseDateConverter.import_converters.cache_clear()

    # first call: actually runs, and announces it in the debug log
    with caplog.at_level(logging.DEBUG):
        first_count = BaseDateConverter.import_converters()
    # at least one module (iso8601) should have been imported
    assert first_count >= 1
    assert expected_log in caplog.text

    # second call against an emptied log: nothing should be logged
    caplog.clear()
    with caplog.at_level(logging.DEBUG):
        BaseDateConverter.import_converters()
    assert expected_log not in caplog.text


@pytest.mark.last
def test_converters_unique_error():
    """The uniqueness check should fail once a converter name is duplicated."""

    # This test is marked to run last because the subclass defined below
    # cannot be undefined again once it exists.
    class DuplicateNameConverter(BaseDateConverter):
        name = "ISO8601"  # duplicates an existing converter's name

    registered = BaseDateConverter.available_converters()
    subclasses = BaseDateConverter.__subclasses__()
    assert len(registered) != len(subclasses)
47 changes: 47 additions & 0 deletions tests/test_converters/test_edtf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import pytest
from undate.converters.edtf import EDTFDateConverter
from undate.undate import Undate, UndateInterval


class TestEDTFDateConverter:
    """Parsing and serializing EDTF strings with EDTFDateConverter."""

    def test_parse_singledate(self):
        """Single EDTF dates parse to the matching Undate."""
        fully_known = [
            ("2002", Undate(2002)),
            ("1991-05", Undate(1991, 5)),
            ("1991-05-03", Undate(1991, 5, 3)),
        ]
        for edtf_string, expected in fully_known:
            assert EDTFDateConverter().parse(edtf_string) == expected

        # unknown dates are not strictly equal, but string comparison should match
        partially_known = [
            ("201X", Undate("201X")),
            ("2004-XX", Undate(2004, "XX")),
        ]
        for edtf_string, expected in partially_known:
            assert str(EDTFDateConverter().parse(edtf_string)) == str(expected)

        # missing year but month/day known
        # assert EDTFDateConverter().parse("--05-03") == Undate(month=5, day=3)

    def test_parse_singledate_unequal(self):
        """Parsed dates compare unequal to a different Undate."""
        mismatches = [
            ("2002", Undate(2003)),
            ("1991-05", Undate(1991, 6)),
            ("1991-05-03", Undate(1991, 5, 4)),
        ]
        for edtf_string, other in mismatches:
            assert EDTFDateConverter().parse(edtf_string) != other

        # missing year but month/day known
        # - does EDTF not support this or is parsing logic incorrect?
        # assert EDTFDateConverter().parse("XXXX-05-03") != Undate(month=5, day=4)

    def test_parse_invalid(self):
        """A malformed EDTF string is reported as a ValueError."""
        converter = EDTFDateConverter()
        with pytest.raises(ValueError):
            converter.parse("1991-5")

    def test_parse_range(self):
        """A slash-separated pair of dates parses to an UndateInterval."""
        expected_interval = UndateInterval(Undate(1800), Undate(1900))
        assert EDTFDateConverter().parse("1800/1900") == expected_interval

    def test_to_string(self):
        """Undates serialize to the expected EDTF strings."""
        cases = [
            # year only
            (Undate(900), "0900"),
            (Undate("80"), "0080"),
            (Undate(33), "0033"),
            (Undate("20XX"), "20XX"),
            (Undate(17000002), "Y17000002"),
            # year plus month / day
            (Undate(1991, 6), "1991-06"),
            (Undate(1991, 5, 3), "1991-05-03"),
            # partially or completely unknown month
            (Undate(1991, "0X"), "1991-0X"),
            (Undate(1991, None, 3), "1991-XX-03"),
        ]
        for date, expected in cases:
            assert EDTFDateConverter().to_string(date) == expected

        # TODO: override missing digit and confirm replacement
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from undate.dateformat.iso8601 import ISO8601DateFormat
from undate.converters.iso8601 import ISO8601DateFormat
from undate.undate import Undate, UndateInterval


Expand Down
Loading

0 comments on commit 5086d1b

Please sign in to comment.