diff --git a/docs/conf.py b/docs/conf.py index 8961d82..269df0b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -86,4 +86,4 @@ } # turn on relative links; make sure both github and sphinx links work -myst_enable_extensions = ["linkify"] +# myst_enable_extensions = ["linkify"] # disabling because not found diff --git a/docs/index.rst b/docs/index.rst index de36eaa..4f302f9 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -9,7 +9,7 @@ undate documentation :caption: Contents: readme - undate + undate/index CONTRIBUTING DEVELOPER_NOTES CONTRIBUTORS diff --git a/docs/undate.rst b/docs/undate.rst deleted file mode 100644 index 303a004..0000000 --- a/docs/undate.rst +++ /dev/null @@ -1,11 +0,0 @@ -API documentation -================= - -.. autoclass:: undate.undate.Undate - :members: - :undoc-members: - - -.. autoclass:: undate.undate.UndateInterval - :members: - :undoc-members: \ No newline at end of file diff --git a/docs/undate/converters.rst b/docs/undate/converters.rst new file mode 100644 index 0000000..701aaf1 --- /dev/null +++ b/docs/undate/converters.rst @@ -0,0 +1,30 @@ +Converters +========== + +.. automodule:: undate.converters.base + :members: + :undoc-members: + +ISO8601 +------- + +.. automodule:: undate.converters.iso8601 + :members: + :undoc-members: + +Extended Date-Time Format (EDTF) +-------------------------------- + +.. automodule:: undate.converters.edtf.converter + :members: + :undoc-members: + +.. automodule:: undate.converters.edtf.parser + :members: + :undoc-members: + +.. transformer is more of an internal, probably doesn't make sense to include +.. .. automodule:: undate.converters.edtf.transformer +.. :members: +.. :undoc-members: + diff --git a/docs/undate/core.rst b/docs/undate/core.rst new file mode 100644 index 0000000..e7b6b4b --- /dev/null +++ b/docs/undate/core.rst @@ -0,0 +1,22 @@ +Undate objects +============== + +undates and undate intervals +------------------------------ + +.. autoclass:: undate.undate.Undate + :members: + +.. 
autoclass:: undate.undate.UndateInterval + :members: + +date, timedelta, and date precision +----------------------------------- + +.. autoclass:: undate.date.Date + :members: + +.. autoclass:: undate.date.Timedelta + :members: + +.. autoclass:: undate.date.DatePrecision diff --git a/docs/undate/index.rst b/docs/undate/index.rst new file mode 100644 index 0000000..3deea35 --- /dev/null +++ b/docs/undate/index.rst @@ -0,0 +1,9 @@ +API documentation +================= + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + core + converters \ No newline at end of file diff --git a/examples/notebooks/shxco_partial_date_durations.ipynb b/examples/notebooks/shxco_partial_date_durations.ipynb index b89661f..9e291f9 100644 --- a/examples/notebooks/shxco_partial_date_durations.ipynb +++ b/examples/notebooks/shxco_partial_date_durations.ipynb @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -29,8 +29,8 @@ "output_type": "stream", "text": [ "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.2\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49m/Users/rkoeser/workarea/env/undate-py3.10/bin/python3.10 -m pip install --upgrade pip\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49m/Users/rkoeser/workarea/env/undate/bin/python3 -m pip install --upgrade pip\u001b[0m\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } @@ -41,7 +41,7 @@ 
}, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -288,7 +288,7 @@ "[5 rows x 28 columns]" ] }, - "execution_count": 2, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -316,7 +316,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": { "id": "y_MqgrQW64uI" }, @@ -324,7 +324,7 @@ "source": [ "from undate.date import ONE_DAY\n", "from undate.undate import UndateInterval\n", - "from undate.dateformat.iso8601 import ISO8601DateFormat\n", + "from undate.converters.iso8601 import ISO8601DateFormat\n", "\n", "def undate_duration(start_date, end_date):\n", " isoformat = ISO8601DateFormat()\n", @@ -353,7 +353,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -452,7 +452,7 @@ "260 4 months 122.0 " ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -478,7 +478,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -497,15 +497,15 @@ "91.0 397\n", "365.0 337\n", " ... 
\n", - "200.0 1\n", - "277.0 1\n", - "169.0 1\n", - "45.0 1\n", - "38.0 1\n", + "69.0 1\n", + "36.0 1\n", + "73.0 1\n", + "574.0 1\n", + "171.0 1\n", "Name: count, Length: 133, dtype: int64" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -517,7 +517,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -540,7 +540,7 @@ "Name: subscription_duration_days, dtype: float64" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -558,7 +558,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -607,7 +607,7 @@ "Index: []" ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -619,7 +619,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -688,7 +688,7 @@ "13686 NaN 31.0 " ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -707,7 +707,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": { "id": "jwvN9-CgLQRx" }, @@ -727,7 +727,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -832,7 +832,7 @@ "260 4 months 122.0 152 days " ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -845,7 +845,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -950,7 +950,7 @@ "260 4 months 122.0 152 days " ] }, - "execution_count": 11, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -962,7 +962,7 
@@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1161,7 +1161,7 @@ "[9144 rows x 7 columns]" ] }, - "execution_count": 12, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -1174,7 +1174,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1200,7 +1200,7 @@ "Name: count, dtype: int64" ] }, - "execution_count": 13, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -1220,7 +1220,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1403,7 +1403,7 @@ "313 30.0 " ] }, - "execution_count": 14, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1416,7 +1416,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1436,14 +1436,14 @@ "4 months 5\n", "5 months 3\n", "1 year 2\n", - "8 months 2\n", "7 months 2\n", + "8 months 2\n", "11 months 1\n", "10 months 1\n", "Name: count, dtype: int64" ] }, - "execution_count": 15, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1455,7 +1455,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1686,7 +1686,7 @@ "472 30.0 60 days 30.0 " ] }, - "execution_count": 16, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -1709,7 +1709,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1957,7 +1957,7 @@ "415 29.0 " ] }, - "execution_count": 17, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -1982,7 +1982,7 @@ 
}, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -2068,7 +2068,7 @@ "606 G. E. Pulsford --01-20 --01-28 8.0" ] }, - "execution_count": 18, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -2082,7 +2082,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -2168,7 +2168,7 @@ "29908 Ann Samyn 1961-10-04 1962-03-21 168.0" ] }, - "execution_count": 19, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -2179,7 +2179,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -2316,7 +2316,7 @@ "611 Gertrude Stein --01-24 --05-30 126.0 126 days" ] }, - "execution_count": 20, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -2329,7 +2329,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -2489,7 +2489,7 @@ "611 0.0 " ] }, - "execution_count": 21, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -2502,7 +2502,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 23, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -2519,7 +2519,7 @@ "Name: count, dtype: int64" ] }, - "execution_count": 22, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -2551,7 +2551,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 24, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -2592,7 +2592,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.14" + "version": "3.12.7" } }, "nbformat": 4, diff --git a/src/undate/converters/__init__.py 
b/src/undate/converters/__init__.py new file mode 100644 index 0000000..e13532d --- /dev/null +++ b/src/undate/converters/__init__.py @@ -0,0 +1 @@ +from undate.converters.base import BaseDateConverter as BaseDateConverter diff --git a/src/undate/converters/base.py b/src/undate/converters/base.py new file mode 100644 index 0000000..02cf820 --- /dev/null +++ b/src/undate/converters/base.py @@ -0,0 +1,95 @@ +""" +:class:`undate.converters.BaseDateConverter` provides a base class for +implementing date converters, which can provide support for +parsing and generating dates in different formats and also converting +dates between different calendars. + +To add support for a new date format or calendar conversion: + +- Create a new file under ``undate/converters/`` + - For converters with sufficient complexity, you may want to create a submodule; + see ``undate.converters.edtf`` for an example. +- Extend ``BaseDateConverter`` and implement ``parse`` and ``to_string`` methods + as desired/appropriate for your converter +- Add unit tests for the new converter in ``tests/test_converters/`` +- Optionally, you may want to create a notebook to demonstrate the use and value + of the new converter. + +The new subclass should be loaded automatically and included in the converters +returned by :meth:`BaseDateConverter.available_converters` + +------------------- +""" + +import importlib +import logging +import pkgutil +from functools import cache +from typing import Dict, Type + +logger = logging.getLogger(__name__) + + +class BaseDateConverter: + """Base class for parsing, formatting, and converting dates to handle + specific formats and different calendars.""" + + #: Converter name. Subclasses must define a unique name. + name: str = "Base Converter" + + def parse(self, value: str): + """ + Parse a string and return an :class:`~undate.undate.Undate` or + :class:`~undate.undate.UndateInterval`. Must be implemented by + subclasses. 
+ """ + # can't add type hint here because of circular import + # should return an undate or undate interval + raise NotImplementedError + + def to_string(self, undate) -> str: + """ + Convert an :class:`~undate.undate.Undate` or + :class:`~undate.undate.UndateInterval` to string. + Must be implemented by subclasses. + """ + + # undate param should be of type Union[Undate, UndateInterval] but can't add type hint here because of circular import + # convert an undate or interval to string representation for this format + raise NotImplementedError + + # cache import class method to ensure we only import once + @classmethod + @cache + def import_converters(cls) -> int: + """Import all undate converters + so that they will be included in available converters + even if not explicitly imported. Only import once. + returns the count of modules imported.""" + + logger.debug("Loading converters under undate.converters") + import undate.converters + + # load packages under this path with current package prefix + converter_path = undate.converters.__path__ + converter_prefix = f"{undate.converters.__name__}." + + import_count = 0 + for importer, modname, ispkg in pkgutil.iter_modules( + converter_path, converter_prefix + ): + # import everything except the current file + if not modname.endswith(".base"): + importlib.import_module(modname) + import_count += 1 + + return import_count + + @classmethod + def available_converters(cls) -> Dict[str, Type["BaseDateConverter"]]: + """ + Dictionary of available converters keyed on name. 
+ """ + # ensure undate converters are imported + cls.import_converters() + return {c.name: c for c in cls.__subclasses__()} # type: ignore diff --git a/src/undate/converters/edtf/__init__.py b/src/undate/converters/edtf/__init__.py new file mode 100644 index 0000000..1b55200 --- /dev/null +++ b/src/undate/converters/edtf/__init__.py @@ -0,0 +1 @@ +from undate.converters.edtf.converter import EDTFDateConverter as EDTFDateConverter diff --git a/src/undate/dateformat/edtf/formatter.py b/src/undate/converters/edtf/converter.py similarity index 75% rename from src/undate/dateformat/edtf/formatter.py rename to src/undate/converters/edtf/converter.py index 4e04ff1..394dd7d 100644 --- a/src/undate/dateformat/edtf/formatter.py +++ b/src/undate/converters/edtf/converter.py @@ -2,22 +2,39 @@ from lark.exceptions import UnexpectedCharacters +from undate.converters.base import BaseDateConverter +from undate.converters.edtf.parser import edtf_parser +from undate.converters.edtf.transformer import EDTFTransformer from undate.date import DatePrecision -from undate.dateformat.base import BaseDateFormat -from undate.dateformat.edtf.parser import edtf_parser -from undate.dateformat.edtf.transformer import EDTFTransformer from undate.undate import Undate, UndateInterval +#: character for unspecified digits EDTF_UNSPECIFIED_DIGIT: str = "X" -class EDTFDateFormat(BaseDateFormat): +class EDTFDateConverter(BaseDateConverter): + """ + Converter for Extended Date/Time Format (EDTF). + + Supports parsing and serializing dates and date ranges in EDTF format. + Does not support all of EDTF, and only supports dates and not times. + """ + + #: converter name: EDTF name: str = "EDTF" def __init__(self): self.transformer = EDTFTransformer() def parse(self, value: str) -> Union[Undate, UndateInterval]: + """ + Parse a string in a supported EDTF date or date interval format and + return an :class:`~undate.undate.Undate` or + :class:`~undate.undate.UndateInterval`. 
+ """ + if not value: + raise ValueError("Parsing empty/unset string is not supported") + # parse the input string, then transform to undate object try: parsetree = edtf_parser.parse(value) @@ -33,6 +50,10 @@ def _convert_missing_digits( return None def to_string(self, undate: Union[Undate, UndateInterval]) -> str: + """ + Convert an :class:`~undate.undate.Undate` or + :class:`~undate.undate.UndateInterval` to EDTF format. + """ if isinstance(undate, Undate): return self._undate_to_string(undate) elif isinstance(undate, UndateInterval): diff --git a/src/undate/dateformat/edtf/edtf.lark b/src/undate/converters/edtf/edtf.lark similarity index 100% rename from src/undate/dateformat/edtf/edtf.lark rename to src/undate/converters/edtf/edtf.lark diff --git a/src/undate/dateformat/edtf/parser.py b/src/undate/converters/edtf/parser.py similarity index 100% rename from src/undate/dateformat/edtf/parser.py rename to src/undate/converters/edtf/parser.py diff --git a/src/undate/dateformat/edtf/transformer.py b/src/undate/converters/edtf/transformer.py similarity index 100% rename from src/undate/dateformat/edtf/transformer.py rename to src/undate/converters/edtf/transformer.py diff --git a/src/undate/dateformat/iso8601.py b/src/undate/converters/iso8601.py similarity index 87% rename from src/undate/dateformat/iso8601.py rename to src/undate/converters/iso8601.py index a5f79d7..a0ecad5 100644 --- a/src/undate/dateformat/iso8601.py +++ b/src/undate/converters/iso8601.py @@ -1,15 +1,16 @@ from typing import Dict, List, Union -from undate.dateformat.base import BaseDateFormat +from undate.converters.base import BaseDateConverter from undate.undate import Undate, UndateInterval -class ISO8601DateFormat(BaseDateFormat): +class ISO8601DateFormat(BaseDateConverter): # NOTE: do we care about validation? 
could use regex # but maybe be permissive, warn if invalid but we can parse - # do not change; Undate relies on this string + #: converter name: ISO8601 name: str = "ISO8601" + # do not change; Undate relies on this string #: datetime strftime format for known part of date iso_format: Dict[str, str] = { @@ -19,12 +20,15 @@ class ISO8601DateFormat(BaseDateFormat): } def parse(self, value: str) -> Union[Undate, UndateInterval]: - # TODO: must return value of type "Union[Undate, UndateInterval]" + """ + Parse an ISO8601 string and return an :class:`~undate.undate.Undate` or + :class:`~undate.undate.UndateInterval`. Currently supports + YYYY, YYYY-MM, YYYY-MM-DD, --MM-DD for single date + and interval format (YYYY/YYYY in any supported single date format). + """ # TODO: what happens if someone gives us a full isoformat date with time? # (ignore, error?) # TODO: what about invalid format? - # could be YYYY, YYYY-MM, YYYY-MM-DD, --MM-DD for single date - # or YYYY/YYYY (etc.) for an interval parts: List[str] = value.split("/") # split in case we have a range if len(parts) == 1: return self._parse_single_date(parts[0]) @@ -50,6 +54,10 @@ def _parse_single_date(self, value: str) -> Undate: return Undate(*date_parts) # type: ignore def to_string(self, undate: Union[Undate, UndateInterval]) -> str: + """ + Convert an :class:`~undate.undate.Undate` or + :class:`~undate.undate.UndateInterval` to ISO8601 string format. 
+ """ if isinstance(undate, Undate): return self._undate_to_string(undate) elif isinstance(undate, UndateInterval): diff --git a/src/undate/dateformat/__init__.py b/src/undate/dateformat/__init__.py deleted file mode 100644 index 5dc5c3c..0000000 --- a/src/undate/dateformat/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from undate.dateformat.base import BaseDateFormat as BaseDateFormat - -# from undate.dateformat.iso8601 import ISO8601DateFormat diff --git a/src/undate/dateformat/base.py b/src/undate/dateformat/base.py deleted file mode 100644 index 59777b1..0000000 --- a/src/undate/dateformat/base.py +++ /dev/null @@ -1,71 +0,0 @@ -""" -Base class for date format parsing and serializing - -To add support for a new date format: - -- create a new file under undate/dateformat -- extend BaseDateFormat and implement parse and to_string methods - as desired/appropriate - -It should be loaded automatically and included in the formatters -returned by :meth:`BaseDateFormat.available_formatters` - -""" - -import importlib -import logging -import pkgutil -from functools import cache -from typing import Dict, Type - -logger = logging.getLogger(__name__) - - -class BaseDateFormat: - """Base class for parsing and formatting dates for specific formats.""" - - # Subclasses should define a unique name. 
- name: str = "Base Formatter" - - def parse(self, value: str): - # can't add type hint here because of circular import - # should return an undate or undate interval - raise NotImplementedError - - def to_string(self, undate) -> str: - # undate param should be of type Union[Undate, UndateInterval] but can't add type hint here because of circular import - # convert an undate or interval to string representation for this format - raise NotImplementedError - - # cache import class method to ensure we only import once - @classmethod - @cache - def import_formatters(cls) -> int: - """Import all undate.dateformat formatters - so that they will be included in available formatters - even if not explicitly imported. Only import once. - returns the count of modules imported.""" - - logger.debug("Loading formatters under undate.dateformat") - import undate.dateformat - - # load packages under this path with curent package prefix - formatter_path = undate.dateformat.__path__ - formatter_prefix = f"{undate.dateformat.__name__}." 
- - import_count = 0 - for importer, modname, ispkg in pkgutil.iter_modules( - formatter_path, formatter_prefix - ): - # import everything except the current file - if not modname.endswith(".base"): - importlib.import_module(modname) - import_count += 1 - - return import_count - - @classmethod - def available_formatters(cls) -> Dict[str, Type["BaseDateFormat"]]: - # ensure undate formatters are imported - cls.import_formatters() - return {c.name: c for c in cls.__subclasses__()} # type: ignore diff --git a/src/undate/dateformat/edtf/__init__.py b/src/undate/dateformat/edtf/__init__.py deleted file mode 100644 index 4b95f19..0000000 --- a/src/undate/dateformat/edtf/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from undate.dateformat.edtf.formatter import EDTFDateFormat as EDTFDateFormat diff --git a/src/undate/undate.py b/src/undate/undate.py index c7335c9..cda08d4 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -5,14 +5,14 @@ # Pre 3.10 requires Union for multiple types, e.g. Union[int, None] instead of int | None from typing import Dict, Optional, Union +from undate.converters.base import BaseDateConverter from undate.date import ONE_DAY, ONE_MONTH_MAX, ONE_YEAR, Date, DatePrecision, Timedelta -from undate.dateformat.base import BaseDateFormat class Undate: """object for representing uncertain, fuzzy or partially unknown dates""" - DEFAULT_FORMAT: str = "ISO8601" + DEFAULT_CONVERTER: str = "ISO8601" #: symbol for unknown digits within a date value MISSING_DIGIT: str = "X" @@ -22,7 +22,7 @@ class Undate: #: A string to label a specific undate, e.g. "German Unity Date 2022" for Oct. 3, 2022. #: Labels are not taken into account when comparing undate objects. label: Union[str, None] = None - formatter: BaseDateFormat + converter: BaseDateConverter #: precision of the date (day, month, year, etc.) 
precision: DatePrecision @@ -41,7 +41,7 @@ def __init__( year: Optional[Union[int, str]] = None, month: Optional[Union[int, str]] = None, day: Optional[Union[int, str]] = None, - formatter: Optional[BaseDateFormat] = None, + converter: Optional[BaseDateConverter] = None, label: Optional[str] = None, ): # keep track of initial values and which values are known @@ -135,11 +135,13 @@ def __init__( self.earliest = Date(min_year, min_month, min_day) self.latest = Date(max_year, max_month, max_day) - if formatter is None: + if converter is None: # import all subclass definitions; initialize the default - formatter_cls = BaseDateFormat.available_formatters()[self.DEFAULT_FORMAT] - formatter = formatter_cls() - self.formatter = formatter + converter_cls = BaseDateConverter.available_converters()[ + self.DEFAULT_CONVERTER + ] + converter = converter_cls() + self.converter = converter self.label = label @@ -162,7 +164,7 @@ def __str__(self) -> str: # combine, skipping any values that are None return "-".join([str(p) for p in parts if p is not None]) - return self.formatter.to_string(self) + return self.converter.to_string(self) def __repr__(self) -> str: if self.label: @@ -172,21 +174,21 @@ def __repr__(self) -> str: @classmethod def parse(cls, date_string, format) -> Union["Undate", "UndateInterval"]: """parse a string to an undate or undate interval using the specified format; - for now, only supports named formatters""" - formatter_cls = BaseDateFormat.available_formatters().get(format, None) - if formatter_cls: + for now, only supports named converters""" + converter_cls = BaseDateConverter.available_converters().get(format, None) + if converter_cls: # NOTE: some parsers may return intervals; is that ok here? 
- return formatter_cls().parse(date_string) + return converter_cls().parse(date_string) raise ValueError(f"Unsupported format '{format}'") def format(self, format) -> str: """format this undate as a string using the specified format; - for now, only supports named formatters""" - formatter_cls = BaseDateFormat.available_formatters().get(format, None) - if formatter_cls: + for now, only supports named converters""" + converter_cls = BaseDateConverter.available_converters().get(format, None) + if converter_cls: # NOTE: some parsers may return intervals; is that ok here? - return formatter_cls().to_string(self) + return converter_cls().to_string(self) raise ValueError(f"Unsupported format '{format}'") @@ -459,10 +461,10 @@ def __str__(self) -> str: def format(self, format) -> str: """format this undate interval as a string using the specified format; - for now, only supports named formatters""" - formatter_cls = BaseDateFormat.available_formatters().get(format, None) - if formatter_cls: - return formatter_cls().to_string(self) + for now, only supports named converters""" + converter_cls = BaseDateConverter.available_converters().get(format, None) + if converter_cls: + return converter_cls().to_string(self) raise ValueError(f"Unsupported format '{format}'") diff --git a/tests/test_dateformat/edtf/test_edtf_parser.py b/tests/test_converters/edtf/test_edtf_parser.py similarity index 94% rename from tests/test_dateformat/edtf/test_edtf_parser.py rename to tests/test_converters/edtf/test_edtf_parser.py index 3a2604b..6af9de6 100644 --- a/tests/test_dateformat/edtf/test_edtf_parser.py +++ b/tests/test_converters/edtf/test_edtf_parser.py @@ -1,5 +1,5 @@ import pytest -from undate.dateformat.edtf.parser import edtf_parser +from undate.converters.edtf.parser import edtf_parser # for now, just test that valid dates can be parsed diff --git a/tests/test_dateformat/edtf/test_edtf_transformer.py b/tests/test_converters/edtf/test_edtf_transformer.py similarity index 92% rename from 
tests/test_dateformat/edtf/test_edtf_transformer.py rename to tests/test_converters/edtf/test_edtf_transformer.py index a475e75..4741b1d 100644 --- a/tests/test_dateformat/edtf/test_edtf_transformer.py +++ b/tests/test_converters/edtf/test_edtf_transformer.py @@ -1,6 +1,6 @@ import pytest -from undate.dateformat.edtf.parser import edtf_parser -from undate.dateformat.edtf.transformer import EDTFTransformer +from undate.converters.edtf.parser import edtf_parser +from undate.converters.edtf.transformer import EDTFTransformer from undate.undate import Undate, UndateInterval # for now, just test that valid dates can be parsed diff --git a/tests/test_converters/test_base.py b/tests/test_converters/test_base.py new file mode 100644 index 0000000..60d5d1e --- /dev/null +++ b/tests/test_converters/test_base.py @@ -0,0 +1,64 @@ +import logging + +import pytest +from undate.converters.base import BaseDateConverter + + +class TestBaseDateConverter: + def test_available_converters(self): + available_converters = BaseDateConverter.available_converters() + assert isinstance(available_converters, dict) + + # NOTE: import _after_ generating available converters + # so we can confirm it gets loaded + from undate.converters.iso8601 import ISO8601DateFormat + + assert ISO8601DateFormat.name in available_converters + assert available_converters[ISO8601DateFormat.name] == ISO8601DateFormat + + def test_converters_are_unique(self): + assert len(BaseDateConverter.available_converters()) == len( + BaseDateConverter.__subclasses__() + ), "Formatter names have to be unique." 
+ + def test_parse_not_implemented(self): + with pytest.raises(NotImplementedError): + BaseDateConverter().parse("foo bar baz") + + def test_parse_to_string(self): + with pytest.raises(NotImplementedError): + BaseDateConverter().to_string(1991) + + +def test_import_converters_import_only_once(caplog): + # clear the cache, since any instantiation of an Undate + # object anywhere in the test suite will populate it + BaseDateConverter.import_converters.cache_clear() + + # run first, and confirm it runs and loads converters + with caplog.at_level(logging.DEBUG): + import_count = BaseDateConverter.import_converters() + # should import at least one thing (iso8601) + assert import_count >= 1 + # should have log entry + assert "Loading converters" in caplog.text + + # if we clear the log and run again, should not do anything + caplog.clear() + with caplog.at_level(logging.DEBUG): + BaseDateConverter.import_converters() + assert "Loading converters" not in caplog.text + + +@pytest.mark.last +def test_converters_unique_error(): + # confirm that unique converter check fails when it should + + # run this test last because we can't undefine the subclass + # once it exists... 
+ class ISO8601DateFormat2(BaseDateConverter): + name = "ISO8601" # duplicates existing converter + + assert len(BaseDateConverter.available_converters()) != len( + BaseDateConverter.__subclasses__() + ) diff --git a/tests/test_converters/test_edtf.py b/tests/test_converters/test_edtf.py new file mode 100644 index 0000000..24fad38 --- /dev/null +++ b/tests/test_converters/test_edtf.py @@ -0,0 +1,56 @@ +import pytest +from undate.converters.edtf import EDTFDateConverter +from undate.undate import Undate, UndateInterval + + +class TestEDTFDateConverter: + def test_parse_singledate(self): + assert EDTFDateConverter().parse("2002") == Undate(2002) + assert EDTFDateConverter().parse("1991-05") == Undate(1991, 5) + assert EDTFDateConverter().parse("1991-05-03") == Undate(1991, 5, 3) + # unknown dates are not strictly equal, but string comparison should match + assert str(EDTFDateConverter().parse("201X")) == str(Undate("201X")) + assert str(EDTFDateConverter().parse("2004-XX")) == str(Undate(2004, "XX")) + # missing year but month/day known + # assert EDTFDateConverter().parse("--05-03") == Undate(month=5, day=3) + + def test_parse_singledate_unequal(self): + assert EDTFDateConverter().parse("2002") != Undate(2003) + assert EDTFDateConverter().parse("1991-05") != Undate(1991, 6) + assert EDTFDateConverter().parse("1991-05-03") != Undate(1991, 5, 4) + # missing year but month/day known + # - does EDTF not support this or is parsing logic incorrect? 
+ # assert EDTFDateConverter().parse("XXXX-05-03") != Undate(month=5, day=4) + + invalid_inputs = [ + "1991-13", # invalid month + "1991-12-32", # invalid day + "199A", # invalid year format + "", # empty string + None, # None input + ] + + @pytest.mark.parametrize("invalid_input", invalid_inputs) + def test_parse_invalid(self, invalid_input): + with pytest.raises(ValueError): + EDTFDateConverter().parse(invalid_input) + + def test_parse_range(self): + assert EDTFDateConverter().parse("1800/1900") == UndateInterval( + Undate(1800), Undate(1900) + ) + + def test_to_string(self): + assert EDTFDateConverter().to_string(Undate(900)) == "0900" + assert EDTFDateConverter().to_string(Undate("80")) == "0080" + assert EDTFDateConverter().to_string(Undate(33)) == "0033" + assert EDTFDateConverter().to_string(Undate("20XX")) == "20XX" + assert EDTFDateConverter().to_string(Undate(17000002)) == "Y17000002" + + assert EDTFDateConverter().to_string(Undate(1991, 6)) == "1991-06" + assert EDTFDateConverter().to_string(Undate(1991, 5, 3)) == "1991-05-03" + + assert EDTFDateConverter().to_string(Undate(1991, "0X")) == "1991-0X" + assert EDTFDateConverter().to_string(Undate(1991, None, 3)) == "1991-XX-03" + + # TODO: override missing digit and confirm replacement diff --git a/tests/test_dateformat/test_iso8601.py b/tests/test_converters/test_iso8601.py similarity index 96% rename from tests/test_dateformat/test_iso8601.py rename to tests/test_converters/test_iso8601.py index ee69c47..73f645e 100644 --- a/tests/test_dateformat/test_iso8601.py +++ b/tests/test_converters/test_iso8601.py @@ -1,4 +1,4 @@ -from undate.dateformat.iso8601 import ISO8601DateFormat +from undate.converters.iso8601 import ISO8601DateFormat from undate.undate import Undate, UndateInterval diff --git a/tests/test_dateformat/test_base.py b/tests/test_dateformat/test_base.py deleted file mode 100644 index 1d184db..0000000 --- a/tests/test_dateformat/test_base.py +++ /dev/null @@ -1,64 +0,0 @@ -import logging - 
-import pytest -from undate.dateformat.base import BaseDateFormat - - -class TestBaseDateFormat: - def test_available_formatters(self): - available_formatters = BaseDateFormat.available_formatters() - assert isinstance(available_formatters, dict) - - # NOTE: import _after_ generating available formatters - # so we can confirm it gets loaded - from undate.dateformat.iso8601 import ISO8601DateFormat - - assert ISO8601DateFormat.name in available_formatters - assert available_formatters[ISO8601DateFormat.name] == ISO8601DateFormat - - def test_formatters_are_unique(self): - assert len(BaseDateFormat.available_formatters()) == len( - BaseDateFormat.__subclasses__() - ), "Formatter names have to be unique." - - def test_parse_not_implemented(self): - with pytest.raises(NotImplementedError): - BaseDateFormat().parse("foo bar baz") - - def test_parse_to_string(self): - with pytest.raises(NotImplementedError): - BaseDateFormat().to_string(1991) - - -def test_import_formatters_import_only_once(caplog): - # clear the cache, since any instantiation of an Undate - # object anywhere in the test suite will populate it - BaseDateFormat.import_formatters.cache_clear() - - # run first, and confirm it runs and loads formatters - with caplog.at_level(logging.DEBUG): - import_count = BaseDateFormat.import_formatters() - # should import at least one thing (iso8601) - assert import_count >= 1 - # should have log entry - assert "Loading formatters" in caplog.text - - # if we clear the log and run again, should not do anything - caplog.clear() - with caplog.at_level(logging.DEBUG): - BaseDateFormat.import_formatters() - assert "Loading formatters" not in caplog.text - - -@pytest.mark.last -def test_formatters_unique_error(): - # confirm that our uniqe formatters check fails when it should - - # run this test last because we can't undefine the subclass - # once it exists... 
- class ISO8601DateFormat2(BaseDateFormat): - name = "ISO8601" # duplicates existing formatter - - assert len(BaseDateFormat.available_formatters()) != len( - BaseDateFormat.__subclasses__() - ) diff --git a/tests/test_dateformat/test_edtf.py b/tests/test_dateformat/test_edtf.py deleted file mode 100644 index 32ec014..0000000 --- a/tests/test_dateformat/test_edtf.py +++ /dev/null @@ -1,47 +0,0 @@ -import pytest -from undate.dateformat.edtf import EDTFDateFormat -from undate.undate import Undate, UndateInterval - - -class TestEDTFDateFormat: - def test_parse_singledate(self): - assert EDTFDateFormat().parse("2002") == Undate(2002) - assert EDTFDateFormat().parse("1991-05") == Undate(1991, 5) - assert EDTFDateFormat().parse("1991-05-03") == Undate(1991, 5, 3) - # unknown dates are not strictly equal, but string comparison should match - assert str(EDTFDateFormat().parse("201X")) == str(Undate("201X")) - assert str(EDTFDateFormat().parse("2004-XX")) == str(Undate(2004, "XX")) - # missing year but month/day known - # assert EDTFDateFormat().parse("--05-03") == Undate(month=5, day=3) - - def test_parse_singledate_unequal(self): - assert EDTFDateFormat().parse("2002") != Undate(2003) - assert EDTFDateFormat().parse("1991-05") != Undate(1991, 6) - assert EDTFDateFormat().parse("1991-05-03") != Undate(1991, 5, 4) - # missing year but month/day known - # - does EDTF not support this or is parsing logic incorrect? 
- # assert EDTFDateFormat().parse("XXXX-05-03") != Undate(month=5, day=4) - - def test_parse_invalid(self): - with pytest.raises(ValueError): - EDTFDateFormat().parse("1991-5") - - def test_parse_range(self): - assert EDTFDateFormat().parse("1800/1900") == UndateInterval( - Undate(1800), Undate(1900) - ) - - def test_to_string(self): - assert EDTFDateFormat().to_string(Undate(900)) == "0900" - assert EDTFDateFormat().to_string(Undate("80")) == "0080" - assert EDTFDateFormat().to_string(Undate(33)) == "0033" - assert EDTFDateFormat().to_string(Undate("20XX")) == "20XX" - assert EDTFDateFormat().to_string(Undate(17000002)) == "Y17000002" - - assert EDTFDateFormat().to_string(Undate(1991, 6)) == "1991-06" - assert EDTFDateFormat().to_string(Undate(1991, 5, 3)) == "1991-05-03" - - assert EDTFDateFormat().to_string(Undate(1991, "0X")) == "1991-0X" - assert EDTFDateFormat().to_string(Undate(1991, None, 3)) == "1991-XX-03" - - # TODO: override missing digit and confirm replacement