From 47fc7fc0e45a85293d8224472bc7e3a72f69f690 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 24 Oct 2024 15:46:05 -0400 Subject: [PATCH 01/11] New notebook to demonstrate & confirm EDTF spec support. --- examples/notebooks/edtf-support.ipynb | 260 ++++++++++++++++++++++++++ 1 file changed, 260 insertions(+) create mode 100644 examples/notebooks/edtf-support.ipynb diff --git a/examples/notebooks/edtf-support.ipynb b/examples/notebooks/edtf-support.ipynb new file mode 100644 index 0000000..b537976 --- /dev/null +++ b/examples/notebooks/edtf-support.ipynb @@ -0,0 +1,260 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e517e953-8229-4f72-9376-fe822c4fae56", + "metadata": {}, + "source": [ + "# EDTF support - demonstration and validation\n", + "\n", + "This notebook demonstrates and validates `undate` support for specific parts of the [Extended Date/Time Format (EDTF)](https://www.loc.gov/standards/datetime/) specification.\n", + "\n", + "This notebook follows the same structure and uses the example from the Library of Congress specification, demonstrating parsing EDTF dates and formating dates in EDTF syntax, for the parts of the specification undate implements.\n", + "\n", + "Undate only handles dates and date intervals; time is not supported.\n", + "\n", + "*Notebook authored by Rebecca Sutton Koeser, October 2024.*" + ] + }, + { + "cell_type": "markdown", + "id": "b01e0622-b07a-41b9-8f69-758f70abb6c9", + "metadata": {}, + "source": [ + "## Level 0\n", + "\n", + "Full support for **Date** and **Time Interval**; **Date and Time** is not supported." 
+ ] + }, + { + "cell_type": "markdown", + "id": "9ea09abd-62df-419c-b803-c870a2e0a8f0", + "metadata": {}, + "source": [ + "### Date\n", + "\n", + "```\n", + "complete representation: [year][“-”][month][“-”][day]\n", + "Example 1 ‘1985-04-12’ refers to the calendar date 1985 April 12th with day precision.\n", + "reduced precision for year and month: [year][“-”][month]\n", + "Example 2 ‘1985-04’ refers to the calendar month 1985 April with month precision.\n", + "reduced precision for year: [year]\n", + "Example 3 ‘1985’ refers to the calendar year 1985 with year precision.\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "2922207c-fc2d-4dac-a038-bf368c6af1f4", + "metadata": {}, + "source": [ + "#### Parse EDTF format\n", + "\n", + "Demonstrate that these EDTF strings can be parsed into `Undate` objects." + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "9c6b7379-b2a7-4ec1-afa5-2cd9832c8a5d", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date\n", + "\n", + "# Example 1: day\n", + "day = Undate.parse(\"1985-04-12\", \"EDTF\")\n", + "assert day.precision == DatePrecision.DAY\n", + "assert day == date(1985, 4, 12)\n", + "\n", + "# Example 2 : month\n", + "month = Undate.parse(\"1985-04\", \"EDTF\")\n", + "assert month.year == \"1985\" and month.month == \"04\"\n", + "assert month.precision == DatePrecision.MONTH\n", + "\n", + "# Example 3 : year\n", + "year = Undate.parse(\"1985\", \"EDTF\")\n", + "assert year.year == \"1985\"\n", + "assert year.precision == DatePrecision.YEAR" + ] + }, + { + "cell_type": "markdown", + "id": "6666c12d-7fda-419a-bbd9-af68ed4bbff0", + "metadata": {}, + "source": [ + "#### Output in EDTF format\n", + "\n", + "Demonstrate that initializing `Undate` objects and serializing with EDTF formatter returns the expected value." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "923476ff-344a-4018-a02e-6e5f80ea76a8", + "metadata": {}, + "outputs": [], + "source": [ + "from undate.undate import Undate, DatePrecision\n", + "from undate.dateformat.edtf import EDTFDateFormat\n", + "\n", + "# set default format to EDTF\n", + "Undate.DEFAULT_FORMAT = \"EDTF\"\n", + "\n", + "# Example 1: day\n", + "day = Undate(1985, 4, 12)\n", + "# confirm EDTF formatter is being used\n", + "assert isinstance(day.formatter, EDTFDateFormat)\n", + "# casting to str is now equivalent to day.format(\"EDTF\")\n", + "assert str(day) == \"1985-04-12\"\n", + "assert day.precision == DatePrecision.DAY\n", + "\n", + "# Example 2 : month\n", + "month = Undate(1985, 4)\n", + "assert str(month) == \"1985-04\"\n", + "assert month.precision == DatePrecision.MONTH\n", + "\n", + "# Example 3 : year\n", + "year = Undate(1985)\n", + "assert str(year) == \"1985\"\n", + "assert year.precision == DatePrecision.YEAR" + ] + }, + { + "cell_type": "markdown", + "id": "6f2fa2c1-9022-4afd-9404-d79816a211a5", + "metadata": {}, + "source": [ + "### Date and Time - unsupported\n", + "\n", + "`undate` does not includes support for time, onnly dates and date intervals." + ] + }, + { + "cell_type": "markdown", + "id": "22b71a54-484e-49f2-975c-3da21c519095", + "metadata": {}, + "source": [ + "### Time Interval\n", + "\n", + "EDTF Level 0 adopts representations of a time interval where both the start and end are dates: start and end date only; that is, both start and duration, and duration and end, are excluded. 
Time of day is excluded.\n", + "\n", + "```\n", + " Example 1 ‘1964/2008’ is a time interval with calendar year precision, beginning sometime in 1964 and ending sometime in 2008.\n", + " Example 2 ‘2004-06/2006-08’ is a time interval with calendar month precision, beginning sometime in June 2004 and ending sometime in August of 2006.\n", + " Example 3 ‘2004-02-01/2005-02-08’ is a time interval with calendar day precision, beginning sometime on February 1, 2004 and ending sometime on February 8, 2005.\n", + " Example 4 ‘2004-02-01/2005-02’ is a time interval beginning sometime on February 1, 2004 and ending sometime in February 2005. Since the start endpoint precision (day) is different than that of the end endpoint (month) the precision of the time interval at large is undefined.\n", + " Example 5 ‘2004-02-01/2005’ is a time interval beginning sometime on February 1, 2004 and ending sometime in 2005. The start endpoint has calendar day precision and the end endpoint has calendar year precision. 
Similar to the previous example, the precision of the time interval at large is undefined.\n", + " Example 6 ‘2005/2006-02’ is a time interval beginning sometime in 2005 and ending sometime in February 2006.\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "b843650f-5da2-4a6a-bcb2-2190fb2d0fcf", + "metadata": {}, + "source": [ + "#### Parse EDTF format" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "6ed422de-34a2-4324-b254-f62db00563f7", + "metadata": {}, + "outputs": [], + "source": [ + "from undate.undate import UndateInterval\n", + "\n", + "# Example 1\n", + "year_range = Undate.parse(\"1964/2008\", \"EDTF\")\n", + "assert isinstance(year_range, UndateInterval)\n", + "assert year_range.earliest == Undate(1964)\n", + "assert year_range.latest == Undate(2008)\n", + "# Example 2\n", + "month_range = Undate.parse(\"2004-06/2006-08\", \"EDTF\")\n", + "assert isinstance(month_range, UndateInterval)\n", + "assert month_range.earliest == Undate(2004, 6)\n", + "assert month_range.latest == Undate(2006, 8)\n", + "# Example 3\n", + "day_range = Undate.parse(\"2004-02-01/2005-02-08\", \"EDTF\")\n", + "assert isinstance(day_range, UndateInterval)\n", + "assert day_range.earliest == Undate(2004, 2, 1)\n", + "assert day_range.latest == Undate(2005, 2, 8)\n", + "# Example 4 \n", + "day_month_range = Undate.parse(\"2004-02-01/2005-02\", \"EDTF\")\n", + "assert isinstance(day_month_range, UndateInterval)\n", + "assert day_month_range.earliest == Undate(2004, 2, 1)\n", + "assert day_month_range.latest == Undate(2005, 2)\n", + "assert day_month_range.earliest.precision == DatePrecision.DAY\n", + "assert day_month_range.latest.precision == DatePrecision.MONTH\n", + "# Example 5\n", + "day_year_range = Undate.parse(\"2004-02-01/2005\", \"EDTF\")\n", + "assert isinstance(day_year_range, UndateInterval)\n", + "assert day_year_range.earliest == Undate(2004, 2, 1)\n", + "assert day_year_range.latest == Undate(2005)\n", + "assert day_year_range.earliest.precision == 
DatePrecision.DAY\n", + "assert day_year_range.latest.precision == DatePrecision.YEAR\n", + "# Example 6 \n", + "year_month_range = Undate.parse(\"2005/2006-02\", \"EDTF\")\n", + "assert isinstance(year_month_range, UndateInterval)\n", + "assert year_month_range.earliest == Undate(2005)\n", + "assert year_month_range.latest == Undate(2006, 2)\n", + "assert year_month_range.earliest.precision == DatePrecision.YEAR\n", + "assert year_month_range.latest.precision == DatePrecision.MONTH\n" + ] + }, + { + "cell_type": "markdown", + "id": "8f4dc069-2d8a-4707-84af-ad9da5334ab9", + "metadata": {}, + "source": [ + "#### Output in EDTF format" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "8d98a139-627b-40bd-b1c5-d0028e538a53", + "metadata": {}, + "outputs": [], + "source": [ + "from undate.undate import UndateInterval\n", + "\n", + "# Example 1\n", + "assert UndateInterval(Undate(1964), Undate(2008)).format(\"EDTF\") == \"1964/2008\"\n", + "# Example 2\n", + "assert UndateInterval(Undate(2004, 6), Undate(2006, 8)).format(\"EDTF\") == \"2004-06/2006-08\"\n", + "# Example 3\n", + "assert UndateInterval(Undate(2004, 2, 1), Undate(2005, 2, 8)).format(\"EDTF\") == \"2004-02-01/2005-02-08\"\n", + "# Example 4 \n", + "assert UndateInterval(Undate(2004, 2, 1), Undate(2005, 2)).format(\"EDTF\") == \"2004-02-01/2005-02\"\n", + "# Example 5\n", + "assert UndateInterval(Undate(2004, 2, 1), Undate(2005)).format(\"EDTF\") == \"2004-02-01/2005\"\n", + "# Example 6 \n", + "assert UndateInterval(Undate(2005), Undate(2006, 2)).format(\"EDTF\") == \"2005/2006-02\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.0" + } + }, + "nbformat": 
4, + "nbformat_minor": 5 +} From 605d4566c285404dc70b398a4813973a1d2cceb5 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 24 Oct 2024 16:34:44 -0400 Subject: [PATCH 02/11] Add examples for more of L1/L2 EDTF spec --- examples/notebooks/edtf-support.ipynb | 549 +++++++++++++++++++++++++- 1 file changed, 541 insertions(+), 8 deletions(-) diff --git a/examples/notebooks/edtf-support.ipynb b/examples/notebooks/edtf-support.ipynb index b537976..10ff447 100644 --- a/examples/notebooks/edtf-support.ipynb +++ b/examples/notebooks/edtf-support.ipynb @@ -9,9 +9,9 @@ "\n", "This notebook demonstrates and validates `undate` support for specific parts of the [Extended Date/Time Format (EDTF)](https://www.loc.gov/standards/datetime/) specification.\n", "\n", - "This notebook follows the same structure and uses the example from the Library of Congress specification, demonstrating parsing EDTF dates and formating dates in EDTF syntax, for the parts of the specification undate implements.\n", + "This notebook follows the same structure and uses the example from the Library of Congress specification, demonstrating parsing EDTF dates and formatting dates in EDTF syntax, for the parts of the specification undate implements.\n", "\n", - "Undate only handles dates and date intervals; time is not supported.\n", + "`undate` only handles dates and date intervals; time is not supported.\n", "\n", "*Notebook authored by Rebecca Sutton Koeser, October 2024.*" ] @@ -55,17 +55,17 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 88, "id": "9c6b7379-b2a7-4ec1-afa5-2cd9832c8a5d", "metadata": {}, "outputs": [], "source": [ - "from datetime import date\n", + "import datetime \n", "\n", "# Example 1: day\n", "day = Undate.parse(\"1985-04-12\", \"EDTF\")\n", "assert day.precision == DatePrecision.DAY\n", - "assert day == date(1985, 4, 12)\n", + "assert day == datetime.date(1985, 4, 12)\n", "\n", "# Example 2 : month\n", "month = Undate.parse(\"1985-04\", \"EDTF\")\n", @@ 
-125,9 +125,7 @@ "id": "6f2fa2c1-9022-4afd-9404-d79816a211a5", "metadata": {}, "source": [ - "### Date and Time - unsupported\n", - "\n", - "`undate` does not includes support for time, onnly dates and date intervals." + "### Date and Time - not supported" ] }, { @@ -234,6 +232,541 @@ "# Example 6 \n", "assert UndateInterval(Undate(2005), Undate(2006, 2)).format(\"EDTF\") == \"2005/2006-02\"" ] + }, + { + "cell_type": "markdown", + "id": "f0e15f82-7586-4363-8ffa-5a3b7407c774", + "metadata": {}, + "source": [ + "## Level 1" + ] + }, + { + "cell_type": "markdown", + "id": "4c2ec542-eaf7-4f3f-8c91-a255791309e7", + "metadata": {}, + "source": [ + "### Letter-prefixed calendar year\n", + "\n", + "'Y' may be used at the beginning of the date string to signify that the date is a year, when (and only when) the year exceeds four digits, i.e. for years later than 9999 or earlier than -9999.\n", + "```\n", + " Example 1 'Y170000002' is the year 170000002\n", + " Example 2 'Y-170000002' is the year -170000002\n", + "```\n" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "532470db-851e-4f91-9242-cd93d35054cf", + "metadata": {}, + "outputs": [], + "source": [ + "# Example 1\n", + "# parse\n", + "assert Undate.parse(\"Y170000002\", \"EDTF\").year == \"170000002\"\n", + "# format\n", + "assert str(Undate(170000002)) == \"Y170000002\"\n", + "\n", + "# Example 2\n", + "# negative not yet supported!\n", + "# parse\n", + "# assert Undate.parse(\"-Y170000002\", \"EDTF\").year == \"-170000002\"\n", + "# # format\n", + "# assert str(Undate(-170000002)) == \"-Y170000002\"" + ] + }, + { + "cell_type": "markdown", + "id": "acd27893-4b0d-433d-931d-e2a18f73a47f", + "metadata": {}, + "source": [ + "### Seasons - not supported\n" + ] + }, + { + "cell_type": "markdown", + "id": "4047b969-7fef-462d-b2fa-9445cfcc2cde", + "metadata": {}, + "source": [ + "### Qualification of a date (complete) - not yet supported\n", + "\n", + "The characters '?', '~' and '%' are used to mean 
\"uncertain\", \"approximate\", and \"uncertain\" as well as \"approximate\", respectively. These characters may occur only at the end of the date string and apply to the entire date.\n", + "\n", + "```\n", + " Example 1 '1984?' year uncertain (possibly the year 1984, but not definitely)\n", + " Example 2 '2004-06~'' year-month approximate\n", + " Example 3 '2004-06-11%' entire date (year-month-day) uncertain and approximate\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "f6914a33-fe9d-43b1-b457-ca56b671d7b7", + "metadata": {}, + "source": [ + "### Unspecified digit(s) from the right \n", + "\n", + "The character 'X' may be used in place of one or more rightmost digits to indicate that the value of that digit is unspecified, for the following cases:\n", + "```\n", + " A year with one or two (rightmost) unspecified digits in a year-only expression (year precision)\n", + " Example 1 ‘201X’\n", + " Example 2 ‘20XX’\n", + " Year specified, month unspecified in a year-month expression (month precision)\n", + " Example 3 ‘2004-XX’\n", + " Year and month specified, day unspecified in a year-month-day expression (day precision)\n", + " Example 4 ‘1985-04-XX’ \n", + " Year specified, day and month unspecified in a year-month-day expression (day precision)\n", + " Example 5 ‘1985-XX-XX’ \n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "id": "a5abd0e4-0b26-49b0-bf78-3e1fe6c046d8", + "metadata": {}, + "outputs": [], + "source": [ + "# Example 1 ‘201X’\n", + "# parse\n", + "date = Undate.parse(\"201X\", \"EDTF\")\n", + "assert date.year == \"201X\"\n", + "assert date.precision == DatePrecision.YEAR\n", + "# earliest/latest possible years\n", + "assert date.earliest.year == 2010\n", + "assert date.latest.year == 2019\n", + "# format\n", + "assert str(Undate(\"201X\")) == \"201X\"\n", + "\n", + "# Example 2 ‘20XX’\n", + "# parse\n", + "date = Undate.parse(\"20XX\", \"EDTF\")\n", + "assert date.year == \"20XX\"\n", + "assert date.precision 
== DatePrecision.YEAR\n", + "# earliest/latest possible years\n", + "assert date.earliest.year == 2000\n", + "assert date.latest.year == 2099\n", + "# format\n", + "assert str(Undate(\"20XX\")) == \"20XX\"\n", + "\n", + "# Example 3 ‘2004-XX’\n", + "# parse\n", + "date = Undate.parse(\"2004-XX\", \"EDTF\")\n", + "assert date.year == \"2004\"\n", + "assert date.month == \"XX\"\n", + "assert date.precision == DatePrecision.MONTH\n", + "# earliest/latest possible months\n", + "assert date.earliest.month == 1\n", + "assert date.latest.month == 12\n", + "# format\n", + "assert str(Undate(2004, \"XX\")) == \"2004-XX\"\n", + "\n", + "# Example 4 ‘1985-04-XX’ \n", + "# parse\n", + "date = Undate.parse(\"1985-04-XX\", \"EDTF\")\n", + "assert date.year == \"1985\"\n", + "assert date.month == \"04\"\n", + "assert date.day == \"XX\"\n", + "assert date.precision == DatePrecision.DAY\n", + "# earliest/latest possible days\n", + "assert date.earliest.day == 1\n", + "assert date.latest.day == 30\n", + "# format\n", + "assert str(Undate(1985, 4, \"XX\")) == \"1985-04-XX\"\n", + "\n", + "# Example 5 ‘1985-XX-XX’ \n", + "# parse\n", + "date = Undate.parse(\"1985-XX-XX\", \"EDTF\")\n", + "assert date.year == \"1985\"\n", + "assert date.month == \"XX\"\n", + "assert date.day == \"XX\"\n", + "assert date.precision == DatePrecision.DAY\n", + "# earliest/latest possible months\n", + "assert date.earliest.month == 1\n", + "assert date.latest.month == 12\n", + "# earliest/latest possible days\n", + "assert date.earliest.day == 1\n", + "assert date.latest.day == 31 # undate guesses maximum month length when month is unknown\n", + "# format\n", + "assert str(Undate(1985, \"XX\", \"XX\")) == \"1985-XX-XX\"" + ] + }, + { + "cell_type": "markdown", + "id": "4b7a2aaf-58e2-444a-bcde-3cd2b53546f0", + "metadata": {}, + "source": [ + "### Extended Interval (L1)\n", + "\n", + "1. A null string may be used for the start or end date when it is unknown.\n", + "2. 
Double-dot (“..”) may be used when either the start or end date is not specified, either because there is none or for any other reason.\n", + "3. A modifier may appear at the end of the date to indicate \"uncertain\" and/or \"approximate\"\n", + "\n", + "* * *\n", + "\n", + "**NOTE:** `undate` does not currently distinguish between open intervals and intervals with an unknown start or end date." + ] + }, + { + "cell_type": "markdown", + "id": "c7bbc0c9-4a79-4427-ab6b-3e0a44843eba", + "metadata": {}, + "source": [ + "#### Open end time interval\n", + "\n", + "`undate` supports open ended time intervals, but does not currently distinguish between null string and double dot.\n", + "\n", + "\n", + " Example 1 ‘1985-04-12/..’\n", + " interval starting at 1985 April 12th with day precision; end open\n", + " Example 2 ‘1985-04/..’\n", + " interval starting at 1985 April with month precision; end open\n", + " Example 3 ‘1985/..’\n", + " interval starting at year 1985 with year precision; end open\n" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "id": "e47f3fff-d35c-4c2e-9568-214763f6511a", + "metadata": {}, + "outputs": [], + "source": [ + "# Example 1 ‘1985-04-12/..’\n", + "# parse\n", + "interval = Undate.parse(\"1985-04-12/..\", \"EDTF\")\n", + "assert isinstance(interval, UndateInterval)\n", + "assert interval.earliest == datetime.date(1985, 4, 12)\n", + "assert interval.earliest.precision == DatePrecision.DAY\n", + "assert interval.latest is None\n", + "# format\n", + "# NOTE: undate interval does not currently distinguish between double dot and null string\n", + "assert str(UndateInterval(Undate(1985, 4, 12), None)) == \"1985-04-12/\"\n", + "\n", + "# Example 2 ‘1985-04/..’\n", + "# parse\n", + "interval = Undate.parse(\"1985-04/..\", \"EDTF\")\n", + "assert isinstance(interval, UndateInterval)\n", + "assert interval.earliest == Undate(1985, 4)\n", + "assert interval.earliest.precision == DatePrecision.MONTH\n", + "assert interval.latest is None\n", 
+ "# format\n", + "assert str(UndateInterval(Undate(1985, 4), None)) == \"1985-04/\"\n", + "\n", + "# Example 3 ‘1985/..’\n", + "# parse\n", + "interval = Undate.parse(\"1985/..\", \"EDTF\")\n", + "assert isinstance(interval, UndateInterval)\n", + "assert interval.earliest == Undate(1985)\n", + "assert interval.earliest.precision == DatePrecision.YEAR\n", + "assert interval.latest is None\n", + "# format\n", + "assert str(UndateInterval(Undate(1985), None)) == \"1985/\"" + ] + }, + { + "cell_type": "markdown", + "id": "7a0514ce-91ef-430f-9ae3-cc809e41c47b", + "metadata": {}, + "source": [ + "#### Open start time interval\n", + "\n", + " Example 4 ‘../1985-04-12’\n", + " interval with open start; ending 1985 April 12th with day precision\n", + " Example 5 ‘../1985-04’\n", + " interval with open start; ending 1985 April with month precision\n", + " Example 6 ‘../1985’\n", + " interval with open start; ending at year 1985 with year precision" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "id": "39143c1f-932a-450c-9b2d-ffbe3e1416b0", + "metadata": {}, + "outputs": [], + "source": [ + "# Example 4 ‘../1985-04-12’\n", + "# parse\n", + "interval = Undate.parse(\"../1985-04-12\", \"EDTF\")\n", + "assert isinstance(interval, UndateInterval)\n", + "assert interval.earliest is None\n", + "assert interval.latest == datetime.date(1985, 4, 12)\n", + "assert interval.latest.precision == DatePrecision.DAY\n", + "# format\n", + "# NOTE: undate interval does not currently distinguish between double dot and null string\n", + "assert str(UndateInterval(None, Undate(1985, 4, 12))) == \"../1985-04-12\"\n", + "\n", + "# Example 5 ‘../1985-04’\n", + "# parse\n", + "interval = Undate.parse(\"../1985-04\", \"EDTF\")\n", + "assert isinstance(interval, UndateInterval)\n", + "assert interval.earliest is None\n", + "assert interval.latest == Undate(1985, 4)\n", + "assert interval.latest.precision == DatePrecision.MONTH\n", + "# format\n", + "assert str(UndateInterval(None, 
Undate(1985, 4), )) == \"../1985-04\"\n", + "\n", + "# Example 6 ‘../1985’\n", + "# parse\n", + "interval = Undate.parse(\"../1985\", \"EDTF\")\n", + "assert isinstance(interval, UndateInterval)\n", + "assert interval.earliest is None\n", + "assert interval.latest == Undate(1985)\n", + "assert interval.latest.precision == DatePrecision.YEAR\n", + "# format\n", + "assert str(UndateInterval(None, Undate(1985))) == \"../1985\"" + ] + }, + { + "cell_type": "markdown", + "id": "dc49211b-971d-489c-a98d-37e067bc210c", + "metadata": {}, + "source": [ + "#### Time interval with unknown end\n", + "\n", + " Example 7 ‘1985-04-12/’\n", + " interval starting 1985 April 12th with day precision; end unknown\n", + " Example 8 ‘1985-04/’\n", + " interval starting 1985 April with month precision; end unknown\n", + " Example 9 ‘1985/’\n", + " interval starting year 1985 with year precision; end unknown\n" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "id": "95965f17-0bd5-446f-bc09-9503eaed68e2", + "metadata": {}, + "outputs": [], + "source": [ + "# Example 7 ‘1985-04-12/’\n", + "# parse\n", + "interval = Undate.parse(\"1985-04-12/\", \"EDTF\")\n", + "assert isinstance(interval, UndateInterval)\n", + "assert interval.earliest == datetime.date(1985, 4, 12)\n", + "assert interval.earliest.precision == DatePrecision.DAY\n", + "assert interval.latest is None\n", + "# format\n", + "# NOTE: undate interval does not currently distinguish between double dot and null string\n", + "assert str(UndateInterval(Undate(1985, 4, 12), None)) == \"1985-04-12/\"\n", + "\n", + "# Example 8 ‘1985-04/’\n", + "# parse\n", + "interval = Undate.parse(\"1985-04/\", \"EDTF\")\n", + "assert isinstance(interval, UndateInterval)\n", + "assert interval.earliest == Undate(1985, 4)\n", + "assert interval.earliest.precision == DatePrecision.MONTH\n", + "assert interval.latest is None\n", + "# format\n", + "assert str(UndateInterval(Undate(1985, 4), None)) == \"1985-04/\"\n", + "\n", + "# Example 9 
‘1985/’\n", + "# parse\n", + "interval = Undate.parse(\"1985/\", \"EDTF\")\n", + "assert isinstance(interval, UndateInterval)\n", + "assert interval.earliest == Undate(1985)\n", + "assert interval.earliest.precision == DatePrecision.YEAR\n", + "assert interval.latest is None\n", + "# format\n", + "assert str(UndateInterval(Undate(1985), None)) == \"1985/\"" + ] + }, + { + "cell_type": "markdown", + "id": "9f3cd588-70d3-4eb4-add7-39591b22e6df", + "metadata": {}, + "source": [ + "#### Time interval with unknown start\n", + "\n", + " Example 10 ‘/1985-04-12’\n", + " interval with unknown start; ending 1985 April 12th with day precision\n", + " Example 11 ‘/1985-04’\n", + " interval with unknown start; ending 1985 April with month precision\n", + " Example 12 ‘/1985’\n", + " interval with unknown start; ending year 1985 with year precision\n" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "id": "c6c2d1a1-39f1-45eb-ac08-1de4fadbe842", + "metadata": {}, + "outputs": [], + "source": [ + "# Example 10 ‘/1985-04-12’\n", + "# parse\n", + "interval = Undate.parse(\"/1985-04-12\", \"EDTF\")\n", + "assert isinstance(interval, UndateInterval)\n", + "assert interval.earliest is None\n", + "assert interval.latest == datetime.date(1985, 4, 12)\n", + "assert interval.latest.precision == DatePrecision.DAY\n", + "# format\n", + "# NOTE: undate interval does not currently distinguish between double dot and null string\n", + "assert str(UndateInterval(None, Undate(1985, 4, 12))) == \"../1985-04-12\"\n", + "\n", + "# Example 11 ‘/1985-04’\n", + "# parse\n", + "interval = Undate.parse(\"/1985-04\", \"EDTF\")\n", + "assert isinstance(interval, UndateInterval)\n", + "assert interval.earliest is None\n", + "assert interval.latest == Undate(1985, 4)\n", + "assert interval.latest.precision == DatePrecision.MONTH\n", + "# format\n", + "assert str(UndateInterval(None, Undate(1985, 4), )) == \"../1985-04\"\n", + "\n", + "# Example 12 ‘/1985’\n", + "# parse\n", + "interval = 
Undate.parse(\"/1985\", \"EDTF\")\n", + "assert isinstance(interval, UndateInterval)\n", + "assert interval.earliest is None\n", + "assert interval.latest == Undate(1985)\n", + "assert interval.latest.precision == DatePrecision.YEAR\n", + "# format\n", + "assert str(UndateInterval(None, Undate(1985))) == \"../1985\"" + ] + }, + { + "cell_type": "markdown", + "id": "4c6327d8-2243-4bc1-bef6-255f8872ea51", + "metadata": {}, + "source": [ + "#### Negative calendar year - not yet supported\n", + "\n", + " Example 1 ‘-1985’\n", + "\n", + "Note: ISO 8601 Part 1 does not support negative year. " + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "id": "f24fd31a-176a-40b5-bff4-d72b68f32a18", + "metadata": {}, + "outputs": [], + "source": [ + "# Example 1 ‘-1985’\n", + "# parse\n", + "# neg_year = Undate.parse(\"-1985\", \"EDTF\")\n", + "# assert neg_year.year == \"-1985\"\n", + "# # format\n", + "# assert str(Undate(-1985)) == \"-1985\"" + ] + }, + { + "cell_type": "markdown", + "id": "be28b001-6745-4145-ab29-1c80c67d71ba", + "metadata": {}, + "source": [ + "## Level 2\n", + "\n", + "The only part of L2 that `undate` currently supports is allowing an unspecified digit anywhere in the date." 
+ ] + }, + { + "cell_type": "markdown", + "id": "b4c78eff-1c30-4d1a-94e8-d83074bb3678", + "metadata": {}, + "source": [ + "#### Unspecified Digit\n", + "\n", + "For level 2 the unspecified digit, 'X', may occur anywhere within a component.\n", + "\n", + " Example 1 ‘156X-12-25’\n", + " December 25 sometime during the 1560s\n", + " Example 2 ‘15XX-12-25’\n", + " December 25 sometime during the 1500s\n", + " Example 3 ‘XXXX-12-XX’\n", + " Some day in December in some year\n", + " Example 4 '1XXX-XX’\n", + " Some month during the 1000s\n", + " Example 5 ‘1XXX-12’\n", + " Some December during the 1000s\n", + " Example 6 ‘1984-1X’\n", + " October, November, or December 1984" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "id": "5910caab-eada-4715-b863-9bbbb15b9c5c", + "metadata": {}, + "outputs": [], + "source": [ + "# Example 1 ‘156X-12-25’\n", + "# parse\n", + "december = Undate.parse(\"156X-12-25\", \"EDTF\")\n", + "assert december.year == \"156X\"\n", + "assert december.month == \"12\"\n", + "assert december.day == \"25\"\n", + "assert december.precision == DatePrecision.DAY\n", + "assert december.earliest.year == 1560\n", + "assert december.latest.year == 1569\n", + "# format\n", + "assert str(Undate(\"156X\", 12, 25)) == \"156X-12-25\"\n", + "\n", + "# Example 2 ‘15XX-12-25’\n", + "# parse\n", + "december = Undate.parse(\"15XX-12-25\", \"EDTF\")\n", + "assert december.year == \"15XX\"\n", + "assert december.month == \"12\"\n", + "assert december.day == \"25\"\n", + "assert december.precision == DatePrecision.DAY\n", + "assert december.earliest.year == 1500\n", + "assert december.latest.year == 1599\n", + "# format\n", + "assert str(Undate(\"15XX\", 12, 25)) == \"15XX-12-25\"\n", + "\n", + "# NOT CURRENTLY SUPPORTED\n", + "# Example 3 ‘XXXX-12-XX’\n", + "# parse\n", + "# december = Undate.parse(\"XXXX-12-XX\", \"EDTF\")\n", + "# assert december.year == \"XXXX\"\n", + "# assert december.month == \"12\"\n", + "# assert december.day == \"XX\"\n", + "# 
assert december.precision == DatePrecision.DAY\n", + "# assert december.earliest.year == Undate.MIN_YEAR\n", + "# assert december.latest.year == Undate.MAX_YEAR\n", + "# assert december.earliest.day == 1\n", + "# assert december.latest.day == 31\n", + "# format\n", + "# assert str(Undate(\"XXXX\", 12, \"XX\")) == \"XXXX-12-XX\"\n", + "\n", + "# Example 4 '1XXX-XX’\n", + "# parse\n", + "some_month = Undate.parse(\"1XXX-XX\", \"EDTF\")\n", + "assert some_month.year == \"1XXX\"\n", + "assert some_month.month == \"XX\"\n", + "assert some_month.precision == DatePrecision.MONTH\n", + "assert some_month.earliest.year == 1000\n", + "assert some_month.latest.year == 1999\n", + "# format\n", + "assert str(Undate(\"1XXX\", \"XX\")) == \"1XXX-XX\"\n", + "\n", + "# NOT CURRENTLY SUPPORTED (parse error)\n", + "# Example 5 ‘1XXX-12’\n", + "# parse\n", + "# some_december = Undate.parse(\"1XXX-12\", \"EDTF\")\n", + "# assert some_december.year == \"1XXX\"\n", + "# assert some_december.month == \"12\"\n", + "# assert some_december.precision == DatePrecision.MONTH\n", + "# assert some_december.earliest.year == 1000\n", + "# assert some_december.latest.year == 1999\n", + "# # format\n", + "# assert str(Undate(\"1XXX\", 12)) == \"1XXX-12\"\n", + "\n", + "# Example 6 ‘1984-1X’\n", + "# parse\n", + "late_1984 = Undate.parse(\"1984-1X\", \"EDTF\")\n", + "assert late_1984.year == \"1984\"\n", + "assert late_1984.month == \"1X\"\n", + "assert late_1984.precision == DatePrecision.MONTH\n", + "assert late_1984.earliest.month == 10\n", + "assert late_1984.latest.month == 12\n", + "# format\n", + "assert str(Undate(1984, \"1X\")) == \"1984-1X\"" + ] } ], "metadata": { From 590996c58a99094a9cac9c68796834b7ea7ce640 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 24 Oct 2024 16:38:28 -0400 Subject: [PATCH 03/11] Support negative years for five digit+ years --- examples/notebooks/edtf-support.ipynb | 43 +++++++++++-------- src/undate/dateformat/edtf/edtf.lark | 2 +- 
src/undate/dateformat/edtf/formatter.py | 6 ++- src/undate/dateformat/edtf/transformer.py | 11 ++++- .../test_dateformat/edtf/test_edtf_parser.py | 1 + .../edtf/test_edtf_transformer.py | 1 + tests/test_dateformat/test_edtf.py | 1 + 7 files changed, 45 insertions(+), 20 deletions(-) diff --git a/examples/notebooks/edtf-support.ipynb b/examples/notebooks/edtf-support.ipynb index 10ff447..cb6a898 100644 --- a/examples/notebooks/edtf-support.ipynb +++ b/examples/notebooks/edtf-support.ipynb @@ -55,13 +55,15 @@ }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 2, "id": "9c6b7379-b2a7-4ec1-afa5-2cd9832c8a5d", "metadata": {}, "outputs": [], "source": [ "import datetime \n", "\n", + "from undate import Undate, UndateInterval, DatePrecision\n", + "\n", "# Example 1: day\n", "day = Undate.parse(\"1985-04-12\", \"EDTF\")\n", "assert day.precision == DatePrecision.DAY\n", @@ -90,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 3, "id": "923476ff-344a-4018-a02e-6e5f80ea76a8", "metadata": {}, "outputs": [], @@ -157,7 +159,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 4, "id": "6ed422de-34a2-4324-b254-f62db00563f7", "metadata": {}, "outputs": [], @@ -212,7 +214,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 5, "id": "8d98a139-627b-40bd-b1c5-d0028e538a53", "metadata": {}, "outputs": [], @@ -257,10 +259,18 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 14, "id": "532470db-851e-4f91-9242-cd93d35054cf", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-Y170000002\n" + ] + } + ], "source": [ "# Example 1\n", "# parse\n", @@ -269,11 +279,10 @@ "assert str(Undate(170000002)) == \"Y170000002\"\n", "\n", "# Example 2\n", - "# negative not yet supported!\n", "# parse\n", - "# assert Undate.parse(\"-Y170000002\", \"EDTF\").year == \"-170000002\"\n", - "# # format\n", - "# assert 
str(Undate(-170000002)) == \"-Y170000002\"" + "assert Undate.parse(\"-Y170000002\", \"EDTF\").year == \"-170000002\"\n", + "# format\n", + "assert str(Undate(-170000002)) == \"-Y170000002\"" ] }, { @@ -323,7 +332,7 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 7, "id": "a5abd0e4-0b26-49b0-bf78-3e1fe6c046d8", "metadata": {}, "outputs": [], @@ -428,7 +437,7 @@ }, { "cell_type": "code", - "execution_count": 98, + "execution_count": 8, "id": "e47f3fff-d35c-4c2e-9568-214763f6511a", "metadata": {}, "outputs": [], @@ -482,7 +491,7 @@ }, { "cell_type": "code", - "execution_count": 108, + "execution_count": 9, "id": "39143c1f-932a-450c-9b2d-ffbe3e1416b0", "metadata": {}, "outputs": [], @@ -536,7 +545,7 @@ }, { "cell_type": "code", - "execution_count": 113, + "execution_count": 10, "id": "95965f17-0bd5-446f-bc09-9503eaed68e2", "metadata": {}, "outputs": [], @@ -590,7 +599,7 @@ }, { "cell_type": "code", - "execution_count": 115, + "execution_count": 11, "id": "c6c2d1a1-39f1-45eb-ac08-1de4fadbe842", "metadata": {}, "outputs": [], @@ -641,7 +650,7 @@ }, { "cell_type": "code", - "execution_count": 117, + "execution_count": 12, "id": "f24fd31a-176a-40b5-bff4-d72b68f32a18", "metadata": {}, "outputs": [], @@ -689,7 +698,7 @@ }, { "cell_type": "code", - "execution_count": 145, + "execution_count": 13, "id": "5910caab-eada-4715-b863-9bbbb15b9c5c", "metadata": {}, "outputs": [], diff --git a/src/undate/dateformat/edtf/edtf.lark b/src/undate/dateformat/edtf/edtf.lark index 6b8e5aa..6305094 100644 --- a/src/undate/dateformat/edtf/edtf.lark +++ b/src/undate/dateformat/edtf/edtf.lark @@ -41,7 +41,7 @@ unspecified: /X/ ?day_unspecified: "0".."3"? unspecified ~ 1..2 // 'Y' may be used at the beginning of the date string to signify that the date is a year, when (and only when) the year exceeds four digits, i.e. for years later than 9999 or earlier than -9999. 
-year_fivedigitsplus: /Y\d{5,}/ +year_fivedigitsplus: /-?Y\d{5,}/ ?year_l1: year_fivedigitsplus | year | year_unspecified // The values 21, 22, 23, 24 may be used used to signify diff --git a/src/undate/dateformat/edtf/formatter.py b/src/undate/dateformat/edtf/formatter.py index e9a88f6..edd9e23 100644 --- a/src/undate/dateformat/edtf/formatter.py +++ b/src/undate/dateformat/edtf/formatter.py @@ -53,8 +53,12 @@ def _undate_to_string(self, undate: Undate) -> str: if undate.precision >= DatePrecision.YEAR: year = self._convert_missing_digits(undate.year, undate.MISSING_DIGIT) # years with more than 4 digits should be prefixed with Y + negative_year = "" + if year.startswith("-"): + negative_year = "-" + year = year[1:] if year and len(year) > 4: - year = f"Y{year}" + year = f"{negative_year}Y{year}" # TODO: handle uncertain / approximate parts.append(year or EDTF_UNSPECIFIED_DIGIT * 4) diff --git a/src/undate/dateformat/edtf/transformer.py b/src/undate/dateformat/edtf/transformer.py index 20f92d1..866dadc 100644 --- a/src/undate/dateformat/edtf/transformer.py +++ b/src/undate/dateformat/edtf/transformer.py @@ -66,5 +66,14 @@ def date_level1(self, items): def year_fivedigitsplus(self, items): # strip off the leading Y and convert to integer token = items[0] - year = int(token.value.lstrip("Y")) + value = token.value + # check if year is negative + negative = False + if value.startswith("-"): + value = value[1:] + negative = True + year = int(value.lstrip("Y")) + + if negative: + year = -year return Tree(data="year", children=[year]) diff --git a/tests/test_dateformat/edtf/test_edtf_parser.py b/tests/test_dateformat/edtf/test_edtf_parser.py index 5a2b8ea..4cc78c5 100644 --- a/tests/test_dateformat/edtf/test_edtf_parser.py +++ b/tests/test_dateformat/edtf/test_edtf_parser.py @@ -13,6 +13,7 @@ "1000-01/2000-05-01", # level 1 "Y170000002", + "-Y170000002", "2001-21", # spring 2001 # qualifiers "1984?", diff --git a/tests/test_dateformat/edtf/test_edtf_transformer.py 
b/tests/test_dateformat/edtf/test_edtf_transformer.py index 48b15f1..b8c480f 100644 --- a/tests/test_dateformat/edtf/test_edtf_transformer.py +++ b/tests/test_dateformat/edtf/test_edtf_transformer.py @@ -15,6 +15,7 @@ ("1000-01/2000-05-01", UndateInterval(Undate(1000, 1), Undate(2000, 5, 1))), # level 1 ("Y17000002", Undate(17000002)), + ("-Y17000002", Undate(-17000002)), # "2001-21", # spring 2001 # qualifiers TODO - not yet supported by undate # "1984?", diff --git a/tests/test_dateformat/test_edtf.py b/tests/test_dateformat/test_edtf.py index 13d3e5a..e6c14a2 100644 --- a/tests/test_dateformat/test_edtf.py +++ b/tests/test_dateformat/test_edtf.py @@ -38,6 +38,7 @@ def test_to_string(self): assert EDTFDateFormat().to_string(Undate(33)) == "0033" assert EDTFDateFormat().to_string(Undate("20XX")) == "20XX" assert EDTFDateFormat().to_string(Undate(17000002)) == "Y17000002" + assert EDTFDateFormat().to_string(Undate(-17000002)) == "-Y17000002" assert EDTFDateFormat().to_string(Undate(1991, 6)) == "1991-06" assert EDTFDateFormat().to_string(Undate(1991, 5, 3)) == "1991-05-03" From 56eac4b2a4c11bc3356ba1f2049a1c1fa76145fb Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 24 Oct 2024 16:43:44 -0400 Subject: [PATCH 04/11] Allow importing main undate objects from top level namespace --- src/undate/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/undate/__init__.py b/src/undate/__init__.py index dabe9f1..588ef83 100644 --- a/src/undate/__init__.py +++ b/src/undate/__init__.py @@ -1 +1,4 @@ __version__ = "0.3.0.dev0" + +from undate.undate import Undate, UndateInterval +from undate.date import DatePrecision From 5e9097350d20395d8cd63c27692e61bef68a73be Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 24 Oct 2024 16:50:03 -0400 Subject: [PATCH 05/11] Handle negative 4-digit years --- examples/notebooks/edtf-support.ipynb | 46 ++++++++++++------- src/undate/dateformat/edtf/edtf.lark | 3 +- src/undate/dateformat/edtf/formatter.py | 12 +++-- 
.../test_dateformat/edtf/test_edtf_parser.py | 2 + .../edtf/test_edtf_transformer.py | 2 + tests/test_dateformat/test_edtf.py | 1 + 6 files changed, 43 insertions(+), 23 deletions(-) diff --git a/examples/notebooks/edtf-support.ipynb b/examples/notebooks/edtf-support.ipynb index cb6a898..c94e1ee 100644 --- a/examples/notebooks/edtf-support.ipynb +++ b/examples/notebooks/edtf-support.ipynb @@ -259,18 +259,10 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 6, "id": "532470db-851e-4f91-9242-cd93d35054cf", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-Y170000002\n" - ] - } - ], + "outputs": [], "source": [ "# Example 1\n", "# parse\n", @@ -650,17 +642,37 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "id": "f24fd31a-176a-40b5-bff4-d72b68f32a18", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1985\n" + ] + }, + { + "ename": "AssertionError", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[13], line 7\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# format\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(Undate(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1985\u001b[39m))\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(Undate(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1985\u001b[39m)) \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m-1985\u001b[39m\u001b[38;5;124m\"\u001b[39m\n", + "\u001b[0;31mAssertionError\u001b[0m: " + ] + } + ], "source": [ "# Example 1 ‘-1985’\n", "# parse\n", - "# neg_year = Undate.parse(\"-1985\", \"EDTF\")\n", - "# assert neg_year.year == \"-1985\"\n", - "# # format\n", 
- "# assert str(Undate(-1985)) == \"-1985\"" + "neg_year = Undate.parse(\"-1985\", \"EDTF\")\n", + "assert neg_year.year == \"-1985\"\n", + "# format\n", + "print(Undate(-1985))\n", + "assert str(Undate(-1985)) == \"-1985\"" ] }, { @@ -698,7 +710,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "5910caab-eada-4715-b863-9bbbb15b9c5c", "metadata": {}, "outputs": [], diff --git a/src/undate/dateformat/edtf/edtf.lark b/src/undate/dateformat/edtf/edtf.lark index 6305094..7ad92ec 100644 --- a/src/undate/dateformat/edtf/edtf.lark +++ b/src/undate/dateformat/edtf/edtf.lark @@ -1,5 +1,6 @@ %import common.ESCAPED_STRING -> STRING %import common.INT -> INT +%import common.SIGNED_INT -> SIGNED_INT %import common.WS %ignore WS @@ -14,7 +15,7 @@ date: year | year "-" month | year "-" month "-" day -year: INT +year: SIGNED_INT month: /(0[1-9])|(1[0-2])/ day: /([0-2][1-9])|(3[0-1])/ diff --git a/src/undate/dateformat/edtf/formatter.py b/src/undate/dateformat/edtf/formatter.py index edd9e23..8f6de92 100644 --- a/src/undate/dateformat/edtf/formatter.py +++ b/src/undate/dateformat/edtf/formatter.py @@ -53,12 +53,14 @@ def _undate_to_string(self, undate: Undate) -> str: if undate.precision >= DatePrecision.YEAR: year = self._convert_missing_digits(undate.year, undate.MISSING_DIGIT) # years with more than 4 digits should be prefixed with Y - negative_year = "" - if year.startswith("-"): - negative_year = "-" - year = year[1:] - if year and len(year) > 4: + # (don't count minus sign when checking digits) + if year and len(year.lstrip("-")) > 4: + negative_year = "" + if year.startswith("-"): + negative_year = "-" + year = year[1:] year = f"{negative_year}Y{year}" + # TODO: handle uncertain / approximate parts.append(year or EDTF_UNSPECIFIED_DIGIT * 4) diff --git a/tests/test_dateformat/edtf/test_edtf_parser.py b/tests/test_dateformat/edtf/test_edtf_parser.py index 4cc78c5..06cd5b3 100644 --- a/tests/test_dateformat/edtf/test_edtf_parser.py +++ 
b/tests/test_dateformat/edtf/test_edtf_parser.py @@ -15,6 +15,8 @@ "Y170000002", "-Y170000002", "2001-21", # spring 2001 + # negative year + "-1985", # qualifiers "1984?", "2004-06~", diff --git a/tests/test_dateformat/edtf/test_edtf_transformer.py b/tests/test_dateformat/edtf/test_edtf_transformer.py index b8c480f..8de09d9 100644 --- a/tests/test_dateformat/edtf/test_edtf_transformer.py +++ b/tests/test_dateformat/edtf/test_edtf_transformer.py @@ -16,6 +16,8 @@ # level 1 ("Y17000002", Undate(17000002)), ("-Y17000002", Undate(-17000002)), + # negative year + ("-1985", Undate(-1985)), # "2001-21", # spring 2001 # qualifiers TODO - not yet supported by undate # "1984?", diff --git a/tests/test_dateformat/test_edtf.py b/tests/test_dateformat/test_edtf.py index e6c14a2..a70f891 100644 --- a/tests/test_dateformat/test_edtf.py +++ b/tests/test_dateformat/test_edtf.py @@ -39,6 +39,7 @@ def test_to_string(self): assert EDTFDateFormat().to_string(Undate("20XX")) == "20XX" assert EDTFDateFormat().to_string(Undate(17000002)) == "Y17000002" assert EDTFDateFormat().to_string(Undate(-17000002)) == "-Y17000002" + assert EDTFDateFormat().to_string(Undate(-1985)) == "-1985" assert EDTFDateFormat().to_string(Undate(1991, 6)) == "1991-06" assert EDTFDateFormat().to_string(Undate(1991, 5, 3)) == "1991-05-03" From 8760c33a429ed813ccf57333c0e19edab2aa3e5a Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 24 Oct 2024 17:03:27 -0400 Subject: [PATCH 06/11] Handle level 2 fully-unspecified year --- examples/notebooks/edtf-support.ipynb | 81 +++++++------------ src/undate/dateformat/edtf/edtf.lark | 3 +- src/undate/undate.py | 7 +- .../test_dateformat/edtf/test_edtf_parser.py | 6 ++ .../edtf/test_edtf_transformer.py | 5 ++ tests/test_dateformat/test_edtf.py | 15 +++- 6 files changed, 59 insertions(+), 58 deletions(-) diff --git a/examples/notebooks/edtf-support.ipynb b/examples/notebooks/edtf-support.ipynb index c94e1ee..208b7dd 100644 --- a/examples/notebooks/edtf-support.ipynb +++ 
b/examples/notebooks/edtf-support.ipynb @@ -55,7 +55,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "9c6b7379-b2a7-4ec1-afa5-2cd9832c8a5d", "metadata": {}, "outputs": [], @@ -92,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "923476ff-344a-4018-a02e-6e5f80ea76a8", "metadata": {}, "outputs": [], @@ -159,7 +159,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "6ed422de-34a2-4324-b254-f62db00563f7", "metadata": {}, "outputs": [], @@ -214,7 +214,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "8d98a139-627b-40bd-b1c5-d0028e538a53", "metadata": {}, "outputs": [], @@ -259,7 +259,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "id": "532470db-851e-4f91-9242-cd93d35054cf", "metadata": {}, "outputs": [], @@ -324,7 +324,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "a5abd0e4-0b26-49b0-bf78-3e1fe6c046d8", "metadata": {}, "outputs": [], @@ -429,7 +429,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "e47f3fff-d35c-4c2e-9568-214763f6511a", "metadata": {}, "outputs": [], @@ -483,7 +483,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "39143c1f-932a-450c-9b2d-ffbe3e1416b0", "metadata": {}, "outputs": [], @@ -537,7 +537,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "95965f17-0bd5-446f-bc09-9503eaed68e2", "metadata": {}, "outputs": [], @@ -591,7 +591,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "id": "c6c2d1a1-39f1-45eb-ac08-1de4fadbe842", "metadata": {}, "outputs": [], @@ -642,36 +642,16 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 11, "id": "f24fd31a-176a-40b5-bff4-d72b68f32a18", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1985\n" - 
] - }, - { - "ename": "AssertionError", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[13], line 7\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# format\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(Undate(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1985\u001b[39m))\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(Undate(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1985\u001b[39m)) \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m-1985\u001b[39m\u001b[38;5;124m\"\u001b[39m\n", - "\u001b[0;31mAssertionError\u001b[0m: " - ] - } - ], + "outputs": [], "source": [ "# Example 1 ‘-1985’\n", "# parse\n", "neg_year = Undate.parse(\"-1985\", \"EDTF\")\n", "assert neg_year.year == \"-1985\"\n", "# format\n", - "print(Undate(-1985))\n", "assert str(Undate(-1985)) == \"-1985\"" ] }, @@ -710,7 +690,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "5910caab-eada-4715-b863-9bbbb15b9c5c", "metadata": {}, "outputs": [], @@ -739,20 +719,20 @@ "# format\n", "assert str(Undate(\"15XX\", 12, 25)) == \"15XX-12-25\"\n", "\n", - "# NOT CURRENTLY SUPPORTED\n", "# Example 3 ‘XXXX-12-XX’\n", "# parse\n", - "# december = Undate.parse(\"XXXX-12-XX\", \"EDTF\")\n", - "# assert december.year == \"XXXX\"\n", - "# assert december.month == \"12\"\n", - "# assert december.day == \"XX\"\n", - "# assert december.precision == DatePrecision.DAY\n", + "december = Undate.parse(\"XXXX-12-XX\", \"EDTF\")\n", + "assert december.year == \"XXXX\"\n", + "assert december.month == \"12\"\n", + "assert december.day == \"XX\"\n", + "assert december.precision == DatePrecision.DAY\n", + "# TODO: these must be in a different branch...\n", "# assert december.earliest.year == 
Undate.MIN_YEAR\n", "# assert december.latest.year == Undate.MAX_YEAR\n", - "# assert december.earliest.day == 1\n", - "# assert december.latest.day == 31\n", + "assert december.earliest.day == 1\n", + "assert december.latest.day == 31\n", "# format\n", - "# assert str(Undate(\"XXXX\", 12, \"XX\")) == \"XXXX-12-XX\"\n", + "assert str(Undate(\"XXXX\", 12, \"XX\")) == \"XXXX-12-XX\"\n", "\n", "# Example 4 '1XXX-XX’\n", "# parse\n", @@ -765,17 +745,16 @@ "# format\n", "assert str(Undate(\"1XXX\", \"XX\")) == \"1XXX-XX\"\n", "\n", - "# NOT CURRENTLY SUPPORTED (parse error)\n", "# Example 5 ‘1XXX-12’\n", "# parse\n", - "# some_december = Undate.parse(\"1XXX-12\", \"EDTF\")\n", - "# assert some_december.year == \"1XXX\"\n", - "# assert some_december.month == \"12\"\n", - "# assert some_december.precision == DatePrecision.MONTH\n", - "# assert some_december.earliest.year == 1000\n", - "# assert some_december.latest.year == 1999\n", - "# # format\n", - "# assert str(Undate(\"1XXX\", 12)) == \"1XXX-12\"\n", + "some_december = Undate.parse(\"1XXX-12\", \"EDTF\")\n", + "assert some_december.year == \"1XXX\"\n", + "assert some_december.month == \"12\"\n", + "assert some_december.precision == DatePrecision.MONTH\n", + "assert some_december.earliest.year == 1000\n", + "assert some_december.latest.year == 1999\n", + "# format\n", + "assert str(Undate(\"1XXX\", 12)) == \"1XXX-12\"\n", "\n", "# Example 6 ‘1984-1X’\n", "# parse\n", diff --git a/src/undate/dateformat/edtf/edtf.lark b/src/undate/dateformat/edtf/edtf.lark index 7ad92ec..f5b34f7 100644 --- a/src/undate/dateformat/edtf/edtf.lark +++ b/src/undate/dateformat/edtf/edtf.lark @@ -35,8 +35,9 @@ uncertain_approximate: "%" // The character 'X' may be used in place of one or more rightmost // digits to indicate that the value of that digit is unspecified +// In Level 2, year may be completely unspecified. 
unspecified: /X/ -?year_unspecified: /\d+/ unspecified+ +?year_unspecified: /\d+/ unspecified+ | unspecified ~ 4 ?month_unspecified: "0".."1"? unspecified ~ 1..2 //?year_month_unspecified: year_l1 "-" month_unspecified ?day_unspecified: "0".."3"? unspecified ~ 1..2 diff --git a/src/undate/undate.py b/src/undate/undate.py index 3a506d7..eb789ca 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -54,9 +54,12 @@ def __init__( elif year: self.precision = DatePrecision.YEAR - # TODO: refactor partial date min/max calculations + # special case: treat year = XXXX as unknown/none + if year == "XXXX": + year = None if year is not None: + # could we / should we use str.isnumeric here? try: year = int(year) # update initial value since it is used to determine @@ -110,7 +113,7 @@ def __init__( # if we have no day or partial day, calculate min / max min_day = 1 # if we know year and month (or max month), calculate exactly - if year and month: + if year and month and isinstance(year, int): _, max_day = monthrange(int(year), max_month) elif year is None and month: # If we don't have year and month, diff --git a/tests/test_dateformat/edtf/test_edtf_parser.py b/tests/test_dateformat/edtf/test_edtf_parser.py index 06cd5b3..c8c05e8 100644 --- a/tests/test_dateformat/edtf/test_edtf_parser.py +++ b/tests/test_dateformat/edtf/test_edtf_parser.py @@ -32,6 +32,12 @@ "1985-04/..", "../1985-04-12", "/1985-04-12", + # level 2 unspecified digits + "156X-12-25", + "XXXX-12-XX", + "1XXX-12", + "1XXX-XX", + "1984-1X", ] diff --git a/tests/test_dateformat/edtf/test_edtf_transformer.py b/tests/test_dateformat/edtf/test_edtf_transformer.py index 8de09d9..c29bb70 100644 --- a/tests/test_dateformat/edtf/test_edtf_transformer.py +++ b/tests/test_dateformat/edtf/test_edtf_transformer.py @@ -34,6 +34,11 @@ ("1985-04/..", UndateInterval(Undate(1985, 4), None)), ("../1985-04-12", UndateInterval(None, Undate(1985, 4, 12))), ("/1985-04-12", UndateInterval(None, Undate(1985, 4, 12))), + # level 
2 unspecified digits + ("156X-12-25", Undate("156X", 12, 25)), + ("XXXX-12-XX", Undate("XXXX", 12, "XX")), + ("1XXX-XX", Undate("1XXX", "XX")), + ("1984-1X", Undate(1984, "1X")), ] diff --git a/tests/test_dateformat/test_edtf.py b/tests/test_dateformat/test_edtf.py index a70f891..4ae2fe9 100644 --- a/tests/test_dateformat/test_edtf.py +++ b/tests/test_dateformat/test_edtf.py @@ -13,15 +13,18 @@ def test_parse_singledate(self): assert str(EDTFDateFormat().parse("201X")) == str(Undate("201X")) assert str(EDTFDateFormat().parse("2004-XX")) == str(Undate(2004, "XX")) # missing year but month/day known - # assert EDTFDateFormat().parse("--05-03") == Undate(month=5, day=3) + # comparison doesn't work because undate knows unknown dates aren't + # necessarily the same, so use string comparison + assert str(EDTFDateFormat().parse("XXXX-05-03")) == Undate( + month=5, day=3 + ).format("EDTF") def test_parse_singledate_unequal(self): assert EDTFDateFormat().parse("2002") != Undate(2003) assert EDTFDateFormat().parse("1991-05") != Undate(1991, 6) assert EDTFDateFormat().parse("1991-05-03") != Undate(1991, 5, 4) # missing year but month/day known - # - does EDTF not support this or is parsing logic incorrect? 
- # assert EDTFDateFormat().parse("XXXX-05-03") != Undate(month=5, day=4) + assert EDTFDateFormat().parse("XXXX-05-03") != Undate(month=5, day=4) def test_parse_invalid(self): with pytest.raises(ValueError): @@ -47,4 +50,8 @@ def test_to_string(self): assert EDTFDateFormat().to_string(Undate(1991, "0X")) == "1991-0X" assert EDTFDateFormat().to_string(Undate(1991, None, 3)) == "1991-XX-03" - # TODO: override missing digit and confirm replacement + # level 2 unspecified digits + assert EDTFDateFormat().to_string(Undate("156X", 12, 25)) == "156X-12-25" + assert EDTFDateFormat().to_string(Undate("XXXX", 12, "XX")) == "XXXX-12-XX" + assert EDTFDateFormat().to_string(Undate("1XXX", "XX")) == "1XXX-XX" + assert EDTFDateFormat().to_string(Undate(1984, "1X")) == "1984-1X" From 71cddb8475685bd965c6079393c047bcfe46fb0c Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 24 Oct 2024 19:40:48 -0400 Subject: [PATCH 07/11] Don't use signed int for year since + is unsupported --- examples/notebooks/edtf-support.ipynb | 2 +- src/undate/dateformat/edtf/edtf.lark | 3 +-- src/undate/dateformat/edtf/transformer.py | 9 +++++++++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/examples/notebooks/edtf-support.ipynb b/examples/notebooks/edtf-support.ipynb index 208b7dd..6a01c47 100644 --- a/examples/notebooks/edtf-support.ipynb +++ b/examples/notebooks/edtf-support.ipynb @@ -690,7 +690,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "id": "5910caab-eada-4715-b863-9bbbb15b9c5c", "metadata": {}, "outputs": [], diff --git a/src/undate/dateformat/edtf/edtf.lark b/src/undate/dateformat/edtf/edtf.lark index f5b34f7..9118582 100644 --- a/src/undate/dateformat/edtf/edtf.lark +++ b/src/undate/dateformat/edtf/edtf.lark @@ -1,6 +1,5 @@ %import common.ESCAPED_STRING -> STRING %import common.INT -> INT -%import common.SIGNED_INT -> SIGNED_INT %import common.WS %ignore WS @@ -15,7 +14,7 @@ date: year | year "-" month | year "-" month "-" day -year: 
SIGNED_INT +year: INT | /\-/ INT month: /(0[1-9])|(1[0-2])/ day: /([0-2][1-9])|(3[0-1])/ diff --git a/src/undate/dateformat/edtf/transformer.py b/src/undate/dateformat/edtf/transformer.py index 866dadc..536c77e 100644 --- a/src/undate/dateformat/edtf/transformer.py +++ b/src/undate/dateformat/edtf/transformer.py @@ -63,6 +63,15 @@ def day_unspecified(self, items): def date_level1(self, items): return self.date(items) + def year(self, items): + # when the year is negative, there are two tokens + if len(items) > 1 and items[0] == "-": + # an anonymous token for the - and the integer year + year = items[1] + return Tree(data="year", children=[-year]) + + return Tree(data="year", children=[items[0]]) + def year_fivedigitsplus(self, items): # strip off the leading Y and convert to integer token = items[0] From 7b4081fe099242f2f00291e040d09e347b81a36f Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 8 Nov 2024 17:29:34 -0500 Subject: [PATCH 08/11] Update EDTF demo/validation notebook for formatter/converter refactor --- examples/notebooks/edtf-support.ipynb | 44 ++++++++++++++++----------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/examples/notebooks/edtf-support.ipynb b/examples/notebooks/edtf-support.ipynb index 6a01c47..2a5ae36 100644 --- a/examples/notebooks/edtf-support.ipynb +++ b/examples/notebooks/edtf-support.ipynb @@ -55,7 +55,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 4, "id": "9c6b7379-b2a7-4ec1-afa5-2cd9832c8a5d", "metadata": {}, "outputs": [], @@ -92,21 +92,21 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "923476ff-344a-4018-a02e-6e5f80ea76a8", "metadata": {}, "outputs": [], "source": [ "from undate.undate import Undate, DatePrecision\n", - "from undate.dateformat.edtf import EDTFDateFormat\n", + "from undate.converters.edtf import EDTFDateConverter\n", "\n", "# set default format to EDTF\n", - "Undate.DEFAULT_FORMAT = \"EDTF\"\n", + "Undate.DEFAULT_CONVERTER = 
\"EDTF\"\n", "\n", "# Example 1: day\n", "day = Undate(1985, 4, 12)\n", - "# confirm EDTF formatter is being used\n", - "assert isinstance(day.formatter, EDTFDateFormat)\n", + "# confirm EDTF converter is being used\n", + "assert isinstance(day.converter, EDTFDateConverter)\n", "# casting to str is now equivalent to day.format(\"EDTF\")\n", "assert str(day) == \"1985-04-12\"\n", "assert day.precision == DatePrecision.DAY\n", @@ -159,7 +159,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "6ed422de-34a2-4324-b254-f62db00563f7", "metadata": {}, "outputs": [], @@ -214,7 +214,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "8d98a139-627b-40bd-b1c5-d0028e538a53", "metadata": {}, "outputs": [], @@ -259,7 +259,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "532470db-851e-4f91-9242-cd93d35054cf", "metadata": {}, "outputs": [], @@ -324,7 +324,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "a5abd0e4-0b26-49b0-bf78-3e1fe6c046d8", "metadata": {}, "outputs": [], @@ -429,11 +429,13 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "id": "e47f3fff-d35c-4c2e-9568-214763f6511a", "metadata": {}, "outputs": [], "source": [ + "import datetime\n", + "\n", "# Example 1 ‘1985-04-12/..’\n", "# parse\n", "interval = Undate.parse(\"1985-04-12/..\", \"EDTF\")\n", @@ -483,7 +485,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "id": "39143c1f-932a-450c-9b2d-ffbe3e1416b0", "metadata": {}, "outputs": [], @@ -537,7 +539,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "id": "95965f17-0bd5-446f-bc09-9503eaed68e2", "metadata": {}, "outputs": [], @@ -591,7 +593,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "id": "c6c2d1a1-39f1-45eb-ac08-1de4fadbe842", "metadata": {}, "outputs": [], @@ -642,7 +644,7 @@ }, { "cell_type": "code", - 
"execution_count": 11, + "execution_count": 13, "id": "f24fd31a-176a-40b5-bff4-d72b68f32a18", "metadata": {}, "outputs": [], @@ -690,7 +692,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "id": "5910caab-eada-4715-b863-9bbbb15b9c5c", "metadata": {}, "outputs": [], @@ -767,6 +769,14 @@ "# format\n", "assert str(Undate(1984, \"1X\")) == \"1984-1X\"" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8297d77f-3680-4237-9bcd-28da33ac9619", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -785,7 +795,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.0" + "version": "3.12.7" } }, "nbformat": 4, From f829a97a4c7319897a52a5583d3122bba822a530 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 8 Nov 2024 17:37:55 -0500 Subject: [PATCH 09/11] Remove unsupported note for negative years --- examples/notebooks/edtf-support.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/notebooks/edtf-support.ipynb b/examples/notebooks/edtf-support.ipynb index 2a5ae36..70c29e2 100644 --- a/examples/notebooks/edtf-support.ipynb +++ b/examples/notebooks/edtf-support.ipynb @@ -635,7 +635,7 @@ "id": "4c6327d8-2243-4bc1-bef6-255f8872ea51", "metadata": {}, "source": [ - "#### Negative calendar year - not yet supported\n", + "#### Negative calendar year\n", "\n", " Example 1 ‘-1985’\n", "\n", @@ -644,7 +644,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "id": "f24fd31a-176a-40b5-bff4-d72b68f32a18", "metadata": {}, "outputs": [], From 3531bb91a3c6e9f1967bb5c3f2445829a7cf9331 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 8 Nov 2024 17:43:15 -0500 Subject: [PATCH 10/11] Clean up items flagged by ruff check --- examples/notebooks/edtf-support.ipynb | 6 +----- src/undate/__init__.py | 4 +++- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/examples/notebooks/edtf-support.ipynb 
b/examples/notebooks/edtf-support.ipynb index 70c29e2..880808c 100644 --- a/examples/notebooks/edtf-support.ipynb +++ b/examples/notebooks/edtf-support.ipynb @@ -164,8 +164,6 @@ "metadata": {}, "outputs": [], "source": [ - "from undate.undate import UndateInterval\n", - "\n", "# Example 1\n", "year_range = Undate.parse(\"1964/2008\", \"EDTF\")\n", "assert isinstance(year_range, UndateInterval)\n", @@ -214,13 +212,11 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 16, "id": "8d98a139-627b-40bd-b1c5-d0028e538a53", "metadata": {}, "outputs": [], "source": [ - "from undate.undate import UndateInterval\n", - "\n", "# Example 1\n", "assert UndateInterval(Undate(1964), Undate(2008)).format(\"EDTF\") == \"1964/2008\"\n", "# Example 2\n", diff --git a/src/undate/__init__.py b/src/undate/__init__.py index 588ef83..9f82cb5 100644 --- a/src/undate/__init__.py +++ b/src/undate/__init__.py @@ -1,4 +1,6 @@ __version__ = "0.3.0.dev0" -from undate.undate import Undate, UndateInterval from undate.date import DatePrecision +from undate.undate import Undate, UndateInterval + +__all__ = ["Undate", "UndateInterval", "DatePrecision"] From b8219236474745576d4ba4abff96ec996d8e95f2 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 8 Nov 2024 17:50:26 -0500 Subject: [PATCH 11/11] Improve test coverage and error handling for EDTF --- src/undate/converters/edtf/converter.py | 2 +- tests/test_converters/test_edtf.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/undate/converters/edtf/converter.py b/src/undate/converters/edtf/converter.py index 13411fd..86d83b5 100644 --- a/src/undate/converters/edtf/converter.py +++ b/src/undate/converters/edtf/converter.py @@ -103,4 +103,4 @@ def _undate_to_string(self, undate: Undate) -> str: return "-".join(parts) # how can we have an empty string? 
probably shouldn't get here - return "" + raise ValueError("Failed to generate an EDTF string from %r", undate) diff --git a/tests/test_converters/test_edtf.py b/tests/test_converters/test_edtf.py index 24fad38..78b3be5 100644 --- a/tests/test_converters/test_edtf.py +++ b/tests/test_converters/test_edtf.py @@ -1,5 +1,6 @@ import pytest from undate.converters.edtf import EDTFDateConverter +from undate.date import DatePrecision from undate.undate import Undate, UndateInterval @@ -52,5 +53,12 @@ def test_to_string(self): assert EDTFDateConverter().to_string(Undate(1991, "0X")) == "1991-0X" assert EDTFDateConverter().to_string(Undate(1991, None, 3)) == "1991-XX-03" + assert EDTFDateConverter().to_string(Undate(-1984)) == "-1984" + # if converter can't generate a string for the date, + # it should raise a ValueError + empty_undate = Undate() + empty_undate.precision = DatePrecision.DECADE + with pytest.raises(ValueError): + EDTFDateConverter().to_string(empty_undate) # TODO: override missing digit and confirm replacement