Skip to content

Commit

Permalink
add notebook with first examples of converting ismi dates to undate.
Browse files Browse the repository at this point in the history
  • Loading branch information
robcast committed Nov 21, 2024
1 parent 0e36e45 commit f0ee32c
Show file tree
Hide file tree
Showing 2 changed files with 181 additions and 0 deletions.
179 changes: 179 additions & 0 deletions examples/use-cases/ismi/ismi-dates.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "0fa36628-ccf2-4977-8c4c-e0a85e2b37b6",
"metadata": {},
"source": [
"# Working with ISMI project dates"
]
},
{
"cell_type": "markdown",
"id": "ffd4b544-8957-494e-9e09-b703d68bb7df",
"metadata": {},
"source": [
"## Load date samples from RDF"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a8a36e7f-6057-44d1-8466-6709910d4249",
"metadata": {},
"outputs": [],
"source": [
"from rdflib import Graph, RDF, URIRef\n",
"from rdflib.namespace import Namespace, RDFS\n",
"from undate.undate import Undate\n",
"\n",
"# namespace for CIDOC-CRM terms, used in addition to the RDF/RDFS ones imported above\n",
"crmNs = Namespace('http://www.cidoc-crm.org/cidoc-crm/')\n",
"\n",
"# new graph with the 'crm' prefix bound to that namespace\n",
"g = Graph()\n",
"g.bind(prefix='crm', namespace=crmNs)\n",
"\n",
"# read the ISMI RDF sample data into the graph\n",
"g.parse(source='data/ismi-crm-date-samples.ttl')\n",
"\n",
"# sanity check: the cell displays the number of triples in the graph\n",
"len(g)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c940ca2b-b369-4511-8dc9-420bdaeb3e65",
"metadata": {},
"outputs": [],
"source": [
"# every subject in the graph that is typed as a CRM E52_Time-Span\n",
"date_uris = list(g.subjects(RDF.type, crmNs['E52_Time-Span']))\n",
"\n",
"# SPARQL query for a single time-span: its note and the rdfs:label of its identifier.\n",
"# ?uri is pre-bound for each time-span in the loop below, so the text is built only once.\n",
"label_note_query = '''SELECT ?uri ?label ?note \n",
"    WHERE { \n",
"        ?uri crm:P3_has_note ?note ;\n",
"            crm:P1_is_identified_by / rdfs:label ?label .\n",
"    } limit 10'''\n",
"\n",
"for uri in date_uris:\n",
"    rows = g.query(label_note_query, initBindings={'uri': uri})\n",
"    # one output line per (label, note) combination found for this time-span\n",
"    for row in rows:\n",
"        print(f\"uri={str(uri)} label={row.label} note={row.note}\")"
]
},
{
"cell_type": "markdown",
"id": "16361060-657f-431c-b70f-9101d550aa38",
"metadata": {},
"source": [
"## Convert RDF dates to Undate"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e443b974-930b-4a5d-8f21-641b4556b159",
"metadata": {},
"outputs": [],
"source": [
"from undate.date import DatePrecision, Date\n",
"import datetime\n",
"\n",
"# base URIs of the ISMI type vocabularies; a type URI is the base plus the type name\n",
"ISMI_DATE_TYPE_NS = 'http://content.mpiwg-berlin.mpg.de/ns/ismi/type/date/'\n",
"ISMI_CALENDAR_TYPE_NS = 'http://content.mpiwg-berlin.mpg.de/ns/ismi/type/calendar/'\n",
"\n",
"\n",
"def read_ismi_type(subject, type_ns, known_types, what):\n",
"    \"\"\"Return which of `known_types` is attached to `subject` via crm:P2_has_type.\n",
"\n",
"    A type matches when its URI equals `type_ns` + name. If several type URIs\n",
"    match, the last one yielded by the graph wins.\n",
"\n",
"    Raises RuntimeError (naming `what` in the message) when `subject` has no\n",
"    type URI at all, or when none of its type URIs is a known one.\n",
"    \"\"\"\n",
"    type_uris = [str(type_uri) for type_uri in g.objects(subject, crmNs.P2_has_type)]\n",
"    # start from None on every call so a value left over from an earlier run is never reused\n",
"    found = None\n",
"    for type_uri in type_uris:\n",
"        for name in known_types:\n",
"            if type_uri == type_ns + name:\n",
"                found = name\n",
"    if found is None:\n",
"        if not type_uris:\n",
"            raise RuntimeError(f\"No {what} URI found for {subject}\")\n",
"        raise RuntimeError(f\"Unknown {what} URI {type_uris[-1]}\")\n",
"    return found\n",
"\n",
"\n",
"# example: convert the second time-span found in the sample data\n",
"uri = date_uris[1]\n",
"\n",
"#\n",
"# read date type\n",
"#\n",
"date_type = read_ismi_type(uri, ISMI_DATE_TYPE_NS, ['day', 'year', 'range'], 'datetype')\n",
"\n",
"#\n",
"# read label and calendar\n",
"#\n",
"date_label_uri = next(g.objects(uri, crmNs.P1_is_identified_by))\n",
"date_label = str(next(g.objects(date_label_uri, RDFS.label)))\n",
"# the calendar type is attached to the label node, not to the time-span itself\n",
"calendar_type = read_ismi_type(\n",
"    date_label_uri, ISMI_CALENDAR_TYPE_NS, ['gregorian', 'julian', 'islamic'], 'calendar type'\n",
")\n",
"\n",
"#\n",
"# create undate\n",
"#\n",
"if date_type == 'day':\n",
"    xsd_date = next(g.objects(uri, crmNs.P82_at_some_time_within))\n",
"    date = Undate.parse(str(xsd_date), 'ISO8601')\n",
"    date.precision = DatePrecision.DAY\n",
"\n",
"elif date_type == 'year':\n",
"    xsd_date_from = next(g.objects(uri, crmNs.P82a_begin_of_the_begin))\n",
"    xsd_date_until = next(g.objects(uri, crmNs.P82b_end_of_the_end))\n",
"    date_from = datetime.date.fromisoformat(str(xsd_date_from))\n",
"    if calendar_type == 'gregorian':\n",
"        # this should be fine\n",
"        date = Undate(year=date_from.year)\n",
"    else:\n",
"        # create day precision Undate from end date\n",
"        date = Undate.parse(str(xsd_date_until), 'ISO8601')\n",
"        # change earliest date\n",
"        date.earliest = Date(year=date_from.year, month=date_from.month, day=date_from.day)\n",
"        # NOTE(review): the nesting of this assignment was ambiguous in the source; it is\n",
"        # applied to the non-Gregorian case only, so a Gregorian year keeps the precision\n",
"        # that Undate(year=...) gives it -- confirm this is the intended behaviour\n",
"        date.precision = DatePrecision.DAY\n",
"\n",
"elif date_type == 'range':\n",
"    xsd_date_from = next(g.objects(uri, crmNs.P82a_begin_of_the_begin))\n",
"    xsd_date_until = next(g.objects(uri, crmNs.P82b_end_of_the_end))\n",
"    # create day precision Undate from start date\n",
"    date = Undate.parse(str(xsd_date_from), 'ISO8601')\n",
"    # change latest date\n",
"    date_until = datetime.date.fromisoformat(str(xsd_date_until))\n",
"    date.latest = Date(year=date_until.year, month=date_until.month, day=date_until.day)\n",
"    # change precision\n",
"    date.precision = DatePrecision.DAY\n",
"\n",
"# every date type carries the original label from the RDF data\n",
"date.label = date_label\n",
"\n",
"print(f\"{date_label=} {date_type=} {calendar_type=} {date=}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "742ba275-7de6-461b-8891-6f06dbdd89a0",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
2 changes: 2 additions & 0 deletions examples/use-cases/ismi/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
jupyterlab~=4.3.1
rdflib~=7.1.1

0 comments on commit f0ee32c

Please sign in to comment.