-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add notebook with first examples of converting ismi dates to undate.
- Loading branch information
Showing
2 changed files
with
181 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,179 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"id": "0fa36628-ccf2-4977-8c4c-e0a85e2b37b6", | ||
"metadata": {}, | ||
"source": [ | ||
"# Working with ISMI project dates" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "ffd4b544-8957-494e-9e09-b703d68bb7df", | ||
"metadata": {}, | ||
"source": [ | ||
"## Load date samples from RDF" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "a8a36e7f-6057-44d1-8466-6709910d4249", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from rdflib import Graph, RDF, URIRef\n", | ||
"from rdflib.namespace import Namespace, RDFS\n", | ||
"from undate.undate import Undate\n", | ||
"\n", | ||
"# additional RDF namespaces\n", | ||
"crmNs = Namespace('http://www.cidoc-crm.org/cidoc-crm/')\n", | ||
"\n", | ||
"g = Graph()\n", | ||
"g.bind('crm', crmNs)\n", | ||
"# load ISMI RDF sample data\n", | ||
"g.parse('data/ismi-crm-date-samples.ttl')\n", | ||
"# check: number of triples\n", | ||
"len(g)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "c940ca2b-b369-4511-8dc9-420bdaeb3e65", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"date_uris = [u for u in g.subjects(RDF.type, crmNs['E52_Time-Span'])]\n", | ||
"\n", | ||
"for uri in date_uris:\n", | ||
" q = '''SELECT ?uri ?label ?note \n", | ||
" WHERE { \n", | ||
" ?uri crm:P3_has_note ?note ;\n", | ||
" crm:P1_is_identified_by / rdfs:label ?label .\n", | ||
" } limit 10'''\n", | ||
" res = g.query(q, initBindings={'uri': uri})\n", | ||
" for r in res:\n", | ||
" print(f\"uri={str(uri)} label={r.label} note={r.note}\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "16361060-657f-431c-b70f-9101d550aa38", | ||
"metadata": {}, | ||
"source": [ | ||
"## Convert RDF dates to Undate" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "e443b974-930b-4a5d-8f21-641b4556b159", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from undate.date import DatePrecision, Date\n", | ||
"import datetime\n", | ||
"\n", | ||
"uri = date_uris[1]\n", | ||
"\n", | ||
"#\n", | ||
"# read date type\n", | ||
"#\n", | ||
"date_type = None\n", | ||
"for date_type_uri in g.objects(uri, crmNs.P2_has_type):\n", | ||
" for dt in ['day', 'year', 'range']:\n", | ||
" if str(date_type_uri) == 'http://content.mpiwg-berlin.mpg.de/ns/ismi/type/date/' + dt:\n", | ||
" date_type = dt\n", | ||
"\n", | ||
"if not date_type:\n", | ||
" raise RuntimeError(f\"Unknown datetype URI {date_type_uri}\")\n", | ||
"\n", | ||
"#\n", | ||
"# read label and calendar\n", | ||
"#\n", | ||
"date_label_uri = next(g.objects(uri, crmNs.P1_is_identified_by))\n", | ||
"date_label = str(next(g.objects(date_label_uri, RDFS.label)))\n", | ||
"for date_label_calendar_uri in g.objects(date_label_uri, crmNs.P2_has_type):\n", | ||
" for ct in ['gregorian', 'julian', 'islamic']:\n", | ||
" if str(date_label_calendar_uri) == 'http://content.mpiwg-berlin.mpg.de/ns/ismi/type/calendar/' + ct:\n", | ||
" calendar_type = ct\n", | ||
"\n", | ||
"if not calendar_type:\n", | ||
" raise RuntimeError(f\"Unknown calendar type URI {date_label_calendar_uri}\")\n", | ||
"\n", | ||
"#\n", | ||
"# create undate\n", | ||
"#\n", | ||
"if date_type == 'day':\n", | ||
" xsd_date = next(g.objects(uri, crmNs.P82_at_some_time_within))\n", | ||
" date = Undate.parse(str(xsd_date), 'ISO8601')\n", | ||
" date.precision = DatePrecision.DAY\n", | ||
" date.label = date_label\n", | ||
"\n", | ||
"if date_type == 'year':\n", | ||
" xsd_date_from = next(g.objects(uri, crmNs.P82a_begin_of_the_begin))\n", | ||
" xsd_date_until = next(g.objects(uri, crmNs.P82b_end_of_the_end))\n", | ||
" date_from = datetime.date.fromisoformat(str(xsd_date_from))\n", | ||
" if calendar_type == 'gregorian':\n", | ||
" # this should be fine\n", | ||
" date = Undate(year=date_from.year)\n", | ||
"\n", | ||
" else:\n", | ||
" # create day precision Undate from end date\n", | ||
" date = Undate.parse(str(xsd_date_until), 'ISO8601')\n", | ||
" # change earliest date\n", | ||
" date.earliest = Date(year=date_from.year, month=date_from.month, day=date_from.day)\n", | ||
"\n", | ||
" # change precision and label\n", | ||
" date.precision = DatePrecision.DAY\n", | ||
" date.label = date_label\n", | ||
"\n", | ||
"if date_type == 'range':\n", | ||
" xsd_date_from = next(g.objects(uri, crmNs.P82a_begin_of_the_begin))\n", | ||
" xsd_date_until = next(g.objects(uri, crmNs.P82b_end_of_the_end))\n", | ||
" # create day precision Undate from start date\n", | ||
" date = Undate.parse(str(xsd_date_from), 'ISO8601')\n", | ||
" # change latest date\n", | ||
" date_until = datetime.date.fromisoformat(str(xsd_date_until))\n", | ||
" date.latest = Date(year=date_until.year, month=date_until.month, day=date_until.day)\n", | ||
" # change precision and label\n", | ||
" date.precision = DatePrecision.DAY\n", | ||
" date.label = date_label\n", | ||
"\n", | ||
"\n", | ||
"print(f\"{date_label=} {date_type=} {calendar_type=} {date=}\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "742ba275-7de6-461b-8891-6f06dbdd89a0", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.12.5" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
jupyterlab~=4.3.1 | ||
rdflib~=7.1.1 |