Skip to content

Commit

Permalink
Adds support for dcatap:availability in dcat:Distribution
Browse files Browse the repository at this point in the history
  • Loading branch information
seitenbau-govdata committed Jun 30, 2022
1 parent 7475fdc commit 7a2288f
Show file tree
Hide file tree
Showing 4 changed files with 276 additions and 2 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,7 @@ This mapping is compatible with the [DCAT-AP v1.1](https://joinup.ec.europa.eu/a
| dcat:Distribution | foaf:page | resource:documentation | | list | See note about lists |
| dcat:Distribution | dct:language | resource:language | | list | See note about lists |
| dcat:Distribution | dct:conformsTo | resource:conforms_to | | list | See note about lists |
| dcat:Distribution | dcatap:availability | resource:availability | | text | See note about URIs |
| spdx:Checksum | spdx:checksumValue | resource:hash | | text | |
| spdx:Checksum | spdx:algorithm | resource:hash_algorithm | | text | |

Expand Down
32 changes: 32 additions & 0 deletions ckanext/dcat/profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

DCT = Namespace("http://purl.org/dc/terms/")
DCAT = Namespace("http://www.w3.org/ns/dcat#")
DCATAP = Namespace("http://data.europa.eu/r5r/")
ADMS = Namespace("http://www.w3.org/ns/adms#")
VCARD = Namespace("http://www.w3.org/2006/vcard/ns#")
FOAF = Namespace("http://xmlns.com/foaf/0.1/")
Expand All @@ -40,6 +41,7 @@
namespaces = {
'dct': DCT,
'dcat': DCAT,
'dcatap': DCATAP,
'adms': ADMS,
'vcard': VCARD,
'foaf': FOAF,
Expand Down Expand Up @@ -1172,6 +1174,10 @@ def parse_dataset(self, dataset_dict, dataset_ref):
rdflib.term.URIRef)
else '')

# Remember the (internal) distribution reference for referencing in
# further profiles, e.g. for adding more properties
resource_dict['distribution_ref'] = str(distribution)

dataset_dict['resources'].append(resource_dict)

if self.compatibility_mode:
Expand Down Expand Up @@ -1508,6 +1514,20 @@ def parse_dataset(self, dataset_dict, dataset_ref):
dataset_dict['extras'].append({'key': 'spatial_resolution_in_meters',
'value': json.dumps(spatial_resolution_in_meters)})

# Resources
for distribution in self._distributions(dataset_ref):
distribution_ref = str(distribution)
for resource_dict in dataset_dict.get('resources', []):
# Match distribution in graph and distribution in resource dict
if resource_dict and distribution_ref == resource_dict.get('distribution_ref'):
# Simple values
for key, predicate in (
('availability', DCATAP.availability),
):
value = self._object_value(distribution, predicate)
if value:
resource_dict[key] = value

return dataset_dict

def graph_from_dataset(self, dataset_dict, dataset_ref):
Expand Down Expand Up @@ -1560,6 +1580,18 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
except (ValueError, TypeError):
self.g.add((dataset_ref, DCAT.spatialResolutionInMeters, Literal(value)))

# Resources
for resource_dict in dataset_dict.get('resources', []):

distribution = CleanedURIRef(resource_uri(resource_dict))

# Simple values
items = [
('availability', DCATAP.availability, None, URIRefOrLiteral)
]

self._add_triples_from_dict(resource_dict, distribution, items)

def graph_from_catalog(self, catalog_dict, catalog_ref):

# call super method
Expand Down
181 changes: 180 additions & 1 deletion ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# -*- coding: utf-8 -*-

from builtins import str
from builtins import object
import os
Expand Down Expand Up @@ -32,11 +34,13 @@ def test_dataset_all_fields(self):
isreferencedby_uri = 'https://doi.org/10.1038/sdata.2018.22'
temporal_start = '1905-03-01T03:00:00+02:00'
temporal_end = '2013-01-05'
dist_availability = "http://publications.europa.eu/resource/authority/planned-availability/AVAILABLE"

data = '''<?xml version="1.0" encoding="utf-8" ?>
<rdf:RDF
xmlns:dct="http://purl.org/dc/terms/"
xmlns:dcat="http://www.w3.org/ns/dcat#"
xmlns:dcatap="http://data.europa.eu/r5r/"
xmlns:schema="http://schema.org/"
xmlns:time="http://www.w3.org/2006/time"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
Expand All @@ -51,10 +55,21 @@ def test_dataset_all_fields(self):
<dcat:temporalResolution rdf:datatype="http://www.w3.org/2001/XMLSchema#duration">{temp_res}</dcat:temporalResolution>
<dcat:spatialResolutionInMeters rdf:datatype="http://www.w3.org/2001/XMLSchema#decimal">{spatial_res}</dcat:spatialResolutionInMeters>
<dct:isReferencedBy rdf:resource="{referenced_by}"/>
<dcat:distribution>
<dcat:Distribution rdf:about="https://data.some.org/catalog/datasets/9df8df51-63db-37a8-e044-0003ba9b0d98/1">
<dcat:accessURL rdf:resource="http://geodienste.hamburg.de/darf_nicht_die_gleiche_url_wie_downloadurl_sein_da_es_sonst_nicht_angezeigt_wird"/>
<dct:description>Das ist eine deutsche Beschreibung der Distribution</dct:description>
<dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2017-02-27</dct:issued>
<dct:title>Download WFS Naturräume Geest und Marsch (GML)</dct:title>
<dct:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2017-03-07T10:00:00</dct:modified>
<dcatap:availability rdf:resource="{availability}"/>
</dcat:Distribution>
</dcat:distribution>
</dcat:Dataset>
</rdf:RDF>
'''.format(start=temporal_start, end=temporal_end, temp_res=temporal_resolution,
spatial_res=spatial_resolution_in_meters, referenced_by=isreferencedby_uri)
spatial_res=spatial_resolution_in_meters, referenced_by=isreferencedby_uri,
availability=dist_availability)

p = RDFParser(profiles=DCAT_AP_PROFILES)

Expand All @@ -66,6 +81,7 @@ def test_dataset_all_fields(self):

dataset = datasets[0]

# Dataset
extras = self._extras(dataset)

temporal_resolution_list = json.loads(extras['temporal_resolution'])
Expand All @@ -83,6 +99,168 @@ def test_dataset_all_fields(self):
assert extras['temporal_start'] == temporal_start
assert extras['temporal_end'] == temporal_end

# Resources
assert len(dataset['resources']) == 1

resource = dataset['resources'][0]

# Simple values
assert resource['availability'] == dist_availability

def test_availability_distibutions_without_uri(self):
    """Parse ``dcatap:availability`` from a distribution without a URI.

    The single ``dcat:Distribution`` element in the fixture carries no
    ``rdf:about`` attribute and gives its availability as an
    ``rdf:resource`` URI. The one parsed resource must expose exactly
    that URI under the ``availability`` key.
    """
    dist_availability = "http://publications.europa.eu/resource/authority/planned-availability/AVAILABLE"

    # RDF/XML fixture; ``{availability}`` is filled in below.
    rdf_template = '''<?xml version="1.0" encoding="utf-8" ?>
<rdf:RDF
xmlns:dct="http://purl.org/dc/terms/"
xmlns:dcat="http://www.w3.org/ns/dcat#"
xmlns:dcatap="http://data.europa.eu/r5r/"
xmlns:schema="http://schema.org/"
xmlns:time="http://www.w3.org/2006/time"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
<dcat:Dataset rdf:about="http://example.org">
<dcat:distribution>
<dcat:Distribution>
<dcat:accessURL rdf:resource="http://geodienste.hamburg.de/darf_nicht_die_gleiche_url_wie_downloadurl_sein_da_es_sonst_nicht_angezeigt_wird"/>
<dct:description>Das ist eine deutsche Beschreibung der Distribution</dct:description>
<dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2017-02-27</dct:issued>
<dct:title>Download WFS Naturräume Geest und Marsch (GML)</dct:title>
<dct:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2017-03-07T10:00:00</dct:modified>
<dcatap:availability rdf:resource="{availability}"/>
</dcat:Distribution>
</dcat:distribution>
</dcat:Dataset>
</rdf:RDF>
'''
    data = rdf_template.format(availability=dist_availability)

    parser = RDFParser(profiles=DCAT_AP_PROFILES)
    parser.parse(data)

    parsed_datasets = list(parser.datasets())
    assert len(parsed_datasets) == 1

    resources = parsed_datasets[0]['resources']
    assert len(resources) == 1

    assert resources[0]['availability'] == dist_availability

def test_availability_multiple_distibutions(self):
    """Parse ``dcatap:availability`` for several distributions at once.

    The fixture holds three distributions (the second one has an
    ``rdf:about`` URI, the other two have none), each with a different
    availability URI. Every distribution repeats its availability URI in
    ``dct:description``, so the test can verify that each parsed resource
    received the availability of its own distribution by comparing the
    two fields.
    """
    dist_availability_1 = "http://publications.europa.eu/resource/authority/planned-availability/AVAILABLE"
    dist_availability_2 = "http://publications.europa.eu/resource/authority/planned-availability/EXPERIMENTAL"
    dist_availability_3 = "http://publications.europa.eu/resource/authority/planned-availability/STABLE"

    # RDF/XML fixture; the ``{availability_N}`` placeholders are filled
    # in below, once as description text and once as availability URI.
    rdf_template = '''<?xml version="1.0" encoding="utf-8" ?>
<rdf:RDF
xmlns:dct="http://purl.org/dc/terms/"
xmlns:dcat="http://www.w3.org/ns/dcat#"
xmlns:dcatap="http://data.europa.eu/r5r/"
xmlns:schema="http://schema.org/"
xmlns:time="http://www.w3.org/2006/time"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
<dcat:Dataset rdf:about="http://example.org">
<dcat:distribution>
<dcat:Distribution>
<dcat:accessURL rdf:resource="http://geodienste.hamburg.de/darf_nicht_die_gleiche_url_wie_downloadurl_sein_da_es_sonst_nicht_angezeigt_wird"/>
<dct:description>{availability_1}</dct:description>
<dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2017-02-27</dct:issued>
<dct:title>Download WFS Naturräume Geest und Marsch (GML)</dct:title>
<dct:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2017-03-07T10:00:00</dct:modified>
<dcatap:availability rdf:resource="{availability_1}"/>
</dcat:Distribution>
</dcat:distribution>
<dcat:distribution>
<dcat:Distribution rdf:about="https://data.some.org/catalog/datasets/9df8df51-63db-37a8-e044-0003ba9b0d98/1">
<dcat:accessURL rdf:resource="http://geodienste.hamburg.de/darf_nicht_die_gleiche_url_wie_downloadurl_sein_da_es_sonst_nicht_angezeigt_wird"/>
<dct:description>{availability_2}</dct:description>
<dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2017-02-27</dct:issued>
<dct:title>Download WFS Naturräume Geest und Marsch (GML)</dct:title>
<dct:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2017-03-07T10:00:00</dct:modified>
<dcatap:availability rdf:resource="{availability_2}"/>
</dcat:Distribution>
</dcat:distribution>
<dcat:distribution>
<dcat:Distribution>
<dcat:accessURL rdf:resource="http://geodienste.hamburg.de/darf_nicht_die_gleiche_url_wie_downloadurl_sein_da_es_sonst_nicht_angezeigt_wird"/>
<dct:description>{availability_3}</dct:description>
<dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2017-02-27</dct:issued>
<dct:title>Download WFS Naturräume Geest und Marsch (GML)</dct:title>
<dct:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2017-03-07T10:00:00</dct:modified>
<dcatap:availability rdf:resource="{availability_3}"/>
</dcat:Distribution>
</dcat:distribution>
</dcat:Dataset>
</rdf:RDF>
'''
    data = rdf_template.format(
        availability_1=dist_availability_1,
        availability_2=dist_availability_2,
        availability_3=dist_availability_3,
    )

    parser = RDFParser(profiles=DCAT_AP_PROFILES)
    parser.parse(data)

    parsed_datasets = list(parser.datasets())
    assert len(parsed_datasets) == 1

    resources = parsed_datasets[0]['resources']
    assert len(resources) == 3

    # The description mirrors the expected availability of the same
    # distribution, so a mismatch means values were assigned to the
    # wrong resource.
    for parsed_resource in resources:
        assert parsed_resource['availability'] == parsed_resource['description']

def test_availability_distibutions_literal(self):
    """Parse ``dcatap:availability`` given as plain text instead of a URI.

    The fixture's single distribution (no ``rdf:about`` attribute) states
    its availability as element text rather than an ``rdf:resource``
    reference. The parsed resource must carry that text unchanged under
    the ``availability`` key.
    """
    dist_availability = "AVAILABLE"

    # RDF/XML fixture; ``{availability}`` is filled in below.
    rdf_template = '''<?xml version="1.0" encoding="utf-8" ?>
<rdf:RDF
xmlns:dct="http://purl.org/dc/terms/"
xmlns:dcat="http://www.w3.org/ns/dcat#"
xmlns:dcatap="http://data.europa.eu/r5r/"
xmlns:schema="http://schema.org/"
xmlns:time="http://www.w3.org/2006/time"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
<dcat:Dataset rdf:about="http://example.org">
<dcat:distribution>
<dcat:Distribution>
<dcat:accessURL rdf:resource="http://geodienste.hamburg.de/darf_nicht_die_gleiche_url_wie_downloadurl_sein_da_es_sonst_nicht_angezeigt_wird"/>
<dct:description>Das ist eine deutsche Beschreibung der Distribution</dct:description>
<dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2017-02-27</dct:issued>
<dct:title>Download WFS Naturräume Geest und Marsch (GML)</dct:title>
<dct:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2017-03-07T10:00:00</dct:modified>
<dcatap:availability>{availability}</dcatap:availability>
</dcat:Distribution>
</dcat:distribution>
</dcat:Dataset>
</rdf:RDF>
'''
    data = rdf_template.format(availability=dist_availability)

    parser = RDFParser(profiles=DCAT_AP_PROFILES)
    parser.parse(data)

    parsed_datasets = list(parser.datasets())
    assert len(parsed_datasets) == 1

    resources = parsed_datasets[0]['resources']
    assert len(resources) == 1

    assert resources[0]['availability'] == dist_availability

def test_temporal_resolution_multiple(self):
g = Graph()

Expand Down Expand Up @@ -157,6 +335,7 @@ def test_isreferencedby_multiple(self):
assert isreferencedby_uri in isreferencedby_list
assert isreferencedby_uri_2 in isreferencedby_list


class TestEuroDCATAP2ProfileParsingSpatial(BaseParseTest):

def test_spatial_multiple_dct_spatial_instances(self):
Expand Down
64 changes: 63 additions & 1 deletion ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# -*- coding: utf-8 -*-

from builtins import str
from builtins import object
import json
Expand All @@ -16,7 +18,7 @@

from ckanext.dcat import utils
from ckanext.dcat.processors import RDFSerializer
from ckanext.dcat.profiles import (DCAT, DCT, ADMS, XSD, VCARD, FOAF, SCHEMA,
from ckanext.dcat.profiles import (DCAT, DCATAP, DCT, ADMS, XSD, VCARD, FOAF, SCHEMA,
SKOS, LOCN, GSP, OWL, SPDX, GEOJSON_IMT)
from ckanext.dcat.utils import DCAT_EXPOSE_SUBCATALOGS
from ckanext.dcat.tests.utils import BaseSerializeTest
Expand Down Expand Up @@ -303,3 +305,63 @@ def test_temporal(self):
for temporal_obj in temporal_obj_list:
triples.extend(self._triples(g, temporal_obj, predicate, parse_date(extras['temporal_end']).isoformat(), XSD.dateTime))
assert len(triples) == 1

def test_distribution_fields(self):
    """Serialize a resource whose ``availability`` is a URI.

    Builds a dataset with one resource, serializes it, and checks that
    the graph contains exactly one ``dcat:distribution`` for the dataset,
    that the distribution is typed ``dcat:Distribution``, and that its
    ``dcatap:availability`` object is a ``URIRef`` of the resource value.
    """
    availability = 'http://publications.europa.eu/resource/authority/planned-availability/EXPERIMENTAL'
    package_id = '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6'

    dataset = {
        'id': package_id,
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [
            {
                'id': 'c041c635-054f-4431-b647-f9186926d021',
                'package_id': package_id,
                'name': 'Distribution name',
                'availability': availability,
            },
        ],
    }

    serializer = RDFSerializer(profiles=DCAT_AP_PROFILES)
    graph = serializer.g

    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution_triples = list(
        graph.triples((dataset_ref, DCAT.distribution, None)))
    assert len(distribution_triples) == 1

    # The object (index 2) of the matched triple is the distribution node.
    distribution = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]

    assert self._triple(graph, distribution, RDF.type, DCAT.Distribution)
    assert self._triple(graph, distribution, DCATAP.availability, URIRef(availability))

def test_distribution_availability_literal(self):
    """Serialize a resource whose ``availability`` is plain text.

    Builds a dataset with one resource whose availability is the string
    ``EXPERIMENTAL``, serializes it, and checks that the graph contains
    exactly one ``dcat:distribution`` for the dataset, that the
    distribution is typed ``dcat:Distribution``, and that its
    ``dcatap:availability`` object is a ``Literal`` of the resource value.
    """
    availability = 'EXPERIMENTAL'
    package_id = '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6'

    dataset = {
        'id': package_id,
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [
            {
                'id': 'c041c635-054f-4431-b647-f9186926d021',
                'package_id': package_id,
                'name': 'Distribution name',
                'availability': availability,
            },
        ],
    }

    serializer = RDFSerializer(profiles=DCAT_AP_PROFILES)
    graph = serializer.g

    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution_triples = list(
        graph.triples((dataset_ref, DCAT.distribution, None)))
    assert len(distribution_triples) == 1

    # The object (index 2) of the matched triple is the distribution node.
    distribution = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]

    assert self._triple(graph, distribution, RDF.type, DCAT.Distribution)
    assert self._triple(graph, distribution, DCATAP.availability, Literal(availability))

0 comments on commit 7a2288f

Please sign in to comment.