diff --git a/README.md b/README.md index 5e77c4df..3b521afe 100644 --- a/README.md +++ b/README.md @@ -375,6 +375,7 @@ This mapping is compatible with the [DCAT-AP v1.1](https://joinup.ec.europa.eu/a | dcat:Distribution | foaf:page | resource:documentation | | list | See note about lists | | dcat:Distribution | dct:language | resource:language | | list | See note about lists | | dcat:Distribution | dct:conformsTo | resource:conforms_to | | list | See note about lists | +| dcat:Distribution | dcatap:availability | resource:availability | | text | See note about URIs | | spdx:Checksum | spdx:checksumValue | resource:hash | | text | | | spdx:Checksum | spdx:algorithm | resource:hash_algorithm | | text | | diff --git a/ckanext/dcat/profiles.py b/ckanext/dcat/profiles.py index 4a8b97de..97f15c2f 100644 --- a/ckanext/dcat/profiles.py +++ b/ckanext/dcat/profiles.py @@ -25,6 +25,7 @@ DCT = Namespace("http://purl.org/dc/terms/") DCAT = Namespace("http://www.w3.org/ns/dcat#") +DCATAP = Namespace("http://data.europa.eu/r5r/") ADMS = Namespace("http://www.w3.org/ns/adms#") VCARD = Namespace("http://www.w3.org/2006/vcard/ns#") FOAF = Namespace("http://xmlns.com/foaf/0.1/") @@ -40,6 +41,7 @@ namespaces = { 'dct': DCT, 'dcat': DCAT, + 'dcatap': DCATAP, 'adms': ADMS, 'vcard': VCARD, 'foaf': FOAF, @@ -1172,6 +1174,10 @@ def parse_dataset(self, dataset_dict, dataset_ref): rdflib.term.URIRef) else '') + # Remember the (internal) distribution reference for referencing in + # further profiles, e.g. for adding more properties + resource_dict['distribution_ref'] = str(distribution) + dataset_dict['resources'].append(resource_dict) if self.compatibility_mode: @@ -1508,6 +1514,20 @@ def parse_dataset(self, dataset_dict, dataset_ref): dataset_dict['extras'].append({'key': 'spatial_resolution_in_meters', 'value': json.dumps(spatial_resolution_in_meters)}) + # Resources + for distribution in self._distributions(dataset_ref): + distribution_ref = str(distribution) + for resource_dict in dataset_dict.get('resources', []): + # Match distribution in graph and distribution in resource dict + if resource_dict and distribution_ref == resource_dict.get('distribution_ref'): + # Simple values + for key, predicate in ( + ('availability', DCATAP.availability), + ): + value = self._object_value(distribution, predicate) + if value: + resource_dict[key] = value + return dataset_dict def graph_from_dataset(self, dataset_dict, dataset_ref): @@ -1560,6 +1580,18 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): except (ValueError, TypeError): self.g.add((dataset_ref, DCAT.spatialResolutionInMeters, Literal(value))) + # Resources + for resource_dict in dataset_dict.get('resources', []): + + distribution = CleanedURIRef(resource_uri(resource_dict)) + + # Simple values + items = [ + ('availability', DCATAP.availability, None, URIRefOrLiteral) + ] + + self._add_triples_from_dict(resource_dict, distribution, items) + def graph_from_catalog(self, catalog_dict, catalog_ref): # call super method diff --git a/ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py b/ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py index f856158d..eb86880f 100644 --- a/ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py +++ b/ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + from builtins import str from builtins import object import os @@ -32,11 +34,13 @@ def test_dataset_all_fields(self): isreferencedby_uri = 'https://doi.org/10.1038/sdata.2018.22' temporal_start = '1905-03-01T03:00:00+02:00' temporal_end = '2013-01-05' + dist_availability = "http://publications.europa.eu/resource/authority/planned-availability/AVAILABLE" data = ''' {temp_res} {spatial_res} + + + + Das ist eine deutsche Beschreibung der Distribution + 2017-02-27 + Download WFS Naturräume Geest und Marsch (GML) + 2017-03-07T10:00:00 + + + '''.format(start=temporal_start, end=temporal_end, temp_res=temporal_resolution, - spatial_res=spatial_resolution_in_meters, referenced_by=isreferencedby_uri) + spatial_res=spatial_resolution_in_meters, referenced_by=isreferencedby_uri, + availability=dist_availability) p = RDFParser(profiles=DCAT_AP_PROFILES) @@ -66,6 +81,7 @@ def test_dataset_all_fields(self): dataset = datasets[0] + # Dataset extras = self._extras(dataset) temporal_resolution_list = json.loads(extras['temporal_resolution']) @@ -83,6 +99,168 @@ def test_dataset_all_fields(self): assert extras['temporal_start'] == temporal_start assert extras['temporal_end'] == temporal_end + # Resources + assert len(dataset['resources']) == 1 + + resource = dataset['resources'][0] + + # Simple values + assert resource['availability'] == dist_availability + + def test_availability_distibutions_without_uri(self): + + dist_availability = "http://publications.europa.eu/resource/authority/planned-availability/AVAILABLE" + + data = ''' + + + + + + Das ist eine deutsche Beschreibung der Distribution + 2017-02-27 + Download WFS Naturräume Geest und Marsch (GML) + 2017-03-07T10:00:00 + + + + + + '''.format(availability=dist_availability) + + p = RDFParser(profiles=DCAT_AP_PROFILES) + + p.parse(data) + + datasets = [d for d in p.datasets()] + + assert len(datasets) == 1 + + dataset = datasets[0] + + assert len(dataset['resources']) == 1 + + resource = dataset['resources'][0] + + assert resource['availability'] == dist_availability + + def test_availability_multiple_distibutions(self): + + dist_availability_1 = "http://publications.europa.eu/resource/authority/planned-availability/AVAILABLE" + dist_availability_2 = "http://publications.europa.eu/resource/authority/planned-availability/EXPERIMENTAL" + dist_availability_3 = "http://publications.europa.eu/resource/authority/planned-availability/STABLE" + + data = ''' + + + + + + {availability_1} + 2017-02-27 + Download WFS Naturräume Geest und Marsch (GML) + 2017-03-07T10:00:00 + + + + + + + {availability_2} + 2017-02-27 + Download WFS Naturräume Geest und Marsch (GML) + 2017-03-07T10:00:00 + + + + + + + {availability_3} + 2017-02-27 + Download WFS Naturräume Geest und Marsch (GML) + 2017-03-07T10:00:00 + + + + + + '''.format(availability_1=dist_availability_1, availability_2=dist_availability_2, + availability_3=dist_availability_3) + + p = RDFParser(profiles=DCAT_AP_PROFILES) + + p.parse(data) + + datasets = [d for d in p.datasets()] + + assert len(datasets) == 1 + + dataset = datasets[0] + + assert len(dataset['resources']) == 3 + + for resource in dataset['resources']: + assert resource['availability'] == resource['description'] + + def test_availability_distibutions_literal(self): + + dist_availability = "AVAILABLE" + + data = ''' + + + + + + Das ist eine deutsche Beschreibung der Distribution + 2017-02-27 + Download WFS Naturräume Geest und Marsch (GML) + 2017-03-07T10:00:00 + {availability} + + + + + '''.format(availability=dist_availability) + + p = RDFParser(profiles=DCAT_AP_PROFILES) + + p.parse(data) + + datasets = [d for d in p.datasets()] + + assert len(datasets) == 1 + + dataset = datasets[0] + + assert len(dataset['resources']) == 1 + + resource = dataset['resources'][0] + + assert resource['availability'] == dist_availability + def test_temporal_resolution_multiple(self): g = Graph() @@ -157,6 +335,7 @@ def test_isreferencedby_multiple(self): assert isreferencedby_uri in isreferencedby_list assert isreferencedby_uri_2 in isreferencedby_list + class TestEuroDCATAP2ProfileParsingSpatial(BaseParseTest): def test_spatial_multiple_dct_spatial_instances(self): diff --git a/ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py b/ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py index 2c5f97d0..796734aa 100644 --- a/ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py +++ b/ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + from builtins import str from builtins import object import json @@ -16,7 +18,7 @@ from ckanext.dcat import utils from ckanext.dcat.processors import RDFSerializer -from ckanext.dcat.profiles import (DCAT, DCT, ADMS, XSD, VCARD, FOAF, SCHEMA, +from ckanext.dcat.profiles import (DCAT, DCATAP, DCT, ADMS, XSD, VCARD, FOAF, SCHEMA, SKOS, LOCN, GSP, OWL, SPDX, GEOJSON_IMT) from ckanext.dcat.utils import DCAT_EXPOSE_SUBCATALOGS from ckanext.dcat.tests.utils import BaseSerializeTest @@ -303,3 +305,63 @@ def test_temporal(self): for temporal_obj in temporal_obj_list: triples.extend(self._triples(g, temporal_obj, predicate, parse_date(extras['temporal_end']).isoformat(), XSD.dateTime)) assert len(triples) == 1 + + def test_distribution_fields(self): + + resource = { + 'id': 'c041c635-054f-4431-b647-f9186926d021', + 'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6', + 'name': 'Distribution name', + 'availability': 'http://publications.europa.eu/resource/authority/planned-availability/EXPERIMENTAL', + } + + dataset = { + 'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6', + 'name': 'test-dataset', + 'title': 'Test DCAT dataset', + 'resources': [ + resource + ] + } + + s = RDFSerializer(profiles=DCAT_AP_PROFILES) + g = s.g + + dataset_ref = s.graph_from_dataset(dataset) + + assert len([t for t in g.triples((dataset_ref, DCAT.distribution, None))]) == 1 + + distribution = self._triple(g, dataset_ref, DCAT.distribution, None)[2] + + assert self._triple(g, distribution, RDF.type, DCAT.Distribution) + assert self._triple(g, distribution, DCATAP.availability, URIRef(resource['availability'])) + + def test_distribution_availability_literal(self): + + resource = { + 'id': 'c041c635-054f-4431-b647-f9186926d021', + 'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6', + 'name': 'Distribution name', + 'availability': 'EXPERIMENTAL', + } + + dataset = { + 'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6', + 'name': 'test-dataset', + 'title': 'Test DCAT dataset', + 'resources': [ + resource + ] + } + + s = RDFSerializer(profiles=DCAT_AP_PROFILES) + g = s.g + + dataset_ref = s.graph_from_dataset(dataset) + + assert len([t for t in g.triples((dataset_ref, DCAT.distribution, None))]) == 1 + + distribution = self._triple(g, dataset_ref, DCAT.distribution, None)[2] + + assert self._triple(g, distribution, RDF.type, DCAT.Distribution) + assert self._triple(g, distribution, DCATAP.availability, Literal(resource['availability']))