diff --git a/README.md b/README.md
index 5e77c4df..3b521afe 100644
--- a/README.md
+++ b/README.md
@@ -375,6 +375,7 @@ This mapping is compatible with the [DCAT-AP v1.1](https://joinup.ec.europa.eu/a
| dcat:Distribution | foaf:page | resource:documentation | | list | See note about lists |
| dcat:Distribution | dct:language | resource:language | | list | See note about lists |
| dcat:Distribution | dct:conformsTo | resource:conforms_to | | list | See note about lists |
+| dcat:Distribution | dcatap:availability | resource:availability | | text | See note about URIs |
| spdx:Checksum | spdx:checksumValue | resource:hash | | text | |
| spdx:Checksum | spdx:algorithm | resource:hash_algorithm | | text | |
diff --git a/ckanext/dcat/profiles.py b/ckanext/dcat/profiles.py
index 4a8b97de..97f15c2f 100644
--- a/ckanext/dcat/profiles.py
+++ b/ckanext/dcat/profiles.py
@@ -25,6 +25,7 @@
DCT = Namespace("http://purl.org/dc/terms/")
DCAT = Namespace("http://www.w3.org/ns/dcat#")
+DCATAP = Namespace("http://data.europa.eu/r5r/")
ADMS = Namespace("http://www.w3.org/ns/adms#")
VCARD = Namespace("http://www.w3.org/2006/vcard/ns#")
FOAF = Namespace("http://xmlns.com/foaf/0.1/")
@@ -40,6 +41,7 @@
namespaces = {
'dct': DCT,
'dcat': DCAT,
+ 'dcatap': DCATAP,
'adms': ADMS,
'vcard': VCARD,
'foaf': FOAF,
@@ -1172,6 +1174,10 @@ def parse_dataset(self, dataset_dict, dataset_ref):
rdflib.term.URIRef)
else '')
+ # Store the (internal) distribution reference so that later profiles can
+ # match this resource to its distribution, e.g. to add more properties
+ resource_dict['distribution_ref'] = str(distribution)
+
dataset_dict['resources'].append(resource_dict)
if self.compatibility_mode:
@@ -1508,6 +1514,20 @@ def parse_dataset(self, dataset_dict, dataset_ref):
dataset_dict['extras'].append({'key': 'spatial_resolution_in_meters',
'value': json.dumps(spatial_resolution_in_meters)})
+ # Resources
+ for distribution in self._distributions(dataset_ref):
+ distribution_ref = str(distribution)
+ for resource_dict in dataset_dict.get('resources', []):
+ # Match distribution in graph and distribution in resource dict
+ if resource_dict and distribution_ref == resource_dict.get('distribution_ref'):
+ # Simple values
+ for key, predicate in (
+ ('availability', DCATAP.availability),
+ ):
+ value = self._object_value(distribution, predicate)
+ if value:
+ resource_dict[key] = value
+
return dataset_dict
def graph_from_dataset(self, dataset_dict, dataset_ref):
@@ -1560,6 +1580,18 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
except (ValueError, TypeError):
self.g.add((dataset_ref, DCAT.spatialResolutionInMeters, Literal(value)))
+ # Resources
+ for resource_dict in dataset_dict.get('resources', []):
+
+ distribution = CleanedURIRef(resource_uri(resource_dict))
+
+ # Simple values
+ items = [
+ ('availability', DCATAP.availability, None, URIRefOrLiteral)
+ ]
+
+ self._add_triples_from_dict(resource_dict, distribution, items)
+
def graph_from_catalog(self, catalog_dict, catalog_ref):
# call super method
diff --git a/ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py b/ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py
index f856158d..eb86880f 100644
--- a/ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py
+++ b/ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
from builtins import str
from builtins import object
import os
@@ -32,11 +34,13 @@ def test_dataset_all_fields(self):
isreferencedby_uri = 'https://doi.org/10.1038/sdata.2018.22'
temporal_start = '1905-03-01T03:00:00+02:00'
temporal_end = '2013-01-05'
+ dist_availability = "http://publications.europa.eu/resource/authority/planned-availability/AVAILABLE"
data = '''
{temp_res}
{spatial_res}
+
+
+
+ Das ist eine deutsche Beschreibung der Distribution
+ 2017-02-27
+ Download WFS Naturräume Geest und Marsch (GML)
+ 2017-03-07T10:00:00
+
+
+
'''.format(start=temporal_start, end=temporal_end, temp_res=temporal_resolution,
- spatial_res=spatial_resolution_in_meters, referenced_by=isreferencedby_uri)
+ spatial_res=spatial_resolution_in_meters, referenced_by=isreferencedby_uri,
+ availability=dist_availability)
p = RDFParser(profiles=DCAT_AP_PROFILES)
@@ -66,6 +81,7 @@ def test_dataset_all_fields(self):
dataset = datasets[0]
+ # Dataset
extras = self._extras(dataset)
temporal_resolution_list = json.loads(extras['temporal_resolution'])
@@ -83,6 +99,168 @@ def test_dataset_all_fields(self):
assert extras['temporal_start'] == temporal_start
assert extras['temporal_end'] == temporal_end
+ # Resources
+ assert len(dataset['resources']) == 1
+
+ resource = dataset['resources'][0]
+
+ # Simple values
+ assert resource['availability'] == dist_availability
+
+ def test_availability_distibutions_without_uri(self):
+
+ dist_availability = "http://publications.europa.eu/resource/authority/planned-availability/AVAILABLE"
+
+ data = '''
+
+
+
+
+
+ Das ist eine deutsche Beschreibung der Distribution
+ 2017-02-27
+ Download WFS Naturräume Geest und Marsch (GML)
+ 2017-03-07T10:00:00
+
+
+
+
+
+ '''.format(availability=dist_availability)
+
+ p = RDFParser(profiles=DCAT_AP_PROFILES)
+
+ p.parse(data)
+
+ datasets = [d for d in p.datasets()]
+
+ assert len(datasets) == 1
+
+ dataset = datasets[0]
+
+ assert len(dataset['resources']) == 1
+
+ resource = dataset['resources'][0]
+
+ assert resource['availability'] == dist_availability
+
+ def test_availability_multiple_distibutions(self):
+
+ dist_availability_1 = "http://publications.europa.eu/resource/authority/planned-availability/AVAILABLE"
+ dist_availability_2 = "http://publications.europa.eu/resource/authority/planned-availability/EXPERIMENTAL"
+ dist_availability_3 = "http://publications.europa.eu/resource/authority/planned-availability/STABLE"
+
+ data = '''
+
+
+
+
+
+ {availability_1}
+ 2017-02-27
+ Download WFS Naturräume Geest und Marsch (GML)
+ 2017-03-07T10:00:00
+
+
+
+
+
+
+ {availability_2}
+ 2017-02-27
+ Download WFS Naturräume Geest und Marsch (GML)
+ 2017-03-07T10:00:00
+
+
+
+
+
+
+ {availability_3}
+ 2017-02-27
+ Download WFS Naturräume Geest und Marsch (GML)
+ 2017-03-07T10:00:00
+
+
+
+
+
+ '''.format(availability_1=dist_availability_1, availability_2=dist_availability_2,
+ availability_3=dist_availability_3)
+
+ p = RDFParser(profiles=DCAT_AP_PROFILES)
+
+ p.parse(data)
+
+ datasets = [d for d in p.datasets()]
+
+ assert len(datasets) == 1
+
+ dataset = datasets[0]
+
+ assert len(dataset['resources']) == 3
+
+ for resource in dataset['resources']:
+ assert resource['availability'] == resource['description']
+
+ def test_availability_distibutions_literal(self):
+
+ dist_availability = "AVAILABLE"
+
+ data = '''
+
+
+
+
+
+ Das ist eine deutsche Beschreibung der Distribution
+ 2017-02-27
+ Download WFS Naturräume Geest und Marsch (GML)
+ 2017-03-07T10:00:00
+ {availability}
+
+
+
+
+ '''.format(availability=dist_availability)
+
+ p = RDFParser(profiles=DCAT_AP_PROFILES)
+
+ p.parse(data)
+
+ datasets = [d for d in p.datasets()]
+
+ assert len(datasets) == 1
+
+ dataset = datasets[0]
+
+ assert len(dataset['resources']) == 1
+
+ resource = dataset['resources'][0]
+
+ assert resource['availability'] == dist_availability
+
def test_temporal_resolution_multiple(self):
g = Graph()
@@ -157,6 +335,7 @@ def test_isreferencedby_multiple(self):
assert isreferencedby_uri in isreferencedby_list
assert isreferencedby_uri_2 in isreferencedby_list
+
class TestEuroDCATAP2ProfileParsingSpatial(BaseParseTest):
def test_spatial_multiple_dct_spatial_instances(self):
diff --git a/ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py b/ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py
index 2c5f97d0..796734aa 100644
--- a/ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py
+++ b/ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
from builtins import str
from builtins import object
import json
@@ -16,7 +18,7 @@
from ckanext.dcat import utils
from ckanext.dcat.processors import RDFSerializer
-from ckanext.dcat.profiles import (DCAT, DCT, ADMS, XSD, VCARD, FOAF, SCHEMA,
+from ckanext.dcat.profiles import (DCAT, DCATAP, DCT, ADMS, XSD, VCARD, FOAF, SCHEMA,
SKOS, LOCN, GSP, OWL, SPDX, GEOJSON_IMT)
from ckanext.dcat.utils import DCAT_EXPOSE_SUBCATALOGS
from ckanext.dcat.tests.utils import BaseSerializeTest
@@ -303,3 +305,63 @@ def test_temporal(self):
for temporal_obj in temporal_obj_list:
triples.extend(self._triples(g, temporal_obj, predicate, parse_date(extras['temporal_end']).isoformat(), XSD.dateTime))
assert len(triples) == 1
+
+ def test_distribution_fields(self):
+
+ resource = {
+ 'id': 'c041c635-054f-4431-b647-f9186926d021',
+ 'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
+ 'name': 'Distribution name',
+ 'availability': 'http://publications.europa.eu/resource/authority/planned-availability/EXPERIMENTAL',
+ }
+
+ dataset = {
+ 'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
+ 'name': 'test-dataset',
+ 'title': 'Test DCAT dataset',
+ 'resources': [
+ resource
+ ]
+ }
+
+ s = RDFSerializer(profiles=DCAT_AP_PROFILES)
+ g = s.g
+
+ dataset_ref = s.graph_from_dataset(dataset)
+
+ assert len([t for t in g.triples((dataset_ref, DCAT.distribution, None))]) == 1
+
+ distribution = self._triple(g, dataset_ref, DCAT.distribution, None)[2]
+
+ assert self._triple(g, distribution, RDF.type, DCAT.Distribution)
+ assert self._triple(g, distribution, DCATAP.availability, URIRef(resource['availability']))
+
+ def test_distribution_availability_literal(self):
+
+ resource = {
+ 'id': 'c041c635-054f-4431-b647-f9186926d021',
+ 'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
+ 'name': 'Distribution name',
+ 'availability': 'EXPERIMENTAL',
+ }
+
+ dataset = {
+ 'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
+ 'name': 'test-dataset',
+ 'title': 'Test DCAT dataset',
+ 'resources': [
+ resource
+ ]
+ }
+
+ s = RDFSerializer(profiles=DCAT_AP_PROFILES)
+ g = s.g
+
+ dataset_ref = s.graph_from_dataset(dataset)
+
+ assert len([t for t in g.triples((dataset_ref, DCAT.distribution, None))]) == 1
+
+ distribution = self._triple(g, dataset_ref, DCAT.distribution, None)[2]
+
+ assert self._triple(g, distribution, RDF.type, DCAT.Distribution)
+ assert self._triple(g, distribution, DCATAP.availability, Literal(resource['availability']))