From 4e434d529aeb83d316412ee67eec2f797eecfcfc Mon Sep 17 00:00:00 2001 From: seitenbau-govdata Date: Wed, 22 Jun 2022 12:42:25 +0200 Subject: [PATCH] Adds support for property dcat:spatialResolutionInMeters --- README.md | 1 + ckanext/dcat/profiles.py | 71 ++++++++++---- .../tests/test_euro_dcatap_2_profile_parse.py | 45 +++++++++ .../test_euro_dcatap_2_profile_serialize.py | 92 +++++++++++++++++++ 4 files changed, 192 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index bb1f426e..e54d1328 100644 --- a/README.md +++ b/README.md @@ -348,6 +348,7 @@ This mapping is compatible with the [DCAT-AP v1.1](https://joinup.ec.europa.eu/a | dcat:Dataset | dct:spatial | extra:spatial_uri | | text | If the RDF provides them, profiles should store the textual and geometric representation of the location in extra:spatial_text, extra:spatial, extra:spatial_bbox and extra:spatial_centroid respectively | | dcat:Dataset | dct:temporal | extra:temporal_start + extra:temporal_end | | text | None, one or both extras can be present | | dcat:Dataset | dcat:temporalResolution| extra:temporal_resolution | | list | | +| dcat:Dataset | dcat:spatialResolutionInMeters| extra:spatial_resolution_in_meters | | list | | | dcat:Dataset | dct:publisher | extra:publisher_uri | | text | See note about URIs | | foaf:Agent | foaf:name | extra:publisher_name | | text | | | foaf:Agent | foaf:mbox | extra:publisher_email | organization:title | text | | diff --git a/ckanext/dcat/profiles.py b/ckanext/dcat/profiles.py index 9e78144b..455df3f6 100644 --- a/ckanext/dcat/profiles.py +++ b/ckanext/dcat/profiles.py @@ -210,6 +210,24 @@ def _object_value_int(self, subject, predicate): pass return None + def _object_value_int_list(self, subject, predicate): + ''' + Given a subject and a predicate, returns the value of the object as a + list of integers + + Both subject and predicate must be rdflib URIRef or BNode objects + + If the value can not be parsed as integer, returns an empty list + ''' + 
object_values = [] + for object in self.g.objects(subject, predicate): + if object: + try: + object_values.append(int(float(object))) + except ValueError: + pass + return object_values + def _object_value_list(self, subject, predicate): ''' Given a subject and a predicate, returns a list with all the values of @@ -576,6 +594,24 @@ def _get_dict_value(self, _dict, key, default=None): return default + def _read_list_value(self, value): + items = [] + # List of values + if isinstance(value, list): + items = value + elif isinstance(value, basestring): + try: + items = json.loads(value) + if isinstance(items, ((int, float, complex))): + items = [items] # JSON list + except ValueError: + if ',' in value: + # Comma-separated list + items = value.split(',') + else: + items = [value] # Normal text value + return items + def _add_spatial_value_to_graph(self, spatial_ref, datatype, value): ''' Adds spatial triples to the graph. @@ -687,23 +723,7 @@ def _add_list_triple(self, subject, predicate, value, _type=Literal): item. If `value` is a string there is an attempt to split it using commas, to support legacy fields. 
''' - items = [] - # List of values - if isinstance(value, list): - items = value - elif isinstance(value, basestring): - try: - # JSON list - items = json.loads(value) - if isinstance(items, ((int, int, float, complex))): - items = [items] - except ValueError: - if ',' in value: - # Comma-separated list - items = value.split(',') - else: - # Normal text value - items = [value] + items = self._read_list_value(value) for item in items: # ensure URIRef items are preprocessed (space removal/url encoding) @@ -1406,6 +1426,12 @@ def parse_dataset(self, dataset_dict, dataset_ref): for key in ('bbox', 'centroid'): self._add_spatial_to_dict(dataset_dict, key, spatial) + # Spatial resolution in meters + spatial_resolution_in_meters = self._object_value_int_list(dataset_ref, DCAT.spatialResolutionInMeters) + if spatial_resolution_in_meters: + dataset_dict['extras'].append({'key': 'spatial_resolution_in_meters', + 'value': json.dumps(spatial_resolution_in_meters)}) + return dataset_dict def graph_from_dataset(self, dataset_dict, dataset_ref): @@ -1426,6 +1452,17 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): if spatial_cent: self._add_spatial_value_to_graph(spatial_ref, DCAT.centroid, spatial_cent) + # Spatial resolution in meters + spatial_resolution_in_meters = self._read_list_value( + self._get_dataset_value(dataset_dict, 'spatial_resolution_in_meters')) + if spatial_resolution_in_meters: + for value in spatial_resolution_in_meters: + try: + self.g.add((dataset_ref, DCAT.spatialResolutionInMeters, + Literal(float(value), datatype=XSD.decimal))) + except (ValueError, TypeError): + self.g.add((dataset_ref, DCAT.spatialResolutionInMeters, Literal(value))) + # TemporalResolution temporal_resolution = self._get_dataset_value(dataset_dict, 'temporal_resolution') if temporal_resolution: diff --git a/ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py b/ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py index 7eaa062e..6ccb2209 100644 --- 
a/ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py +++ b/ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py @@ -70,6 +70,51 @@ def test_temporal_resolution_multiple(self): assert temporal_resolution in temporal_resolution_list assert temporal_resolution_2 in temporal_resolution_list + def test_spatial_resolution_in_meters(self): + g = Graph() + + dataset = URIRef('http://example.org/datasets/1') + g.add((dataset, RDF.type, DCAT.Dataset)) + + spatial_resolution_in_meters = 30 + g.add((dataset, DCAT.spatialResolutionInMeters, Literal(spatial_resolution_in_meters, datatype=XSD.decimal))) + + p = RDFParser(profiles=DCAT_AP_PROFILES) + + p.g = g + + datasets = [d for d in p.datasets()] + + extras = self._extras(datasets[0]) + + spatial_resolution_list = json.loads(extras['spatial_resolution_in_meters']) + assert len(spatial_resolution_list) == 1 + assert spatial_resolution_in_meters in spatial_resolution_list + + def test_spatial_resolution_in_meters_multiple(self): + g = Graph() + + dataset = URIRef('http://example.org/datasets/1') + g.add((dataset, RDF.type, DCAT.Dataset)) + + spatial_resolution_in_meters = 30 + g.add((dataset, DCAT.spatialResolutionInMeters, Literal(spatial_resolution_in_meters, datatype=XSD.decimal))) + + spatial_resolution_in_meters_2 = 20 + g.add((dataset, DCAT.spatialResolutionInMeters, Literal(spatial_resolution_in_meters_2, datatype=XSD.decimal))) + + p = RDFParser(profiles=DCAT_AP_PROFILES) + + p.g = g + + datasets = [d for d in p.datasets()] + + extras = self._extras(datasets[0]) + + spatial_resolution_list = json.loads(extras['spatial_resolution_in_meters']) + assert len(spatial_resolution_list) == 2 + assert spatial_resolution_in_meters in spatial_resolution_list + assert spatial_resolution_in_meters_2 in spatial_resolution_list class TestEuroDCATAP2ProfileParsingSpatial(BaseParseTest): diff --git a/ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py b/ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py index 
153a4d61..1d6abfe9 100644 --- a/ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py +++ b/ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py @@ -40,6 +40,7 @@ def test_graph_from_dataset(self): 'metadata_modified': '2021-06-21T15:21:09.075774', 'extras': [ {'key': 'temporal_resolution', 'value': '[\"PT15M\", \"P1D\"]'}, + {'key': 'spatial_resolution_in_meters', 'value': '[30,20]'}, ] } @@ -57,6 +58,97 @@ def test_graph_from_dataset(self): assert self._triple(g, dataset_ref, DCAT.temporalResolution, Literal(value, datatype=XSD.duration)) + # Spatial Resolution in Meters + values = json.loads(extras['spatial_resolution_in_meters']) + assert len([t for t in g.triples((dataset_ref, DCAT.spatialResolutionInMeters, None))]) == len(values) + + for value in values: + assert self._triple(g, dataset_ref, DCAT.spatialResolutionInMeters, Literal(float(value), + datatype=XSD.decimal)) + + def test_spatial_resolution_in_meters_single_value(self): + + dataset = { + 'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6', + 'name': 'test-dataset', + 'title': 'Test DCAT 2 dataset', + 'notes': 'Lorem ipsum', + 'url': 'http://example.com/ds1', + 'version': '1.0b', + 'metadata_created': '2021-06-21T15:21:09.034694', + 'metadata_modified': '2021-06-21T15:21:09.075774', + 'extras': [ + {'key': 'spatial_resolution_in_meters', 'value': '30'} + ] + } + + extras = self._extras(dataset) + + s = RDFSerializer(profiles=DCAT_AP_PROFILES) + g = s.g + + dataset_ref = s.graph_from_dataset(dataset) + + assert len([t for t in g.triples((dataset_ref, DCAT.spatialResolutionInMeters, None))]) == 1 + assert self._triple(g, dataset_ref, DCAT.spatialResolutionInMeters, + Literal(float(extras['spatial_resolution_in_meters']), datatype=XSD.decimal)) + + def test_spatial_resolution_in_meters_a_value_is_not_a_number(self): + + dataset = { + 'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6', + 'name': 'test-dataset', + 'title': 'Test DCAT 2 dataset', + 'notes': 'Lorem ipsum', + 'url': 'http://example.com/ds1', 
+ 'version': '1.0b', + 'metadata_created': '2021-06-21T15:21:09.034694', + 'metadata_modified': '2021-06-21T15:21:09.075774', + 'extras': [ + {'key': 'spatial_resolution_in_meters', 'value': '[\"foo\",20]'} + ] + } + + extras = self._extras(dataset) + + s = RDFSerializer(profiles=DCAT_AP_PROFILES) + g = s.g + + dataset_ref = s.graph_from_dataset(dataset) + + values = json.loads(extras['spatial_resolution_in_meters']) + assert len([t for t in g.triples((dataset_ref, DCAT.spatialResolutionInMeters, None))]) == len(values) + assert self._triple(g, dataset_ref, DCAT.spatialResolutionInMeters, Literal(values[0])) + assert self._triple(g, dataset_ref, DCAT.spatialResolutionInMeters, + Literal(float(values[1]), datatype=XSD.decimal)) + + def test_spatial_resolution_value_is_invalid_json(self): + + dataset = { + 'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6', + 'name': 'test-dataset', + 'title': 'Test DCAT 2 dataset', + 'notes': 'Lorem ipsum', + 'url': 'http://example.com/ds1', + 'version': '1.0b', + 'metadata_created': '2021-06-21T15:21:09.034694', + 'metadata_modified': '2021-06-21T15:21:09.075774', + 'extras': [ + {'key': 'spatial_resolution_in_meters', 'value': 'foo 30'} + ] + } + + extras = self._extras(dataset) + + s = RDFSerializer(profiles=DCAT_AP_PROFILES) + g = s.g + + dataset_ref = s.graph_from_dataset(dataset) + + assert len([t for t in g.triples((dataset_ref, DCAT.spatialResolutionInMeters, None))]) == 1 + assert self._triple(g, dataset_ref, DCAT.spatialResolutionInMeters, + Literal(extras['spatial_resolution_in_meters'])) + def test_spatial(self): dataset = { 'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',