Skip to content

Commit

Permalink
Adds support for property dcat:spatialResolutionInMeters
Browse files Browse the repository at this point in the history
  • Loading branch information
seitenbau-govdata committed Jun 22, 2022
1 parent 4b92221 commit 4e434d5
Show file tree
Hide file tree
Showing 4 changed files with 192 additions and 17 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,7 @@ This mapping is compatible with the [DCAT-AP v1.1](https://joinup.ec.europa.eu/a
| dcat:Dataset | dct:spatial | extra:spatial_uri | | text | If the RDF provides them, profiles should store the textual and geometric representation of the location in extra:spatial_text, extra:spatial, extra:spatial_bbox and extra:spatial_centroid respectively |
| dcat:Dataset | dct:temporal | extra:temporal_start + extra:temporal_end | | text | None, one or both extras can be present |
| dcat:Dataset | dcat:temporalResolution| extra:temporal_resolution | | list | |
| dcat:Dataset | dcat:spatialResolutionInMeters| extra:spatial_resolution_in_meters | | list | |
| dcat:Dataset | dct:publisher | extra:publisher_uri | | text | See note about URIs |
| foaf:Agent | foaf:name | extra:publisher_name | | text | |
| foaf:Agent | foaf:mbox | extra:publisher_email | organization:title | text | |
Expand Down
71 changes: 54 additions & 17 deletions ckanext/dcat/profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,24 @@ def _object_value_int(self, subject, predicate):
pass
return None

def _object_value_int_list(self, subject, predicate):
    '''
    Given a subject and a predicate, returns the values of the matching
    objects as a list of integers

    Both subject and predicate must be rdflib URIRef or BNode objects

    Each object is converted with int(float(...)), so decimal values are
    truncated towards zero. Objects that can not be parsed as integer
    (non numeric text, NaN, infinite values) are skipped. If no object
    can be parsed, returns an empty list
    '''
    int_values = []
    for obj in self.g.objects(subject, predicate):
        # Deliberately no `if obj:` guard here: an rdflib Literal holding
        # the number 0 evaluates as False and would be silently dropped.
        # Empty lexical values are rejected by float() below anyway.
        try:
            int_values.append(int(float(obj)))
        except (ValueError, OverflowError):
            # ValueError: not a number (or NaN)
            # OverflowError: +/- infinity can not be converted to int
            continue
    return int_values

def _object_value_list(self, subject, predicate):
'''
Given a subject and a predicate, returns a list with all the values of
Expand Down Expand Up @@ -576,6 +594,24 @@ def _get_dict_value(self, _dict, key, default=None):

return default

def _read_list_value(self, value):
    '''
    Normalises a stored field value into a list of items

    The result depends on the type and content of `value`:

    * a list is returned unchanged
    * a string holding a JSON list is returned decoded
    * a string holding any other JSON value (number, string, boolean,
      object) is decoded and wrapped in a single item list, JSON `null`
      gives an empty list
    * a string that is not valid JSON is split on commas if it contains
      any (to support legacy comma-separated fields), otherwise it is
      returned as the only item of the list
    * anything else (eg None) gives an empty list

    Always returns a list, so callers can iterate the result safely.
    '''
    if isinstance(value, list):
        return value
    if not isinstance(value, basestring):
        return []

    try:
        parsed = json.loads(value)
    except ValueError:
        # Not JSON: legacy comma-separated list or normal text value
        if ',' in value:
            return value.split(',')
        return [value]

    if isinstance(parsed, list):
        # JSON list
        return parsed
    if parsed is None:
        # JSON null carries no items
        return []
    # Single JSON value (number, string, ...): wrap it so that callers
    # never iterate over the characters of a string or receive a scalar
    return [parsed]

def _add_spatial_value_to_graph(self, spatial_ref, datatype, value):
'''
Adds spatial triples to the graph.
Expand Down Expand Up @@ -687,23 +723,7 @@ def _add_list_triple(self, subject, predicate, value, _type=Literal):
item. If `value` is a string there is an attempt to split it using
commas, to support legacy fields.
'''
items = []
# List of values
if isinstance(value, list):
items = value
elif isinstance(value, basestring):
try:
# JSON list
items = json.loads(value)
if isinstance(items, ((int, int, float, complex))):
items = [items]
except ValueError:
if ',' in value:
# Comma-separated list
items = value.split(',')
else:
# Normal text value
items = [value]
items = self._read_list_value(value)

for item in items:
# ensure URIRef items are preprocessed (space removal/url encoding)
Expand Down Expand Up @@ -1406,6 +1426,12 @@ def parse_dataset(self, dataset_dict, dataset_ref):
for key in ('bbox', 'centroid'):
self._add_spatial_to_dict(dataset_dict, key, spatial)

# Spatial resolution in meters
spatial_resolution_in_meters = self._object_value_int_list(dataset_ref, DCAT.spatialResolutionInMeters)
if spatial_resolution_in_meters:
dataset_dict['extras'].append({'key': 'spatial_resolution_in_meters',
'value': json.dumps(spatial_resolution_in_meters)})

return dataset_dict

def graph_from_dataset(self, dataset_dict, dataset_ref):
Expand All @@ -1426,6 +1452,17 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
if spatial_cent:
self._add_spatial_value_to_graph(spatial_ref, DCAT.centroid, spatial_cent)

# Spatial resolution in meters
spatial_resolution_in_meters = self._read_list_value(
self._get_dataset_value(dataset_dict, 'spatial_resolution_in_meters'))
if spatial_resolution_in_meters:
for value in spatial_resolution_in_meters:
try:
self.g.add((dataset_ref, DCAT.spatialResolutionInMeters,
Literal(float(value), datatype=XSD.decimal)))
except (ValueError, TypeError):
self.g.add((dataset_ref, DCAT.spatialResolutionInMeters, Literal(value)))

# TemporalResolution
temporal_resolution = self._get_dataset_value(dataset_dict, 'temporal_resolution')
if temporal_resolution:
Expand Down
45 changes: 45 additions & 0 deletions ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,51 @@ def test_temporal_resolution_multiple(self):
assert temporal_resolution in temporal_resolution_list
assert temporal_resolution_2 in temporal_resolution_list

def test_spatial_resolution_in_meters(self):
    '''
    A single dcat:spatialResolutionInMeters value on a dataset is parsed
    into the `spatial_resolution_in_meters` extra as a JSON list
    '''
    expected_resolution = 30
    dataset_ref = URIRef('http://example.org/datasets/1')

    graph = Graph()
    graph.add((dataset_ref, RDF.type, DCAT.Dataset))
    graph.add((
        dataset_ref,
        DCAT.spatialResolutionInMeters,
        Literal(expected_resolution, datatype=XSD.decimal),
    ))

    parser = RDFParser(profiles=DCAT_AP_PROFILES)
    parser.g = graph

    parsed_datasets = list(parser.datasets())
    extras = self._extras(parsed_datasets[0])

    # The extra is stored as a JSON encoded list
    resolutions = json.loads(extras['spatial_resolution_in_meters'])
    assert len(resolutions) == 1
    assert expected_resolution in resolutions

def test_spatial_resolution_in_meters_multiple(self):
    '''
    Several dcat:spatialResolutionInMeters values on the same dataset are
    all parsed into the `spatial_resolution_in_meters` extra
    '''
    expected_resolutions = (30, 20)
    dataset_ref = URIRef('http://example.org/datasets/1')

    graph = Graph()
    graph.add((dataset_ref, RDF.type, DCAT.Dataset))
    for resolution in expected_resolutions:
        graph.add((
            dataset_ref,
            DCAT.spatialResolutionInMeters,
            Literal(resolution, datatype=XSD.decimal),
        ))

    parser = RDFParser(profiles=DCAT_AP_PROFILES)
    parser.g = graph

    parsed_datasets = list(parser.datasets())
    extras = self._extras(parsed_datasets[0])

    # The extra is stored as a JSON encoded list
    resolutions = json.loads(extras['spatial_resolution_in_meters'])
    assert len(resolutions) == 2
    for resolution in expected_resolutions:
        assert resolution in resolutions

class TestEuroDCATAP2ProfileParsingSpatial(BaseParseTest):

Expand Down
92 changes: 92 additions & 0 deletions ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def test_graph_from_dataset(self):
'metadata_modified': '2021-06-21T15:21:09.075774',
'extras': [
{'key': 'temporal_resolution', 'value': '[\"PT15M\", \"P1D\"]'},
{'key': 'spatial_resolution_in_meters', 'value': '[30,20]'},
]
}

Expand All @@ -57,6 +58,97 @@ def test_graph_from_dataset(self):
assert self._triple(g, dataset_ref, DCAT.temporalResolution, Literal(value,
datatype=XSD.duration))

# Spatial Resolution in Meters
values = json.loads(extras['spatial_resolution_in_meters'])
assert len([t for t in g.triples((dataset_ref, DCAT.spatialResolutionInMeters, None))]) == len(values)

for value in values:
assert self._triple(g, dataset_ref, DCAT.spatialResolutionInMeters, Literal(float(value),
datatype=XSD.decimal))

def test_spatial_resolution_in_meters_single_value(self):
    '''
    An extra holding one plain number ('30') is serialized as exactly one
    dcat:spatialResolutionInMeters triple typed as xsd:decimal
    '''
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT 2 dataset',
        'notes': 'Lorem ipsum',
        'url': 'http://example.com/ds1',
        'version': '1.0b',
        'metadata_created': '2021-06-21T15:21:09.034694',
        'metadata_modified': '2021-06-21T15:21:09.075774',
        'extras': [
            {'key': 'spatial_resolution_in_meters', 'value': '30'}
        ]
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer(profiles=DCAT_AP_PROFILES)
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    resolution_triples = list(
        graph.triples((dataset_ref, DCAT.spatialResolutionInMeters, None)))
    assert len(resolution_triples) == 1

    expected_literal = Literal(
        float(extras['spatial_resolution_in_meters']), datatype=XSD.decimal)
    assert self._triple(
        graph, dataset_ref, DCAT.spatialResolutionInMeters, expected_literal)

def test_spatial_resolution_in_meters_a_value_is_not_a_number(self):
    '''
    In a list mixing a non numeric item and a number, the non numeric item
    is serialized as a plain literal and the number as xsd:decimal
    '''
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT 2 dataset',
        'notes': 'Lorem ipsum',
        'url': 'http://example.com/ds1',
        'version': '1.0b',
        'metadata_created': '2021-06-21T15:21:09.034694',
        'metadata_modified': '2021-06-21T15:21:09.075774',
        'extras': [
            {'key': 'spatial_resolution_in_meters', 'value': '[\"foo\",20]'}
        ]
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer(profiles=DCAT_AP_PROFILES)
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    values = json.loads(extras['spatial_resolution_in_meters'])
    resolution_triples = list(
        graph.triples((dataset_ref, DCAT.spatialResolutionInMeters, None)))
    assert len(resolution_triples) == len(values)

    # First item is text, second item is a number
    text_literal = Literal(values[0])
    assert self._triple(
        graph, dataset_ref, DCAT.spatialResolutionInMeters, text_literal)

    decimal_literal = Literal(float(values[1]), datatype=XSD.decimal)
    assert self._triple(
        graph, dataset_ref, DCAT.spatialResolutionInMeters, decimal_literal)

def test_spatial_resolution_value_is_invalid_json(self):
    '''
    An extra value that is neither JSON nor a number ('foo 30') is
    serialized as a single plain literal holding the raw text
    '''
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT 2 dataset',
        'notes': 'Lorem ipsum',
        'url': 'http://example.com/ds1',
        'version': '1.0b',
        'metadata_created': '2021-06-21T15:21:09.034694',
        'metadata_modified': '2021-06-21T15:21:09.075774',
        'extras': [
            {'key': 'spatial_resolution_in_meters', 'value': 'foo 30'}
        ]
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer(profiles=DCAT_AP_PROFILES)
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    resolution_triples = list(
        graph.triples((dataset_ref, DCAT.spatialResolutionInMeters, None)))
    assert len(resolution_triples) == 1

    raw_literal = Literal(extras['spatial_resolution_in_meters'])
    assert self._triple(
        graph, dataset_ref, DCAT.spatialResolutionInMeters, raw_literal)

def test_spatial(self):
dataset = {
'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
Expand Down

0 comments on commit 4e434d5

Please sign in to comment.