Skip to content

Commit

Permalink
Adds support for the latest Hydra vocabulary
Browse files Browse the repository at this point in the history
  • Loading branch information
seitenbau-govdata committed Apr 2, 2024
1 parent e2183dd commit e75e555
Show file tree
Hide file tree
Showing 5 changed files with 139 additions and 17 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## [Unreleased](https://github.com/ckan/ckanext-dcat/compare/v1.6.0...HEAD)

* Adds support for the latest Hydra vocabulary. For backward compatibility, the old properties are still supported but marked as deprecated.

## [v1.6.0](https://github.com/ckan/ckanext-dcat/compare/v1.5.1...v1.6.0) - 2024-02-29

Expand Down
7 changes: 3 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,9 @@ The number of datasets returned is limited. The response will include paging inf
@prefix hydra: <http://www.w3.org/ns/hydra/core#> .

<http://example.com/catalog.ttl?page=1> a hydra:PagedCollection ;
hydra:firstPage "http://example.com/catalog.ttl?page=1" ;
hydra:itemsPerPage 100 ;
hydra:lastPage "http://example.com/catalog.ttl?page=3" ;
hydra:nextPage "http://example.com/catalog.ttl?page=2" ;
hydra:first "http://example.com/catalog.ttl?page=1" ;
hydra:last "http://example.com/catalog.ttl?page=3" ;
hydra:next "http://example.com/catalog.ttl?page=2" ;
hydra:totalItems 283 .

The default number of datasets returned (100) can be modified by CKAN site maintainers using the following configuration option on your ini file:
Expand Down
29 changes: 18 additions & 11 deletions ckanext/dcat/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,11 +116,15 @@ def next_page(self):
Returns the URL of the next page or None if there is no next page
'''
for pagination_node in self.g.subjects(RDF.type, HYDRA.PagedCollection):
# Try to find HYDRA.next first
for o in self.g.objects(pagination_node, HYDRA.next):
return str(o)

# If HYDRA.next is not found, try HYDRA.nextPage (deprecated)
for o in self.g.objects(pagination_node, HYDRA.nextPage):
return str(o)
return None


def parse(self, data, _format=None):
'''
Parses an RDF graph serialization into the class graph
Expand Down Expand Up @@ -178,7 +182,6 @@ def datasets(self):

yield dataset_dict


class RDFSerializer(RDFProcessor):
'''
A CKAN to RDF serializer based on rdflib
Expand Down Expand Up @@ -209,19 +212,23 @@ def _add_pagination_triples(self, paging_info):
pagination_ref = BNode()
self.g.add((pagination_ref, RDF.type, HYDRA.PagedCollection))

# The predicates `nextPage`, `previousPage`, `firstPage`, `lastPage`
# and `itemsPerPage` are deprecated and will be removed in the future
items = [
('next', HYDRA.nextPage),
('previous', HYDRA.previousPage),
('first', HYDRA.firstPage),
('last', HYDRA.lastPage),
('count', HYDRA.totalItems),
('items_per_page', HYDRA.itemsPerPage),
('next', [HYDRA.nextPage, HYDRA.next]),
('previous', [HYDRA.previousPage, HYDRA.previous]),
('first', [HYDRA.firstPage, HYDRA.first]),
('last', [HYDRA.lastPage, HYDRA.last]),
('count', [HYDRA.totalItems]),
('items_per_page', [HYDRA.itemsPerPage]),
]

for item in items:
key, predicate = item
key, predicates = item
if paging_info.get(key):
self.g.add((pagination_ref, predicate,
Literal(paging_info[key])))
for predicate in predicates:
self.g.add((pagination_ref, predicate,
Literal(paging_info[key])))

return pagination_ref

Expand Down
46 changes: 45 additions & 1 deletion ckanext/dcat/tests/test_base_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def test_parse_data(self):

assert len(p.g) == 2

def test_parse_pagination_next_page(self):
def test_parse_pagination_next_page_deprecated_vocabulary_only(self):

data = '''<?xml version="1.0" encoding="utf-8" ?>
<rdf:RDF
Expand All @@ -163,6 +163,50 @@ def test_parse_pagination_next_page(self):

assert p.next_page() == 'http://example.com/catalog.xml?page=2'

def test_parse_pagination_next_page_updated_vocabulary_only(self):
    '''
    The next page URL is read from `hydra:next` when the graph only uses
    the current Hydra pagination predicates (`next`, `first`, `last`)
    '''
    rdf_xml = '''<?xml version="1.0" encoding="utf-8" ?>
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
xmlns:hydra="http://www.w3.org/ns/hydra/core#">
<hydra:PagedCollection rdf:about="http://example.com/catalog.xml?page=1">
<hydra:totalItems rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">245</hydra:totalItems>
<hydra:last>http://example.com/catalog.xml?page=3</hydra:last>
<hydra:next>http://example.com/catalog.xml?page=2</hydra:next>
<hydra:first>http://example.com/catalog.xml?page=1</hydra:first>
</hydra:PagedCollection>
</rdf:RDF>
'''

    parser = RDFParser()
    parser.parse(rdf_xml)

    # Only `hydra:next` is present, so it is the sole source of the URL
    next_url = parser.next_page()
    assert next_url == 'http://example.com/catalog.xml?page=2'

def test_parse_pagination_next_page_both_vocabularies(self):
    '''
    When a graph carries both `hydra:next` and the deprecated
    `hydra:nextPage`, the value of `hydra:next` is the one returned
    '''
    rdf_xml = '''<?xml version="1.0" encoding="utf-8" ?>
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
xmlns:hydra="http://www.w3.org/ns/hydra/core#">
<hydra:PagedCollection rdf:about="http://example.com/catalog.xml?page=1">
<hydra:last>http://example.com/catalog.xml?page=3</hydra:last>
<hydra:next>http://example.com/catalog.xml?page=next</hydra:next>
<hydra:nextPage>http://example.com/catalog.xml?page=nextPage</hydra:nextPage>
<hydra:first>http://example.com/catalog.xml?page=1</hydra:first>
</hydra:PagedCollection>
</rdf:RDF>
'''

    parser = RDFParser()
    parser.parse(rdf_xml)

    # The two predicates point at different URLs on purpose, so the
    # assertion shows which one took precedence
    next_url = parser.next_page()
    assert next_url == 'http://example.com/catalog.xml?page=next'

def test_parse_without_pagination(self):

data = '''<?xml version="1.0" encoding="utf-8" ?>
Expand Down
73 changes: 72 additions & 1 deletion ckanext/dcat/tests/test_euro_dcatap_profile_serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from ckantoolkit.tests import helpers, factories

from ckanext.dcat import utils
from ckanext.dcat.processors import RDFSerializer
from ckanext.dcat.processors import RDFSerializer, HYDRA
from ckanext.dcat.profiles import (DCAT, DCT, ADMS, XSD, VCARD, FOAF, SCHEMA,
SKOS, LOCN, GSP, OWL, SPDX, GEOJSON_IMT,
DISTRIBUTION_LICENSE_FALLBACK_CONFIG)
Expand Down Expand Up @@ -1250,6 +1250,77 @@ def test_subcatalog(self):
assert len(dataset_title) == 1
assert str(dataset_title[0]) == dataset['title']

def test_catalog_pagination(self):
    '''
    Checks the Hydra pagination triples written when serializing a catalog

    For `next`, `first` and `last` both the current predicate and its
    deprecated `*Page` counterpart must be present with the same value.
    `previous` is not part of the paging info, so neither of its
    predicates may appear. `totalItems` and `itemsPerPage` are each
    expected exactly once.
    '''
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'test dataset',
        'extras': [
            {'key': 'source_catalog_title', 'value': 'Subcatalog example'},
            {'key': 'source_catalog_homepage', 'value': 'http://subcatalog.example'},
            {'key': 'source_catalog_description', 'value': 'Subcatalog example description'}
        ]
    }
    catalog_dict = {
        'title': 'My Catalog',
        'description': 'An Open Data Catalog',
        'homepage': 'http://example.com',
        'language': 'de',
    }

    expected_first = 'http://subcatalog.example?page=1'
    expected_next = 'http://subcatalog.example?page=2'
    expected_last = 'http://subcatalog.example?page=3'

    pagination = {
        'count': 12,
        'items_per_page': 5,
        'current': expected_first,
        'first': expected_first,
        'last': expected_last,
        'next': expected_next,
    }

    s = RDFSerializer(profiles=['euro_dcat_ap'])
    g = s.g

    s.serialize_catalog(catalog_dict, dataset_dicts=[dataset], pagination_info=pagination)

    paged_collection = list(g.subjects(RDF.type, HYDRA.PagedCollection))
    assert len(paged_collection) == 1
    collection_ref = paged_collection[0]

    def values_of(predicate):
        # Comparing the whole list checks both the number of triples and
        # their values in a single assertion
        return [str(o) for o in g.objects(collection_ref, predicate)]

    # Pagination item: next
    assert values_of(HYDRA.next) == [expected_next]
    assert values_of(HYDRA.nextPage) == [expected_next]

    # Pagination item: previous (absent from the paging info)
    assert values_of(HYDRA.previousPage) == []
    assert values_of(HYDRA.previous) == []

    # Pagination item: first
    assert values_of(HYDRA.first) == [expected_first]
    assert values_of(HYDRA.firstPage) == [expected_first]

    # Pagination item: last
    assert values_of(HYDRA.last) == [expected_last]
    assert values_of(HYDRA.lastPage) == [expected_last]

    # Pagination item: count
    assert values_of(HYDRA.totalItems) == ["12"]

    # Pagination item: items_per_page
    assert values_of(HYDRA.itemsPerPage) == ["5"]

@pytest.mark.ckan_config(DISTRIBUTION_LICENSE_FALLBACK_CONFIG, 'true')
def test_set_missing_license_for_resource(self):
''' Check the behavior if param in config is set: Add license_id to the resource'''
Expand Down

0 comments on commit e75e555

Please sign in to comment.