Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update from ckan/ckanext-dcat #31

Merged
merged 11 commits into from
Sep 19, 2024
5 changes: 3 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

## [Unreleased](https://github.com/ckan/ckanext-dcat/compare/v2.0.0...HEAD)

* Add support for hydra collection type PartialCollectionView
* Fix DCAT date validator on empty values ([#297](https://github.com/ckan/ckanext-dcat/pull/297))
* Add support for hydra collection type PartialCollectionView ([#299](https://github.com/ckan/ckanext-dcat/pull/299))

## [v2.0.0](https://github.com/ckan/ckanext-dcat/compare/v1.7.0...v2.0.0) - 2024-08-30

Expand Down Expand Up @@ -117,7 +118,7 @@

## [v1.1.0](https://github.com/ckan/ckanext-dcat/compare/v1.0.0...v1.1.0) - 2020-03-12

* Python 3 support and new pytest based test suite ([#174](https://github.com/ckan/ckanext-dcat/pull/174))
* Python 3 support and new pytest based test suite ([#174](https://github.com/ckan/ckanext-dcat/pull/174))
* Fix `after_show - set_titles` in plugins.py ([#172](https://github.com/ckan/ckanext-dcat/pull/172))
* Add support for DCT.rightsStatement in DCT.accessRights and DCT.rights ([#177](https://github.com/ckan/ckanext-dcat/pull/177))
* Add support for additional vcard representations ([#178](https://github.com/ckan/ckanext-dcat/pull/178))
Expand Down
10 changes: 9 additions & 1 deletion ckanext/dcat/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,15 @@ def dcat_to_ckan(dcat_dict):
package_dict['extras'].append({'key': 'dcat_publisher_name', 'value': dcat_publisher})
elif isinstance(dcat_publisher, dict) and dcat_publisher.get('name'):
package_dict['extras'].append({'key': 'dcat_publisher_name', 'value': dcat_publisher.get('name')})
package_dict['extras'].append({'key': 'dcat_publisher_email', 'value': dcat_publisher.get('mbox')})

if dcat_publisher.get('mbox'):
package_dict['extras'].append({'key': 'dcat_publisher_email', 'value': dcat_publisher.get('mbox')})

if dcat_publisher.get('identifier'):
package_dict['extras'].append({
'key': 'dcat_publisher_id',
'value': dcat_publisher.get('identifier') # This could be a URI like https://ror.org/05wg1m734
})

package_dict['extras'].append({
'key': 'language',
Expand Down
3 changes: 2 additions & 1 deletion ckanext/dcat/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,8 @@ def _get_from_extra(key):
('name', Literal, FOAF.name, True,),
('email', Literal, FOAF.mbox, False,),
('url', URIRef, FOAF.homepage,False,),
('type', Literal, DCT.type, False,))
('type', Literal, DCT.type, False,),
('identifier', URIRef, DCT.identifier, False,))

_pub = _get_from_extra('source_catalog_publisher')
if _pub:
Expand Down
2 changes: 2 additions & 0 deletions ckanext/dcat/profiles/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,8 @@ def _publisher(self, subject, predicate):

publisher["type"] = self._object_value(agent, DCT.type)

publisher['identifier'] = self._object_value(agent, DCT.identifier)

return publisher

def _contact_details(self, subject, predicate):
Expand Down
5 changes: 4 additions & 1 deletion ckanext/dcat/profiles/euro_dcat_ap_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def _parse_dataset_base(self, dataset_dict, dataset_ref):

# Publisher
publisher = self._publisher(dataset_ref, DCT.publisher)
for key in ("uri", "name", "email", "url", "type"):
for key in ("uri", "name", "email", "url", "type", "identifier"):
if publisher.get(key):
dataset_dict["extras"].append(
{"key": "publisher_{0}".format(key), "value": publisher.get(key)}
Expand Down Expand Up @@ -372,6 +372,7 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref):
"email": self._get_dataset_value(dataset_dict, "publisher_email"),
"url": self._get_dataset_value(dataset_dict, "publisher_url"),
"type": self._get_dataset_value(dataset_dict, "publisher_type"),
"identifier": self._get_dataset_value(dataset_dict, "publisher_identifier"),
}
elif dataset_dict.get("organization"):
# Fall back to dataset org
Expand All @@ -396,6 +397,7 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref):
"email": org_dict.get("email"),
"url": org_dict.get("url"),
"type": org_dict.get("dcat_type"),
"identifier": org_dict.get("identifier"),
}
# Add to graph
if publisher_ref:
Expand All @@ -406,6 +408,7 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref):
("email", FOAF.mbox, None, Literal),
("url", FOAF.homepage, None, URIRef),
("type", DCT.type, None, URIRefOrLiteral),
("identifier", DCT.identifier, None, URIRefOrLiteral),
]
self._add_triples_from_dict(publisher_details, publisher_ref, items)

Expand Down
7 changes: 7 additions & 0 deletions ckanext/dcat/profiles/euro_dcat_ap_scheming.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,13 @@ def _not_empty_dict(data_dict):
_type=URIRef,
value_modifier=self._add_mailto,
)
self._add_triple_from_dict(
publisher,
publisher_ref,
DCT.identifier,
"identifier",
_type=URIRefOrLiteral
)

temporal = dataset_dict.get("temporal_coverage")
if (
Expand Down
4 changes: 4 additions & 0 deletions ckanext/dcat/profiles/schemaorg.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,10 @@ def _publisher_graph(self, dataset_ref, dataset_dict):

self._add_triples_from_dict(dataset_dict, contact_point, items)

publisher_identifier = self._get_dataset_value(dataset_dict, "publisher_identifier")
if publisher_identifier:
self.g.add((publisher_details, SCHEMA.identifier, Literal(publisher_identifier)))

def _temporal_graph(self, dataset_ref, dataset_dict):
start = self._get_dataset_value(dataset_dict, "temporal_start")
end = self._get_dataset_value(dataset_dict, "temporal_end")
Expand Down
4 changes: 4 additions & 0 deletions ckanext/dcat/schemas/dcat_ap_full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ dataset_fields:

- field_name: type
label: Type

- field_name: identifier
label: Identifier
help_text: Unique identifier for the publisher, such as a ROR ID.
help_text: Entity responsible for making the dataset available.

- field_name: license_id
Expand Down
5 changes: 5 additions & 0 deletions ckanext/dcat/schemas/dcat_ap_recommended.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@ dataset_fields:

- field_name: type
label: Type

- field_name: identifier
label: Identifier
help_text: Unique identifier for the publisher, such as a ROR ID.

help_text: Entity responsible for making the dataset available.

- field_name: license_id
Expand Down
18 changes: 18 additions & 0 deletions ckanext/dcat/tests/logic/test_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,27 @@ def test_dcat_date_invalid():
invalid_values = [
"2024+07",
"not_a_date",
True
]

for value in invalid_values:
data = {key: value}
with pytest.raises(Invalid):
dcat_date(key, data, errors, {}), value


def test_dcat_date_empty_values():

key = ("some_date",)
errors = {key: []}
valid_values = [
None,
False,
""
]

for value in valid_values:
data = {key: value}
dcat_date(key, data, errors, {}), value

assert data[key] is None
2 changes: 2 additions & 0 deletions ckanext/dcat/tests/profiles/base/test_base_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,7 @@ def test_publisher_foaf(self):
<foaf:mbox>[email protected]</foaf:mbox>
<foaf:homepage>http://some.org</foaf:homepage>
<dct:type rdf:resource="http://purl.org/adms/publishertype/NonProfitOrganisation"/>
<dct:identifier rdf:resource="https://ror.org/05wg1m734"/>
</foaf:Organization>
</dct:publisher>
</rdfs:SomeClass>
Expand All @@ -666,6 +667,7 @@ def test_publisher_foaf(self):
assert publisher['email'] == '[email protected]'
assert publisher['url'] == 'http://some.org'
assert publisher['type'] == 'http://purl.org/adms/publishertype/NonProfitOrganisation'
assert publisher['identifier'] == 'https://ror.org/05wg1m734'

def test_publisher_ref(self):

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ def _get_extra_value_as_list(key):
assert _get_extra_value('publisher_email') == '[email protected]'
assert _get_extra_value('publisher_url') == 'http://some.org'
assert _get_extra_value('publisher_type') == 'http://purl.org/adms/publishertype/NonProfitOrganisation'
assert _get_extra_value('publisher_identifier') == 'https://ror.org/05wg1m734'
assert _get_extra_value('contact_name') == 'Point of Contact'
# mailto gets removed for storage and is added again on output
assert _get_extra_value('contact_email') == '[email protected]'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ def test_e2e_ckan_to_dcat(self):
"email": "[email protected]",
"url": "https://example.org",
"type": "public_body",
"identifier": "http://example.org/publisher-id",
},
],
"temporal_coverage": [
Expand Down Expand Up @@ -301,6 +302,12 @@ def test_e2e_ckan_to_dcat(self):
DCT.type,
dataset_dict["publisher"][0]["type"],
)
assert self._triple(
g,
publisher[0][2],
DCT.identifier,
URIRef(dataset_dict["publisher"][0]["identifier"])
)

temporal = [t for t in g.triples((dataset_ref, DCT.temporal, None))]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ def test_publisher_extras(self):
{'key': 'publisher_email', 'value': '[email protected]'},
{'key': 'publisher_url', 'value': 'http://example.com/publisher/home'},
{'key': 'publisher_type', 'value': 'http://purl.org/adms/publishertype/Company'},
{'key': 'publisher_identifier', 'value': 'https://ror.org/05wg1m734'},
]


Expand All @@ -121,6 +122,7 @@ def test_publisher_extras(self):
assert str(publisher) == extras['publisher_uri']
assert self._triple(g, publisher, RDF.type, SCHEMA.Organization)
assert self._triple(g, publisher, SCHEMA.name, extras['publisher_name'])
assert self._triple(g, publisher, SCHEMA.identifier, extras['publisher_identifier'])

contact_point = self._triple(g, publisher, SCHEMA.contactPoint, None)[2]
assert contact_point
Expand All @@ -144,6 +146,7 @@ def test_publisher_no_uri(self):
{'key': 'publisher_email', 'value': '[email protected]'},
{'key': 'publisher_url', 'value': 'http://example.com/publisher/home'},
{'key': 'publisher_type', 'value': 'http://purl.org/adms/publishertype/Company'},
{'key': 'publisher_identifier', 'value': 'https://ror.org/05wg1m734'},
]
}
extras = self._extras(dataset)
Expand All @@ -158,6 +161,7 @@ def test_publisher_no_uri(self):
assert isinstance(publisher, BNode)
assert self._triple(g, publisher, RDF.type, SCHEMA.Organization)
assert self._triple(g, publisher, SCHEMA.name, extras['publisher_name'])
assert self._triple(g, publisher, SCHEMA.identifier, extras['publisher_identifier'])

contact_point = self._triple(g, publisher, SCHEMA.contactPoint, None)[2]
assert contact_point
Expand Down
1 change: 1 addition & 0 deletions ckanext/dcat/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def field_labels():
'publisher_email': _('Publisher email'),
'publisher_url': _('Publisher URL'),
'publisher_type': _('Publisher type'),
'publisher_identifier': _('Publisher identifier'),
'contact_name': _('Contact name'),
'contact_email': _('Contact email'),
'contact_uri': _('Contact URI'),
Expand Down
19 changes: 16 additions & 3 deletions ckanext/dcat/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,13 @@
Invalid,
_,
)
from ckanext.scheming.validation import scheming_validator

try:
from ckanext.scheming.validation import scheming_validator
except ImportError:
def scheming_validator(func):
return func


# https://www.w3.org/TR/xmlschema11-2/#gYear
regexp_xsd_year = re.compile(
Expand Down Expand Up @@ -41,12 +47,19 @@ def is_date(value):
def dcat_date(key, data, errors, context):
value = data[key]

if isinstance(value, datetime.datetime):
if not value:
data[key] = None
return

if is_year(value) or is_year_month(value) or is_date(value):
if isinstance(value, datetime.datetime):
return

try:
if is_year(value) or is_year_month(value) or is_date(value):
return
except TypeError:
raise Invalid(_("Dates must be provided as strings or datetime objects"))

try:
parse_date(value)
except ValueError:
Expand Down
Loading