From 76701536cee7168fd80ec673a95c44c37f2b3dd8 Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 2 Sep 2024 14:37:38 +0200 Subject: [PATCH 1/7] Fix DCAT date validator on empty values --- ckanext/dcat/tests/logic/test_validators.py | 18 ++++++++++++++++++ ckanext/dcat/validators.py | 11 +++++++++-- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/ckanext/dcat/tests/logic/test_validators.py b/ckanext/dcat/tests/logic/test_validators.py index 700cc644..562bd067 100644 --- a/ckanext/dcat/tests/logic/test_validators.py +++ b/ckanext/dcat/tests/logic/test_validators.py @@ -90,9 +90,27 @@ def test_dcat_date_invalid(): invalid_values = [ "2024+07", "not_a_date", + True ] for value in invalid_values: data = {key: value} with pytest.raises(Invalid): dcat_date(key, data, errors, {}), value + + +def test_dcat_date_empty_values(): + + key = ("some_date",) + errors = {key: []} + valid_values = [ + None, + False, + "" + ] + + for value in valid_values: + data = {key: value} + dcat_date(key, data, errors, {}), value + + assert data[key] is None diff --git a/ckanext/dcat/validators.py b/ckanext/dcat/validators.py index c9ee7d50..9bf18d49 100644 --- a/ckanext/dcat/validators.py +++ b/ckanext/dcat/validators.py @@ -41,12 +41,19 @@ def is_date(value): def dcat_date(key, data, errors, context): value = data[key] - if isinstance(value, datetime.datetime): + if not value: + data[key] = None return - if is_year(value) or is_year_month(value) or is_date(value): + if isinstance(value, datetime.datetime): return + try: + if is_year(value) or is_year_month(value) or is_date(value): + return + except TypeError: + raise Invalid(_("Dates must be provided as strings or datetime objects")) + try: parse_date(value) except ValueError: From 748b23fc621462b8413b87f5e336bc2745b02287 Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 2 Sep 2024 14:53:36 +0200 Subject: [PATCH 2/7] Changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md 
b/CHANGELOG.md index 059dcbfa..da0748b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ ## [Unreleased](https://github.com/ckan/ckanext-dcat/compare/v2.0.0...HEAD) -* ... +* Fix DCAT date validator on empty values ([#297](https://github.com/ckan/ckanext-dcat/pull/297)) ## [v2.0.0](https://github.com/ckan/ckanext-dcat/compare/v1.7.0...v2.0.0) - 2024-08-30 From aefa22c91b97899d18017645ac59a2959d73f1e4 Mon Sep 17 00:00:00 2001 From: Hans-Chrstian Date: Mon, 9 Sep 2024 09:34:38 +0200 Subject: [PATCH 3/7] feat: add support for dct:identifier in publisher details --- ckanext/dcat/converters.py | 13 +- ckanext/dcat/processors.py | 3 +- ckanext/dcat/profiles/base.py | 2 + ckanext/dcat/profiles/euro_dcat_ap_base.py | 2 +- ckanext/dcat/profiles/schemaorg.py | 4 + ckanext/dcat/schemas/dcat_ap_recommended.yaml | 5 + .../tests/profiles/base/test_base_profile.py | 2 + .../dcat_ap/test_euro_dcatap_profile_parse.py | 1 + .../test_schemaorg_profile_serialize.py | 4 + docs/mapping.md | 137 +++++++++--------- examples/dcat/dataset.rdf | 1 + 11 files changed, 102 insertions(+), 72 deletions(-) diff --git a/ckanext/dcat/converters.py b/ckanext/dcat/converters.py index ddce1723..2e27a0ee 100644 --- a/ckanext/dcat/converters.py +++ b/ckanext/dcat/converters.py @@ -27,8 +27,17 @@ def dcat_to_ckan(dcat_dict): if isinstance(dcat_publisher, basestring): package_dict['extras'].append({'key': 'dcat_publisher_name', 'value': dcat_publisher}) elif isinstance(dcat_publisher, dict) and dcat_publisher.get('name'): - package_dict['extras'].append({'key': 'dcat_publisher_name', 'value': dcat_publisher.get('name')}) - package_dict['extras'].append({'key': 'dcat_publisher_email', 'value': dcat_publisher.get('mbox')}) + if dcat_publisher.get('name'): + package_dict['extras'].append({'key': 'dcat_publisher_name', 'value': dcat_publisher.get('name')}) + + if dcat_publisher.get('mbox'): + package_dict['extras'].append({'key': 'dcat_publisher_email', 'value': dcat_publisher.get('mbox')}) + + if 
dcat_publisher.get('identifier'): + package_dict['extras'].append({ + 'key': 'dcat_publisher_id', + 'value': dcat_publisher.get('identifier') # This could be a URI like https://ror.org/05wg1m734 + }) package_dict['extras'].append({ 'key': 'language', diff --git a/ckanext/dcat/processors.py b/ckanext/dcat/processors.py index acf5af57..932d58f5 100644 --- a/ckanext/dcat/processors.py +++ b/ckanext/dcat/processors.py @@ -407,7 +407,8 @@ def _get_from_extra(key): ('name', Literal, FOAF.name, True,), ('email', Literal, FOAF.mbox, False,), ('url', URIRef, FOAF.homepage,False,), - ('type', Literal, DCT.type, False,)) + ('type', Literal, DCT.type, False,), + ('identifier', URIRef, DCT.identifier, False,)) _pub = _get_from_extra('source_catalog_publisher') if _pub: diff --git a/ckanext/dcat/profiles/base.py b/ckanext/dcat/profiles/base.py index 396aa152..fd5af492 100644 --- a/ckanext/dcat/profiles/base.py +++ b/ckanext/dcat/profiles/base.py @@ -468,6 +468,8 @@ def _publisher(self, subject, predicate): publisher["type"] = self._object_value(agent, DCT.type) + publisher['identifier'] = self._object_value(agent, DCT.identifier) + return publisher def _contact_details(self, subject, predicate): diff --git a/ckanext/dcat/profiles/euro_dcat_ap_base.py b/ckanext/dcat/profiles/euro_dcat_ap_base.py index 56525a28..110e04dd 100644 --- a/ckanext/dcat/profiles/euro_dcat_ap_base.py +++ b/ckanext/dcat/profiles/euro_dcat_ap_base.py @@ -123,7 +123,7 @@ def _parse_dataset_base(self, dataset_dict, dataset_ref): # Publisher publisher = self._publisher(dataset_ref, DCT.publisher) - for key in ("uri", "name", "email", "url", "type"): + for key in ("uri", "name", "email", "url", "type", "identifier"): if publisher.get(key): dataset_dict["extras"].append( {"key": "publisher_{0}".format(key), "value": publisher.get(key)} diff --git a/ckanext/dcat/profiles/schemaorg.py b/ckanext/dcat/profiles/schemaorg.py index 3b3ec3b0..88e30be6 100644 --- a/ckanext/dcat/profiles/schemaorg.py +++ 
b/ckanext/dcat/profiles/schemaorg.py @@ -221,6 +221,10 @@ def _publisher_graph(self, dataset_ref, dataset_dict): self._add_triples_from_dict(dataset_dict, contact_point, items) + publisher_identifier = self._get_dataset_value(dataset_dict, "publisher_identifier") + if publisher_identifier: + self.g.add((publisher_details, SCHEMA.identifier, Literal(publisher_identifier))) + def _temporal_graph(self, dataset_ref, dataset_dict): start = self._get_dataset_value(dataset_dict, "temporal_start") end = self._get_dataset_value(dataset_dict, "temporal_end") diff --git a/ckanext/dcat/schemas/dcat_ap_recommended.yaml b/ckanext/dcat/schemas/dcat_ap_recommended.yaml index ed386d67..883f337d 100644 --- a/ckanext/dcat/schemas/dcat_ap_recommended.yaml +++ b/ckanext/dcat/schemas/dcat_ap_recommended.yaml @@ -66,6 +66,11 @@ dataset_fields: - field_name: type label: Type + + - field_name: identifier + label: Identifier + help_text: Unique identifier for the publisher, such as a ROR ID. + help_text: Entity responsible for making the dataset available. 
- field_name: license_id diff --git a/ckanext/dcat/tests/profiles/base/test_base_profile.py b/ckanext/dcat/tests/profiles/base/test_base_profile.py index 235b001f..221c772c 100644 --- a/ckanext/dcat/tests/profiles/base/test_base_profile.py +++ b/ckanext/dcat/tests/profiles/base/test_base_profile.py @@ -647,6 +647,7 @@ def test_publisher_foaf(self): contact@some.org http://some.org + @@ -666,6 +667,7 @@ def test_publisher_foaf(self): assert publisher['email'] == 'contact@some.org' assert publisher['url'] == 'http://some.org' assert publisher['type'] == 'http://purl.org/adms/publishertype/NonProfitOrganisation' + assert publisher['identifier'] == 'https://ror.org/05wg1m734' def test_publisher_ref(self): diff --git a/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_parse.py b/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_parse.py index b9ecc880..d2b84ae4 100644 --- a/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_parse.py +++ b/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_parse.py @@ -113,6 +113,7 @@ def _get_extra_value_as_list(key): assert _get_extra_value('publisher_email') == 'contact@some.org' assert _get_extra_value('publisher_url') == 'http://some.org' assert _get_extra_value('publisher_type') == 'http://purl.org/adms/publishertype/NonProfitOrganisation' + assert _get_extra_value('publisher_identifier') == 'https://ror.org/05wg1m734' assert _get_extra_value('contact_name') == 'Point of Contact' # mailto gets removed for storage and is added again on output assert _get_extra_value('contact_email') == 'contact@some.org' diff --git a/ckanext/dcat/tests/profiles/schemaorg/test_schemaorg_profile_serialize.py b/ckanext/dcat/tests/profiles/schemaorg/test_schemaorg_profile_serialize.py index 0d1949d5..eb9a5eb4 100644 --- a/ckanext/dcat/tests/profiles/schemaorg/test_schemaorg_profile_serialize.py +++ b/ckanext/dcat/tests/profiles/schemaorg/test_schemaorg_profile_serialize.py @@ -105,6 +105,7 @@ def 
test_publisher_extras(self): {'key': 'publisher_email', 'value': 'publisher@example.com'}, {'key': 'publisher_url', 'value': 'http://example.com/publisher/home'}, {'key': 'publisher_type', 'value': 'http://purl.org/adms/publishertype/Company'}, + {'key': 'publisher_identifier', 'value': 'https://ror.org/05wg1m734'}, ] @@ -121,6 +122,7 @@ def test_publisher_extras(self): assert str(publisher) == extras['publisher_uri'] assert self._triple(g, publisher, RDF.type, SCHEMA.Organization) assert self._triple(g, publisher, SCHEMA.name, extras['publisher_name']) + assert self._triple(g, publisher, SCHEMA.identifier, extras['publisher_identifier']) contact_point = self._triple(g, publisher, SCHEMA.contactPoint, None)[2] assert contact_point @@ -144,6 +146,7 @@ def test_publisher_no_uri(self): {'key': 'publisher_email', 'value': 'publisher@example.com'}, {'key': 'publisher_url', 'value': 'http://example.com/publisher/home'}, {'key': 'publisher_type', 'value': 'http://purl.org/adms/publishertype/Company'}, + {'key': 'publisher_identifier', 'value': 'https://ror.org/05wg1m734'}, ] } extras = self._extras(dataset) @@ -158,6 +161,7 @@ def test_publisher_no_uri(self): assert isinstance(publisher, BNode) assert self._triple(g, publisher, RDF.type, SCHEMA.Organization) assert self._triple(g, publisher, SCHEMA.name, extras['publisher_name']) + assert self._triple(g, publisher, SCHEMA.identifier, extras['publisher_identifier']) contact_point = self._triple(g, publisher, SCHEMA.contactPoint, None)[2] assert contact_point diff --git a/docs/mapping.md b/docs/mapping.md index 823635a3..89057ab2 100644 --- a/docs/mapping.md +++ b/docs/mapping.md @@ -18,75 +18,76 @@ some cases the way metadata is stored internally and presented at the CKAN API l fields are properly validated, can use the scheming snippets etc. See [Schemas](getting-started.md#schemas) for more details. 
-| DCAT class | DCAT property | CKAN dataset field | CKAN fallback fields | Stored as | | -|-------------------|------------------------|-------------------------------------------|--------------------------------|-----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------| -| dcat:Dataset | - | custom:uri | | text | See [URIs](mapping.md#uris) | -| dcat:Dataset | dct:title | title | | text | | -| dcat:Dataset | dct:description | notes | | text | | -| dcat:Dataset | dcat:keyword | tags | | text | | -| dcat:Dataset | dcat:theme | custom:theme | | list | See [Lists](#lists) | -| dcat:Dataset | dct:identifier | custom:identifier | custom:guid, id | text | | -| dcat:Dataset | adms:identifier | custom:alternate_identifier | | text | | -| dcat:Dataset | dct:issued | custom:issued | metadata_created | text | | -| dcat:Dataset | dct:modified | custom:modified | metadata_modified | text | | -| dcat:Dataset | owl:versionInfo | version | custom:dcat_version | text | | -| dcat:Dataset | adms:versionNotes | custom:version_notes | | text | | -| dcat:Dataset | dct:language | custom:language | | list | See [Lists](#lists) | -| dcat:Dataset | dcat:landingPage | url | | text | | -| dcat:Dataset | dct:accrualPeriodicity | custom:frequency | | text | | -| dcat:Dataset | dct:conformsTo | custom:conforms_to | | list | See [Lists](#lists) | -| dcat:Dataset | dct:accessRights | custom:access_rights | | text | | -| dcat:Dataset | foaf:page | custom:documentation | | list | See [Lists](#lists) | -| dcat:Dataset | dct:provenance | custom:provenance | | text | | -| dcat:Dataset | dct:type | custom:dcat_type | | text | | -| dcat:Dataset | dct:hasVersion | custom:has_version | | list | See [Lists](#lists). It is assumed that these are one or more URIs referring to another dcat:Dataset | -| dcat:Dataset | dct:isVersionOf | custom:is_version_of | | list | See [Lists](#lists). 
It is assumed that these are one or more URIs referring to another dcat:Dataset | -| dcat:Dataset | dct:source | custom:source | | list | See [Lists](#lists). It is assumed that these are one or more URIs referring to another dcat:Dataset | -| dcat:Dataset | adms:sample | custom:sample | | list | See [Lists](#lists). It is assumed that these are one or more URIs referring to dcat:Distribution instances | -| dcat:Dataset | dct:spatial | custom:spatial_uri | | text | See [Spatial coverage](#spatial-coverage) | +| DCAT class | DCAT property | CKAN dataset field | CKAN fallback fields | Stored as | | +|-------------------|------------------------|---------------------------------------------|--------------------------------|-----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------| +| dcat:Dataset | - | custom:uri | | text | See [URIs](mapping.md#uris) | +| dcat:Dataset | dct:title | title | | text | | +| dcat:Dataset | dct:description | notes | | text | | +| dcat:Dataset | dcat:keyword | tags | | text | | +| dcat:Dataset | dcat:theme | custom:theme | | list | See [Lists](#lists) | +| dcat:Dataset | dct:identifier | custom:identifier | custom:guid, id | text | | +| dcat:Dataset | adms:identifier | custom:alternate_identifier | | text | | +| dcat:Dataset | dct:issued | custom:issued | metadata_created | text | | +| dcat:Dataset | dct:modified | custom:modified | metadata_modified | text | | +| dcat:Dataset | owl:versionInfo | version | custom:dcat_version | text | | +| dcat:Dataset | adms:versionNotes | custom:version_notes | | text | | +| dcat:Dataset | dct:language | custom:language | | list | See [Lists](#lists) | +| dcat:Dataset | dcat:landingPage | url | | text | | +| dcat:Dataset | dct:accrualPeriodicity | custom:frequency | | text | | +| dcat:Dataset | dct:conformsTo | custom:conforms_to | | list | See [Lists](#lists) | +| dcat:Dataset | 
dct:accessRights | custom:access_rights | | text | | +| dcat:Dataset | foaf:page | custom:documentation | | list | See [Lists](#lists) | +| dcat:Dataset | dct:provenance | custom:provenance | | text | | +| dcat:Dataset | dct:type | custom:dcat_type | | text | | +| dcat:Dataset | dct:hasVersion | custom:has_version | | list | See [Lists](#lists). It is assumed that these are one or more URIs referring to another dcat:Dataset | +| dcat:Dataset | dct:isVersionOf | custom:is_version_of | | list | See [Lists](#lists). It is assumed that these are one or more URIs referring to another dcat:Dataset | +| dcat:Dataset | dct:source | custom:source | | list | See [Lists](#lists). It is assumed that these are one or more URIs referring to another dcat:Dataset | +| dcat:Dataset | adms:sample | custom:sample | | list | See [Lists](#lists). It is assumed that these are one or more URIs referring to dcat:Distribution instances | +| dcat:Dataset | dct:spatial | custom:spatial_uri | | text | See [Spatial coverage](#spatial-coverage) | | dcat:Dataset | dct:temporal | custom:temporal_start + custom:temporal_end | | text | None, one or both extras can be present | -| dcat:Dataset | dcat:temporalResolution| custom:temporal_resolution | | list | | -| dcat:Dataset | dcat:spatialResolutionInMeters| custom:spatial_resolution_in_meters | | list | | -| dcat:Dataset | dct:isReferencedBy | custom:is_referenced_by | | list | | -| dcat:Dataset | dct:publisher | custom:publisher_uri | | text | See [URIs](mapping.md#uris) and [Publisher](#contact-points-and-publisher) | -| foaf:Agent | foaf:name | custom:publisher_name | | text | | -| foaf:Agent | foaf:mbox | custom:publisher_email | organization:title | text | | -| foaf:Agent | foaf:homepage | custom:publisher_url | | text | | -| foaf:Agent | dct:type | custom:publisher_type | | text | | -| dcat:Dataset | dcat:contactPoint | custom:contact_uri | | text | See [URIs](mapping.md#uris) and [Contact points](#contact-points-and-publisher) | -| 
vcard:Kind | vcard:fn | custom:contact_name | maintainer, author | text | | -| vcard:Kind | vcard:hasEmail | custom:contact_email | maintainer_email, author_email | text | | -| dcat:Dataset | dcat:distribution | resources | | text | | -| dcat:Distribution | - | resource:uri | | text | See [URIs](mapping.md#uris) | -| dcat:Distribution | dct:title | resource:name | | text | | -| dcat:Distribution | dcat:accessURL | resource:access_url | resource:url | text | If downloadURL is not present, accessURL will be used as resource url | -| dcat:Distribution | dcat:downloadURL | resource:download_url | | text | If present, downloadURL will be used as resource url | -| dcat:Distribution | dct:description | resource:description | | text | | -| dcat:Distribution | dcat:mediaType | resource:mimetype | | text | | -| dcat:Distribution | dct:format | resource:format | | text | | -| dcat:Distribution | dct:license | resource:license | | text | See [Licenses](#licenses) | -| dcat:Distribution | adms:status | resource:status | | text | | -| dcat:Distribution | dcat:byteSize | resource:size | | number | | -| dcat:Distribution | dct:issued | resource:issued | created | text | | -| dcat:Distribution | dct:modified | resource:modified | metadata_modified | text | | -| dcat:Distribution | dct:rights | resource:rights | | text | | -| dcat:Distribution | foaf:page | resource:documentation | | list | See [Lists](#lists) | -| dcat:Distribution | dct:language | resource:language | | list | See [Lists](#lists) | -| dcat:Distribution | dct:conformsTo | resource:conforms_to | | list | See [Lists](#lists) | -| dcat:Distribution | dcatap:availability | resource:availability | | text | | -| dcat:Distribution | dcat:compressFormat | resource:compress_format | | text | | -| dcat:Distribution | dcat:packageFormat | resource:package_format | | text | | -| dcat:Distribution | dcat:accessService | resource:access_services | | text | | -| dcat:DataService | dct:title | access_service:title | | text | | -| 
dcat:DataService | dcat:endpointURL | access_service:endpoint_url | | list | | -| dcat:DataService | dcat:endpointDescription| access_service:endpoint_description | | text | | -| dcat:DataService | dcatap:availability | access_service:availability | | text | | -| dcat:DataService | dcat:servesDataset | access_service:serves_dataset | | list | | -| dcat:DataService | dct:description | access_service:description | | text | | -| dcat:DataService | dct:license | access_service:license | | text | | -| dcat:DataService | dct:accessRights | access_service:access_rights | | text | | -| spdx:Checksum | spdx:checksumValue | resource:hash | | text | | -| spdx:Checksum | spdx:algorithm | resource:hash_algorithm | | text | | +| dcat:Dataset | dcat:temporalResolution| custom:temporal_resolution | | list | | +| dcat:Dataset | dcat:spatialResolutionInMeters| custom:spatial_resolution_in_meters | | list | | +| dcat:Dataset | dct:isReferencedBy | custom:is_referenced_by | | list | | +| dcat:Dataset | dct:publisher | custom:publisher_uri | | text | See [URIs](mapping.md#uris) and [Publisher](#contact-points-and-publisher) | +| foaf:Agent | foaf:name | custom:publisher_name | | text | | +| foaf:Agent | foaf:mbox | custom:publisher_email | organization:title | text | | +| foaf:Agent | foaf:homepage | custom:publisher_url | | text | | +| foaf:Agent | dct:type | custom:publisher_type | | text | | +| foaf:Agent | dct:identifier | custom:publisher_identifier | | text | | +| dcat:Dataset | dcat:contactPoint | custom:contact_uri | | text | See [URIs](mapping.md#uris) and [Contact points](#contact-points-and-publisher) | +| vcard:Kind | vcard:fn | custom:contact_name | maintainer, author | text | | +| vcard:Kind | vcard:hasEmail | custom:contact_email | maintainer_email, author_email | text | | +| dcat:Dataset | dcat:distribution | resources | | text | | +| dcat:Distribution | - | resource:uri | | text | See [URIs](mapping.md#uris) | +| dcat:Distribution | dct:title | resource:name | | text | | +| 
dcat:Distribution | dcat:accessURL | resource:access_url | resource:url | text | If downloadURL is not present, accessURL will be used as resource url | +| dcat:Distribution | dcat:downloadURL | resource:download_url | | text | If present, downloadURL will be used as resource url | +| dcat:Distribution | dct:description | resource:description | | text | | +| dcat:Distribution | dcat:mediaType | resource:mimetype | | text | | +| dcat:Distribution | dct:format | resource:format | | text | | +| dcat:Distribution | dct:license | resource:license | | text | See [Licenses](#licenses) | +| dcat:Distribution | adms:status | resource:status | | text | | +| dcat:Distribution | dcat:byteSize | resource:size | | number | | +| dcat:Distribution | dct:issued | resource:issued | created | text | | +| dcat:Distribution | dct:modified | resource:modified | metadata_modified | text | | +| dcat:Distribution | dct:rights | resource:rights | | text | | +| dcat:Distribution | foaf:page | resource:documentation | | list | See [Lists](#lists) | +| dcat:Distribution | dct:language | resource:language | | list | See [Lists](#lists) | +| dcat:Distribution | dct:conformsTo | resource:conforms_to | | list | See [Lists](#lists) | +| dcat:Distribution | dcatap:availability | resource:availability | | text | | +| dcat:Distribution | dcat:compressFormat | resource:compress_format | | text | | +| dcat:Distribution | dcat:packageFormat | resource:package_format | | text | | +| dcat:Distribution | dcat:accessService | resource:access_services | | text | | +| dcat:DataService | dct:title | access_service:title | | text | | +| dcat:DataService | dcat:endpointURL | access_service:endpoint_url | | list | | +| dcat:DataService | dcat:endpointDescription| access_service:endpoint_description | | text | | +| dcat:DataService | dcatap:availability | access_service:availability | | text | | +| dcat:DataService | dcat:servesDataset | access_service:serves_dataset | | list | | +| dcat:DataService | 
dct:description | access_service:description | | text | | +| dcat:DataService | dct:license | access_service:license | | text | | +| dcat:DataService | dct:accessRights | access_service:access_rights | | text | | +| spdx:Checksum | spdx:checksumValue | resource:hash | | text | | +| spdx:Checksum | spdx:algorithm | resource:hash_algorithm | | text | | ### Custom fields diff --git a/examples/dcat/dataset.rdf b/examples/dcat/dataset.rdf index b2f925c8..9e117752 100644 --- a/examples/dcat/dataset.rdf +++ b/examples/dcat/dataset.rdf @@ -75,6 +75,7 @@ contact@some.org http://some.org + From 87dd3c08974001f42c1f3c4914fb83555bfb042b Mon Sep 17 00:00:00 2001 From: Hans-Chrstian Date: Mon, 9 Sep 2024 15:29:59 +0200 Subject: [PATCH 4/7] missing label --- ckanext/dcat/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ckanext/dcat/utils.py b/ckanext/dcat/utils.py index 87aba428..de17e9ad 100644 --- a/ckanext/dcat/utils.py +++ b/ckanext/dcat/utils.py @@ -78,6 +78,7 @@ def field_labels(): 'publisher_email': _('Publisher email'), 'publisher_url': _('Publisher URL'), 'publisher_type': _('Publisher type'), + 'publisher_identifier': _('Publisher identifier'), 'contact_name': _('Contact name'), 'contact_email': _('Contact email'), 'contact_uri': _('Contact URI'), From e0becf6bec202de5455daae2f65f7d8838d335c4 Mon Sep 17 00:00:00 2001 From: Hans-Chrstian Date: Mon, 9 Sep 2024 20:01:27 +0200 Subject: [PATCH 5/7] - Add CKAN -> DCAT support publisher identifier --- ckanext/dcat/converters.py | 3 +-- ckanext/dcat/profiles/euro_dcat_ap_base.py | 3 +++ ckanext/dcat/profiles/euro_dcat_ap_scheming.py | 7 +++++++ ckanext/dcat/schemas/dcat_ap_full.yaml | 4 ++++ .../dcat/tests/profiles/dcat_ap_2/test_scheming_support.py | 7 +++++++ 5 files changed, 22 insertions(+), 2 deletions(-) diff --git a/ckanext/dcat/converters.py b/ckanext/dcat/converters.py index 2e27a0ee..afb2b773 100644 --- a/ckanext/dcat/converters.py +++ b/ckanext/dcat/converters.py @@ -27,8 +27,7 @@ def 
dcat_to_ckan(dcat_dict): if isinstance(dcat_publisher, basestring): package_dict['extras'].append({'key': 'dcat_publisher_name', 'value': dcat_publisher}) elif isinstance(dcat_publisher, dict) and dcat_publisher.get('name'): - if dcat_publisher.get('name'): - package_dict['extras'].append({'key': 'dcat_publisher_name', 'value': dcat_publisher.get('name')}) + package_dict['extras'].append({'key': 'dcat_publisher_name', 'value': dcat_publisher.get('name')}) if dcat_publisher.get('mbox'): package_dict['extras'].append({'key': 'dcat_publisher_email', 'value': dcat_publisher.get('mbox')}) diff --git a/ckanext/dcat/profiles/euro_dcat_ap_base.py b/ckanext/dcat/profiles/euro_dcat_ap_base.py index 110e04dd..28b476bb 100644 --- a/ckanext/dcat/profiles/euro_dcat_ap_base.py +++ b/ckanext/dcat/profiles/euro_dcat_ap_base.py @@ -372,6 +372,7 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref): "email": self._get_dataset_value(dataset_dict, "publisher_email"), "url": self._get_dataset_value(dataset_dict, "publisher_url"), "type": self._get_dataset_value(dataset_dict, "publisher_type"), + "identifier": self._get_dataset_value(dataset_dict, "publisher_identifier"), } elif dataset_dict.get("organization"): # Fall back to dataset org @@ -396,6 +397,7 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref): "email": org_dict.get("email"), "url": org_dict.get("url"), "type": org_dict.get("dcat_type"), + "identifier": org_dict.get("identifier"), } # Add to graph if publisher_ref: @@ -406,6 +408,7 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref): ("email", FOAF.mbox, None, Literal), ("url", FOAF.homepage, None, URIRef), ("type", DCT.type, None, URIRefOrLiteral), + ("identifier", DCT.identifier, None, URIRefOrLiteral), ] self._add_triples_from_dict(publisher_details, publisher_ref, items) diff --git a/ckanext/dcat/profiles/euro_dcat_ap_scheming.py b/ckanext/dcat/profiles/euro_dcat_ap_scheming.py index acf5fa2d..8d0ffb79 100644 --- 
a/ckanext/dcat/profiles/euro_dcat_ap_scheming.py +++ b/ckanext/dcat/profiles/euro_dcat_ap_scheming.py @@ -187,6 +187,13 @@ def _not_empty_dict(data_dict): _type=URIRef, value_modifier=self._add_mailto, ) + self._add_triple_from_dict( + publisher, + publisher_ref, + DCT.identifier, + "identifier", + _type=URIRefOrLiteral + ) temporal = dataset_dict.get("temporal_coverage") if ( diff --git a/ckanext/dcat/schemas/dcat_ap_full.yaml b/ckanext/dcat/schemas/dcat_ap_full.yaml index 8f9f4afc..36508d9a 100644 --- a/ckanext/dcat/schemas/dcat_ap_full.yaml +++ b/ckanext/dcat/schemas/dcat_ap_full.yaml @@ -66,6 +66,10 @@ dataset_fields: - field_name: type label: Type + + - field_name: identifier + label: Identifier + help_text: Unique identifier for the publisher, such as a ROR ID. help_text: Entity responsible for making the dataset available. - field_name: license_id diff --git a/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py b/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py index bec6d911..f9eb14fb 100644 --- a/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py +++ b/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py @@ -94,6 +94,7 @@ def test_e2e_ckan_to_dcat(self): "email": "publisher@example.org", "url": "https://example.org", "type": "public_body", + "identifier": "http://example.org/publisher-id", }, ], "temporal_coverage": [ @@ -301,6 +302,12 @@ def test_e2e_ckan_to_dcat(self): DCT.type, dataset_dict["publisher"][0]["type"], ) + assert self._triple( + g, + publisher[0][2], + DCT.identifier, + dataset_dict["publisher"][0]["identifier"] + ) temporal = [t for t in g.triples((dataset_ref, DCT.temporal, None))] From 6bd994a43bedaf86dec1a0c7d7ad4946ac9aca99 Mon Sep 17 00:00:00 2001 From: Hans-Christian Date: Wed, 11 Sep 2024 14:25:51 +0200 Subject: [PATCH 6/7] Update ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py Co-authored-by: Mark --- ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py | 2 +- 
1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py b/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py index f9eb14fb..b249d600 100644 --- a/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py +++ b/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py @@ -306,7 +306,7 @@ def test_e2e_ckan_to_dcat(self): g, publisher[0][2], DCT.identifier, - dataset_dict["publisher"][0]["identifier"] + URIRef(dataset_dict["publisher"][0]["identifier"]) ) temporal = [t for t in g.triples((dataset_ref, DCT.temporal, None))] From df234719993ec0d15ba5379dd09cfa187756dada Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 16 Sep 2024 12:32:42 +0200 Subject: [PATCH 7/7] Don't fail if ckanext-scheming is not installed --- ckanext/dcat/validators.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ckanext/dcat/validators.py b/ckanext/dcat/validators.py index 9bf18d49..6fc741a5 100644 --- a/ckanext/dcat/validators.py +++ b/ckanext/dcat/validators.py @@ -9,7 +9,13 @@ Invalid, _, ) -from ckanext.scheming.validation import scheming_validator + +try: + from ckanext.scheming.validation import scheming_validator +except ImportError: + def scheming_validator(func): + return func + # https://www.w3.org/TR/xmlschema11-2/#gYear regexp_xsd_year = re.compile(