diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 06f10076..06b25f56 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -77,4 +77,3 @@ jobs: ckan -c test.ini db pending-migrations --apply - name: Run tests run: pytest --ckan-ini=test.ini --cov=ckanext.dcat --cov-report=term-missing --cov-append --disable-warnings ckanext/dcat/tests - diff --git a/CHANGELOG.md b/CHANGELOG.md index c1aa08c7..69aea84f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,11 +11,21 @@ in the `ckanext/dcat/schemas` folder. See the [documentation](https://github.com/ckan/ckanext-dcat?tab=readme-ov-file#schemas) for all details. Some highlights of the new scheming based profiles: - * Actual list support in the API ooutput for list properties like `dct:language` + * Actual list support in the API output for list properties like `dct:language` * Multiple objects now allowed for properties like `dcat:ContactPoint`, `dct:spatial` or `dct:temporal` * Custom validators for date values that allow `xsd:gYear`, `xsd:gYearMonth`, `xsd:date` and `xsd:dateTime` (#281) +* [SHACL validation](https://github.com/SEMICeu/DCAT-AP/tree/master/releases/2.1.1) for DCAT-AP 2.1.1 profile (scheming and legacy). + SHACL validation surfaced the following issues in the existing profiles, which are now fixed: + * Cast `dcat:byteSize` and `dcat:spatialResolutionInMeters` as Decimal, not float + * Allow only one value of `dcat:spatialResolutionInMeters` and `dcat:temporalResolution` + * Only output the WKT version of geometries in `locn:geometry`, `dcat:bbox` and `dcat:centroid`. 
Sites that for some reason + require GeoJSON (or both) can use the `ckanext.dcat.output_spatial_format` config option + to choose which format to use + * When using the `euro_dcat_ap_2` profile, don't output temporal extent namespaced + both with `schema` and `dcat`, just with the latter (`dcat:startDate` and `dcat:endDate`) + (#288) * New `ckan dcat consume` and `ckan dcat produce` CLI commands (#279) * Parse dcat:spatialResolutionInMeters as float (#285) * Split profile classes into their own separate files (#282) diff --git a/ckanext/dcat/profiles/base.py b/ckanext/dcat/profiles/base.py index d1ff561b..0b307bdd 100644 --- a/ckanext/dcat/profiles/base.py +++ b/ckanext/dcat/profiles/base.py @@ -7,7 +7,7 @@ from rdflib.namespace import Namespace, RDF, XSD, SKOS, RDFS from geomet import wkt, InvalidGeoJSONException -from ckantoolkit import config, url_for, asbool, get_action, ObjectNotFound +from ckantoolkit import config, url_for, asbool, aslist, get_action, ObjectNotFound from ckan.model.license import LicenseRegister from ckan.lib.helpers import resource_formats from ckanext.dcat.utils import DCAT_EXPOSE_SUBCATALOGS @@ -46,6 +46,8 @@ GEOJSON_IMT = "https://www.iana.org/assignments/media-types/application/vnd.geo+json" +DEFAULT_SPATIAL_FORMATS = ["wkt"] + ROOT_DATASET_FIELDS = [ 'name', 'title', @@ -728,26 +730,41 @@ def _read_list_value(self, value): def _add_spatial_value_to_graph(self, spatial_ref, predicate, value): """ - Adds spatial triples to the graph. + Adds spatial triples to the graph. Assumes that value is a GeoJSON string + or object. 
""" - # GeoJSON - self.g.add((spatial_ref, predicate, Literal(value, datatype=GEOJSON_IMT))) - # WKT, because GeoDCAT-AP says so - try: - if isinstance(value, str): + spatial_formats = aslist( + config.get( + "ckanext.dcat.output_spatial_format", DEFAULT_SPATIAL_FORMATS + ) + ) + + if isinstance(value, str): + try: value = json.loads(value) - self.g.add( - ( - spatial_ref, - predicate, - Literal( - wkt.dumps(value, decimals=4), - datatype=GSP.wktLiteral, - ), + except (TypeError, ValueError): + return + + if "wkt" in spatial_formats: + # WKT, because GeoDCAT-AP says so + try: + self.g.add( + ( + spatial_ref, + predicate, + Literal( + wkt.dumps(value, decimals=4), + datatype=GSP.wktLiteral, + ), + ) ) - ) - except (TypeError, ValueError, InvalidGeoJSONException) as e: - pass + except (TypeError, ValueError, InvalidGeoJSONException): + pass + + if "geojson" in spatial_formats: + # GeoJSON + self.g.add((spatial_ref, predicate, Literal(json.dumps(value), datatype=GEOJSON_IMT))) + def _add_spatial_to_dict(self, dataset_dict, key, spatial): if spatial.get(key): diff --git a/ckanext/dcat/profiles/euro_dcat_ap.py b/ckanext/dcat/profiles/euro_dcat_ap.py index b7e4cae4..b0057110 100644 --- a/ckanext/dcat/profiles/euro_dcat_ap.py +++ b/ckanext/dcat/profiles/euro_dcat_ap.py @@ -1,4 +1,5 @@ import json +from decimal import Decimal, DecimalException from rdflib import term, URIRef, BNode, Literal import ckantoolkit as toolkit @@ -545,10 +546,10 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): ( distribution, DCAT.byteSize, - Literal(float(resource_dict["size"]), datatype=XSD.decimal), + Literal(Decimal(resource_dict["size"]), datatype=XSD.decimal), ) ) - except (ValueError, TypeError): + except (ValueError, TypeError, DecimalException): g.add((distribution, DCAT.byteSize, Literal(resource_dict["size"]))) # Checksum if resource_dict.get("hash"): diff --git a/ckanext/dcat/profiles/euro_dcat_ap_2.py b/ckanext/dcat/profiles/euro_dcat_ap_2.py index c1f9274f..02c726d3 
100644 --- a/ckanext/dcat/profiles/euro_dcat_ap_2.py +++ b/ckanext/dcat/profiles/euro_dcat_ap_2.py @@ -1,4 +1,5 @@ import json +from decimal import Decimal, DecimalException from rdflib import URIRef, BNode, Literal from ckanext.dcat.utils import resource_uri @@ -11,6 +12,7 @@ DCATAP, DCT, XSD, + SCHEMA, ) from .euro_dcat_ap import EuropeanDCATAPProfile @@ -31,9 +33,13 @@ def parse_dataset(self, dataset_dict, dataset_ref): # call super method super(EuropeanDCATAP2Profile, self).parse_dataset(dataset_dict, dataset_ref) + # Standard values + value = self._object_value(dataset_ref, DCAT.temporalResolution) + if value: + dataset_dict["extras"].append({"key": "temporal_resolution", "value": value}) + # Lists for key, predicate in ( - ("temporal_resolution", DCAT.temporalResolution), ("is_referenced_by", DCT.isReferencedBy), ("applicable_legislation", DCATAP.applicableLegislation), ("hvd_category", DCATAP.hvdCategory), @@ -54,14 +60,20 @@ def parse_dataset(self, dataset_dict, dataset_ref): self._add_spatial_to_dict(dataset_dict, key, spatial) # Spatial resolution in meters - spatial_resolution_in_meters = self._object_value_float_list( + spatial_resolution = self._object_value_float_list( dataset_ref, DCAT.spatialResolutionInMeters ) - if spatial_resolution_in_meters: + if spatial_resolution: + # For some reason we incorrectly allowed lists in this property at some point + # keep support for it but default to single value + value = ( + spatial_resolution[0] if len(spatial_resolution) == 1 + else json.dumps(spatial_resolution) + ) dataset_dict["extras"].append( { "key": "spatial_resolution_in_meters", - "value": json.dumps(spatial_resolution_in_meters), + "value": value, } ) @@ -147,15 +159,17 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): dataset_dict, dataset_ref ) + # Standard values + self._add_triple_from_dict( + dataset_dict, + dataset_ref, + DCAT.temporalResolution, + "temporal_resolution", + _datatype=XSD.duration, + ) + # Lists for key, predicate, 
fallbacks, type, datatype in ( - ( - "temporal_resolution", - DCAT.temporalResolution, - None, - Literal, - XSD.duration, - ), ("is_referenced_by", DCT.isReferencedBy, None, URIRefOrLiteral, None), ( "applicable_legislation", @@ -178,6 +192,14 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): ) # Temporal + + # The profile for DCAT-AP 1 stored triples using schema:startDate, + # remove them to avoid duplication + for temporal in self.g.objects(dataset_ref, DCT.temporal): + if SCHEMA.startDate in [t for t in self.g.predicates(temporal, None)]: + self.g.remove((temporal, None, None)) + self.g.remove((dataset_ref, DCT.temporal, temporal)) + start = self._get_dataset_value(dataset_dict, "temporal_start") end = self._get_dataset_value(dataset_dict, "temporal_end") if start or end: @@ -216,10 +238,10 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): ( dataset_ref, DCAT.spatialResolutionInMeters, - Literal(float(value), datatype=XSD.decimal), + Literal(Decimal(value), datatype=XSD.decimal), ) ) - except (ValueError, TypeError): + except (ValueError, TypeError, DecimalException): self.g.add( (dataset_ref, DCAT.spatialResolutionInMeters, Literal(value)) ) @@ -278,7 +300,7 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): ("license", DCT.license, None, URIRefOrLiteral), ("access_rights", DCT.accessRights, None, URIRefOrLiteral), ("title", DCT.title, None, Literal), - ("endpoint_description", DCAT.endpointDescription, None, Literal), + ("endpoint_description", DCAT.endpointDescription, None, URIRefOrLiteral), ("description", DCT.description, None, Literal), ] diff --git a/ckanext/dcat/profiles/euro_dcat_ap_scheming.py b/ckanext/dcat/profiles/euro_dcat_ap_scheming.py index 12eb540e..5fdd4ced 100644 --- a/ckanext/dcat/profiles/euro_dcat_ap_scheming.py +++ b/ckanext/dcat/profiles/euro_dcat_ap_scheming.py @@ -180,9 +180,9 @@ def _not_empty_dict(data_dict): temporal_ref = BNode() self.g.add((temporal_ref, RDF.type, DCT.PeriodOfTime)) if 
item.get("start"): - self._add_date_triple(temporal_ref, SCHEMA.startDate, item["start"]) + self._add_date_triple(temporal_ref, DCAT.startDate, item["start"]) if item.get("end"): - self._add_date_triple(temporal_ref, SCHEMA.endDate, item["end"]) + self._add_date_triple(temporal_ref, DCAT.endDate, item["end"]) self.g.add((dataset_ref, DCT.temporal, temporal_ref)) spatial = dataset_dict.get("spatial_coverage") diff --git a/ckanext/dcat/schemas/dcat_ap_2.1_full.yaml b/ckanext/dcat/schemas/dcat_ap_2.1_full.yaml index d9532011..8f9f4afc 100644 --- a/ckanext/dcat/schemas/dcat_ap_2.1_full.yaml +++ b/ckanext/dcat/schemas/dcat_ap_2.1_full.yaml @@ -143,8 +143,6 @@ dataset_fields: - field_name: temporal_resolution label: Temporal resolution - preset: multiple_text - validators: ignore_missing scheming_multiple_text help_text: Minimum time period resolvable in the dataset. - field_name: spatial_coverage @@ -169,8 +167,6 @@ dataset_fields: - field_name: spatial_resolution_in_meters label: Spatial resolution in meters - preset: multiple_text - validators: ignore_missing scheming_multiple_number help_text: Minimum spatial separation resolvable in a dataset, measured in meters. - field_name: access_rights @@ -368,9 +364,18 @@ resource_fields: - field_name: title label: Title + - field_name: endpoint_description + label: Endpoint description + - field_name: endpoint_url label: Endpoint URL preset: multiple_text + + - field_name: serves_dataset + label: Serves dataset + preset: multiple_text + validators: ignore_missing scheming_multiple_text + help_text: A data service that gives access to the resource. # Note: if not provided, this will be autogenerated diff --git a/ckanext/dcat/tests/shacl/dcat-ap_2.1.1_shacl_shapes.ttl b/ckanext/dcat/tests/shacl/dcat-ap_2.1.1_shacl_shapes.ttl new file mode 100644 index 00000000..8c441529 --- /dev/null +++ b/ckanext/dcat/tests/shacl/dcat-ap_2.1.1_shacl_shapes.ttl @@ -0,0 +1,639 @@ +@prefix rdf: . +@prefix : . +@prefix adms: . +@prefix cc: . 
+@prefix dc: . +@prefix dcat: . +@prefix dct: . +@prefix foaf: . +@prefix lcon: . +@prefix org: . +@prefix owl: . +@prefix odrl: . +@prefix prov: . +@prefix rdfs: . +@prefix schema: . +@prefix sh: . +@prefix skos: . +@prefix spdx: . +@prefix time: . +@prefix vcard: . +@prefix xsd: . +@prefix dcatap: . + + + dcat:accessURL ; + dcat:downloadURL ; + dcatap:availability ; + dct:format ; + dct:conformsTo ; + dct:creator [ + rdfs:seeAlso ; + org:memberOf ; + foaf:homepage ; + foaf:name "Bert Van Nuffelen" + ], [ + rdfs:seeAlso ; + org:memberOf ; + foaf:homepage ; + foaf:name "Natasa Sofou" + ], [ + rdfs:seeAlso ; + org:memberOf ; + foaf:homepage ; + foaf:name "Eugeniu Costetchi" + ], [ + rdfs:seeAlso ; + org:memberOf ; + foaf:homepage ; + foaf:name "Makx Dekkers" + ], [ + rdfs:seeAlso ; + org:memberOf ; + foaf:homepage ; + foaf:name "Nikolaos Loutas" + ], [ + rdfs:seeAlso ; + org:memberOf ; + foaf:homepage ; + foaf:name "Vassilios Peristeras" + ] ; + dct:license ; + cc:attributionURL ; + dct:modified "2021-12-01"^^xsd:date ; + dct:publisher ; + dct:relation ; + dct:description "This document specifies the constraints on properties and classes expressed by DCAT-AP in SHACL."@en ; + dct:title "The constraints of DCAT Application Profile for Data Portals in Europe"@en ; + owl:versionInfo "2.1.1" ; + foaf:homepage ; + foaf:maker [ + foaf:mbox ; + foaf:name "DCAT-AP Working Group" ; + foaf:page , + ] . + + + +#------------------------------------------------------------------------- +# The shapes in this file cover all classes in DCAT-AP 2.1.1. +# It covers all constraints that must be satisfied except those checking the ranges. 
+# +#------------------------------------------------------------------------- + +:Agent_Shape + a sh:NodeShape ; + sh:name "Agent"@en ; + sh:property [ + sh:minCount 1 ; + sh:nodeKind sh:Literal ; + sh:path foaf:name ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:path dct:type ; + sh:severity sh:Violation + ] ; + sh:targetClass foaf:Agent . + +:CatalogRecord_Shape + a sh:NodeShape ; + sh:name "Catalog Record"@en ; + sh:property [ + sh:maxCount 1 ; + sh:minCount 1 ; + sh:node :DcatResource_Shape ; + sh:path foaf:primaryTopic ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:minCount 1 ; + sh:path dct:modified ; + sh:severity sh:Violation ; + sh:node :DateOrDateTimeDataType_Shape + ], [ + sh:maxCount 1 ; + sh:path dct:conformsTo ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:node :DateOrDateTimeDataType_Shape ; + sh:path dct:issued ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:path adms:status ; + sh:severity sh:Violation + ], [ + sh:path dct:language ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:path dct:source ; + sh:severity sh:Violation + ], [ + sh:nodeKind sh:Literal ; + sh:path dct:title ; + sh:severity sh:Violation + ], [ + sh:nodeKind sh:Literal ; + sh:path dct:description ; + sh:severity sh:Violation + ] ; + sh:targetClass dcat:CatalogRecord . 
+ +:Catalog_Shape + a sh:NodeShape ; + sh:name "Catalog"@en ; + sh:property [ + sh:path dct:language ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:path dct:license ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:node :DateOrDateTimeDataType_Shape ; + sh:path dct:issued ; + sh:severity sh:Violation + ], [ + sh:path dct:spatial ; + sh:severity sh:Violation + ], [ + sh:path dct:hasPart ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:path dct:isPartOf ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:node :DateOrDateTimeDataType_Shape ; + sh:path dct:modified ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:path dct:rights ; + sh:severity sh:Violation + ], [ + sh:path dcat:record ; + sh:severity sh:Violation + ], [ + sh:path dcat:themeTaxonomy ; + sh:severity sh:Violation + ], [ + sh:path dcat:service ; + sh:severity sh:Violation + ], [ + sh:path dcat:catalog ; + sh:severity sh:Violation + ], [ + sh:path dct:creator ; + sh:severity sh:Violation + ], [ + sh:path dcat:dataset ; + sh:severity sh:Violation + ], [ + sh:minCount 1 ; + sh:nodeKind sh:Literal ; + sh:path dct:description ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:minCount 1 ; + sh:path dct:publisher ; + sh:severity sh:Violation + ], [ + sh:minCount 1 ; + sh:nodeKind sh:Literal ; + sh:path dct:title ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:path foaf:homepage ; + sh:severity sh:Violation + ] ; + sh:targetClass dcat:Catalog . + +:CategoryScheme_Shape + a sh:NodeShape ; + sh:name "Category Scheme"@en ; + sh:property [ + sh:minCount 1 ; + sh:nodeKind sh:Literal ; + sh:path dct:title ; + sh:severity sh:Violation + ] ; + sh:targetClass skos:ConceptScheme . + +:Category_Shape + a sh:NodeShape ; + sh:name "Category"@en ; + sh:property [ + sh:minCount 1 ; + sh:nodeKind sh:Literal ; + sh:path skos:prefLabel ; + sh:severity sh:Violation + ] ; + sh:targetClass skos:Concept . 
+ +:Checksum_Shape + a sh:NodeShape ; + sh:name "Checksum"@en ; + sh:property [ + sh:hasValue spdx:checksumAlgorithm_sha1 ; + sh:maxCount 1 ; + sh:minCount 1 ; + sh:path spdx:algorithm ; + sh:severity sh:Violation + ], [ + sh:datatype xsd:hexBinary ; + sh:maxCount 1 ; + sh:minCount 1 ; + sh:path spdx:checksumValue ; + sh:severity sh:Violation + ] ; + sh:targetClass spdx:Checksum . + +:DataService_Shape + a sh:NodeShape ; + sh:name "Data Service"@en ; + sh:property [ + sh:minCount 1 ; + sh:nodeKind sh:Literal ; + sh:path dct:title ; + sh:severity sh:Violation + ], [ + sh:minCount 1 ; + sh:nodeKind sh:BlankNodeOrIRI ; + sh:path dcat:endpointURL ; + sh:severity sh:Violation + ], [ + sh:path dcat:servesDataset ; + sh:severity sh:Violation + ], [ + sh:nodeKind sh:Literal ; + sh:path dct:description ; + sh:severity sh:Violation + ], [ + sh:nodeKind sh:BlankNodeOrIRI ; + sh:path dcat:endpointDescription ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:path dct:license ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:path dct:accessRights ; + sh:severity sh:Violation + ] ; + sh:targetClass dcat:DataService . 
+ +:Dataset_Shape + a sh:NodeShape ; + sh:name "Dataset"@en ; + sh:property [ + sh:minCount 1 ; + sh:nodeKind sh:Literal ; + sh:path dct:description ; + sh:severity sh:Violation + ], [ + sh:minCount 1 ; + sh:nodeKind sh:Literal ; + sh:path dct:title ; + sh:severity sh:Violation + ], [ + sh:nodeKind sh:Literal ; + sh:path dct:identifier ; + sh:severity sh:Violation + ], [ + sh:path dcat:contactPoint ; + sh:severity sh:Violation + ], [ + sh:path dcat:distribution ; + sh:severity sh:Violation + ], [ + sh:nodeKind sh:Literal ; + sh:path dcat:keyword ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:path dct:publisher ; + sh:severity sh:Violation + ], [ + sh:path dct:spatial ; + sh:severity sh:Violation + ], [ + sh:path dct:temporal ; + sh:severity sh:Violation + ], [ + sh:path dcat:theme ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:path dct:accessRights ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:path dct:accrualPeriodicity ; + sh:severity sh:Violation + ], [ + sh:path dct:conformsTo ; + sh:severity sh:Violation + ], [ + sh:path dct:hasVersion ; + sh:severity sh:Violation + ], [ + sh:path dct:isVersionOf ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:path dct:issued ; + sh:severity sh:Violation ; + sh:node :DateOrDateTimeDataType_Shape + ], [ + sh:path dct:language ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:path dct:modified ; + sh:severity sh:Violation ; + sh:node :DateOrDateTimeDataType_Shape + ], [ + sh:path dct:provenance ; + sh:severity sh:Violation + ], [ + sh:nodeKind sh:BlankNodeOrIRI ; + sh:path dct:relation ; + sh:severity sh:Violation + ], [ + sh:path dct:source ; + sh:severity sh:Violation + ], [ + sh:path dct:type ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:nodeKind sh:Literal ; + sh:path owl:versionInfo ; + sh:severity sh:Violation + ], [ + sh:nodeKind sh:Literal ; + sh:path adms:versionNotes ; + sh:severity sh:Violation + ], [ + sh:path adms:identifier ; + sh:severity 
sh:Violation + ], [ + sh:path adms:sample ; + sh:severity sh:Violation + ], [ + sh:path dcat:landingPage ; + sh:severity sh:Violation + ], [ + sh:path foaf:page ; + sh:severity sh:Violation + ], [ + sh:path dcat:qualifiedRelation ; + sh:severity sh:Violation + ], [ + sh:nodeKind sh:BlankNodeOrIRI ; + sh:path dc:isReferencedBy ; + sh:severity sh:Violation + ], [ + sh:path prov:qualifiedAttribution ; + sh:severity sh:Violation + ], [ + sh:path prov:wasGeneratedBy ; + sh:severity sh:Violation + ], [ + sh:datatype xsd:duration ; + sh:maxCount 1 ; + sh:path dcat:temporalResolution ; + sh:severity sh:Violation + ], [ + sh:datatype xsd:decimal ; + sh:maxCount 1 ; + sh:path dcat:spatialResolutionInMeters ; + sh:severity sh:Violation + ], [ + sh:path dct:creator ; + sh:severity sh:Violation + ] ; + sh:targetClass dcat:Dataset . + +:DateOrDateTimeDataType_Shape + a sh:NodeShape ; + rdfs:comment "Date time date disjunction shape checks that a datatype property receives a temporal value: date, dateTime, gYear or gYearMonth literal" ; + rdfs:label "Date time date disjunction" ; + sh:message "The values must be data typed as either xsd:date, xsd:dateTime, xsd:gYear or xsd:gYearMonth" ; + sh:or ([ + sh:datatype xsd:date + ] + [ + sh:datatype xsd:dateTime + ] + [ + sh:datatype xsd:gYear + ] + [ + sh:datatype xsd:gYearMonth + ] + ) . + +:DcatResource_Shape + a sh:NodeShape ; + rdfs:comment "the union of Catalog, Dataset and DataService" ; + rdfs:label "dcat:Resource" ; + sh:message "The node is either a Catalog, Dataset or a DataService" ; + sh:or ([ + sh:class dcat:Catalog + ] + [ + sh:class dcat:Dataset + ] + [ + sh:class dcat:DataService + ] + ) . 
+ +:Distribution_Shape + a sh:NodeShape ; + sh:name "Distribution"@en ; + sh:property [ + sh:path dct:conformsTo ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:node :DateOrDateTimeDataType_Shape ; + sh:path dct:issued ; + sh:severity sh:Violation + ], [ + sh:path dct:language ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:node :DateOrDateTimeDataType_Shape ; + sh:path dct:modified ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:path dct:rights ; + sh:severity sh:Violation + ], [ + sh:nodeKind sh:Literal ; + sh:path dct:title ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:path spdx:checksum ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:path adms:status ; + sh:severity sh:Violation + ], [ + sh:datatype xsd:decimal ; + sh:maxCount 1 ; + sh:path dcat:byteSize ; + sh:severity sh:Violation + ], [ + sh:nodeKind sh:BlankNodeOrIRI; + sh:path dcat:downloadURL ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:path dcat:mediaType ; + sh:severity sh:Violation + ], [ + sh:path foaf:page ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:path odrl:hasPolicy ; + sh:severity sh:Violation + ], [ + sh:path dcat:accessService ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:path dcat:compressFormat ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:path dcat:packageFormat ; + sh:severity sh:Violation + ], [ + sh:datatype xsd:duration ; + sh:maxCount 1 ; + sh:path dcat:temporalResolution ; + sh:severity sh:Violation + ], [ + sh:datatype xsd:decimal ; + sh:maxCount 1 ; + sh:path dcat:spatialResolutionInMeters ; + sh:severity sh:Violation + ], [ + sh:minCount 1 ; + sh:nodeKind sh:BlankNodeOrIRI; + sh:path dcat:accessURL ; + sh:severity sh:Violation + ], [ + sh:nodeKind sh:Literal ; + sh:path dct:description ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:path dcatap:availability ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:path dct:format ; + sh:severity sh:Violation + 
], [ + sh:maxCount 1 ; + sh:path dct:license ; + sh:severity sh:Violation + ] ; + sh:targetClass dcat:Distribution . + +:Identifier_Shape + a sh:NodeShape ; + sh:name "Identifier"@en ; + sh:property [ + sh:maxCount 1 ; + sh:path skos:notation ; + sh:severity sh:Violation + ] ; + sh:targetClass adms:Identifier . + +:LicenceDocument_Shape + a sh:NodeShape ; + sh:name "Licence Document"@en ; + sh:property [ + sh:path dct:type ; + sh:severity sh:Violation + ] ; + sh:targetClass dct:LicenseDocument . + +:Location_Shape + a sh:NodeShape ; + sh:name "Location"@en ; + sh:property [ + sh:maxCount 1 ; + sh:nodeKind sh:Literal ; + sh:path dcat:bbox ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:nodeKind sh:Literal ; + sh:path dcat:centroid ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:nodeKind sh:Literal ; + sh:path lcon:geometry ; + sh:severity sh:Violation + ] ; + sh:targetClass dct:Location . + +:PeriodOfTime_Shape + a sh:NodeShape ; + sh:name "PeriodOfTime"@en ; + sh:property [ + sh:maxCount 1 ; + sh:path dcat:endDate ; + sh:severity sh:Violation ; + sh:node :DateOrDateTimeDataType_Shape + ], [ + sh:maxCount 1 ; + sh:path time:hasBeginning ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:path time:hasEnd ; + sh:severity sh:Violation + ], [ + sh:maxCount 1 ; + sh:path dcat:startDate ; + sh:severity sh:Violation ; + sh:node :DateOrDateTimeDataType_Shape + ] ; + sh:targetClass dct:PeriodOfTime . + +:Relationship_Shape + a sh:NodeShape ; + sh:name "Relationship"@en ; + sh:property [ + sh:minCount 1 ; + sh:path dct:relation ; + sh:severity sh:Violation + ], [ + sh:minCount 1 ; + sh:path dcat:hadRole ; + sh:severity sh:Violation + ] ; + sh:targetClass dcat:Relationship . 
+ diff --git a/ckanext/dcat/tests/shacl/dcat-ap_2.1.1_shacl_shapes_recommended.ttl b/ckanext/dcat/tests/shacl/dcat-ap_2.1.1_shacl_shapes_recommended.ttl new file mode 100644 index 00000000..13630a19 --- /dev/null +++ b/ckanext/dcat/tests/shacl/dcat-ap_2.1.1_shacl_shapes_recommended.ttl @@ -0,0 +1,335 @@ +@prefix rdf: . +@prefix : . +@prefix adms: . +@prefix cc: . +@prefix dc: . +@prefix dcat: . +@prefix dct: . +@prefix foaf: . +@prefix lcon: . +@prefix org: . +@prefix owl: . +@prefix odrl: . +@prefix prov: . +@prefix rdfs: . +@prefix schema: . +@prefix sh: . +@prefix skos: . +@prefix spdx: . +@prefix time: . +@prefix vcard: . +@prefix xsd: . +@prefix dcatap: . + + + dcatap:availability ; + dct:format ; + dct:conformsTo ; + dct:creator [ + rdfs:seeAlso ; + org:memberOf ; + foaf:homepage ; + foaf:name "Bert Van Nuffelen" + ], [ + rdfs:seeAlso ; + org:memberOf ; + foaf:homepage ; + foaf:name "Natasa Sofou" + ], [ + rdfs:seeAlso ; + org:memberOf ; + foaf:homepage ; + foaf:name "Eugeniu Costetchi" + ], [ + rdfs:seeAlso ; + org:memberOf ; + foaf:homepage ; + foaf:name "Makx Dekkers" + ], [ + rdfs:seeAlso ; + org:memberOf ; + foaf:homepage ; + foaf:name "Nikolaos Loutas" + ], [ + rdfs:seeAlso ; + org:memberOf ; + foaf:homepage ; + foaf:name "Vassilios Peristeras" + ] ; + dct:license ; + cc:attributionURL ; + dct:publisher ; + dct:description "This document specifies the constraints on properties and classes expressed by DCAT-AP in SHACL."@en ; + dct:title "The constraints of DCAT Application Profile for Data Portals in Europe"@en ; + owl:versionInfo "2.1.1" ; + foaf:maker [ + foaf:mbox ; + foaf:name "DCAT-AP Working Group" ; + foaf:page , + ] . + + + +#------------------------------------------------------------------------- +# The shapes in this file cover all recommendations in DCAT-AP 2.1.1. 
+# +# +#------------------------------------------------------------------------- + +:Agent_Shape + a sh:NodeShape ; + sh:name "Agent"@en ; + sh:property [ + sh:minCount 1 ; + sh:path dct:type ; + sh:severity sh:Warning + ] ; + sh:targetClass foaf:Agent . + +:CatalogRecord_Shape + a sh:NodeShape ; + sh:name "Catalog Record"@en ; + sh:property [ + sh:minCount 1 ; + sh:path dct:conformsTo ; + sh:severity sh:Warning + ], [ + sh:minCount 1 ; + sh:path dct:issued ; + sh:severity sh:Warning + ], [ + sh:minCount 1 ; + sh:path adms:status ; + sh:severity sh:Warning + ] ; + sh:targetClass dcat:CatalogRecord . + + +:Catalog_Shape + a sh:NodeShape ; + sh:name "Catalog"@en ; + sh:property [ + sh:minCount 1 ; + sh:path dct:language ; + sh:severity sh:Warning + ], [ + sh:minCount 1 ; + sh:path dct:issued ; + sh:severity sh:Warning + ], [ + sh:minCount 1 ; + sh:path dct:license; + sh:severity sh:Warning + ], [ + sh:minCount 1 ; + sh:path dct:spatial ; + sh:severity sh:Warning + ], [ + sh:minCount 1 ; + sh:path dct:modified ; + sh:severity sh:Warning + ], [ + sh:minCount 1 ; + sh:path dcat:themeTaxonomy ; + sh:severity sh:Warning + ], [ + sh:minCount 1 ; + sh:path foaf:homepage ; + sh:severity sh:Warning + ] ; + sh:targetClass dcat:Catalog . + +:Catalog_Shape2 + a sh:NodeShape ; + sh:name "Catalog"@en ; + sh:or ( + [ + sh:path dcat:dataset ; + sh:minCount 1 ; + ] + [ + sh:path dcat:service ; + sh:minCount 1 ; + ] + ) ; + sh:severity sh:Warning; + sh:targetClass dcat:Catalog . + + +# +# Outcommented because no constraint is present +# and according to the W3C spec sh:property expects at least a sh:path +# + +#:CategoryScheme_Shape +# a sh:NodeShape ; +# sh:name "Category Scheme"@en ; +# sh:property [ +# ] ; +# sh:targetClass skos:ConceptScheme . +# +#:Category_Shape +# a sh:NodeShape ; +# sh:name "Category"@en ; +# sh:property [ +# ] ; +# sh:targetClass skos:Concept . 
+# +#:Checksum_Shape +# a sh:NodeShape ; +# sh:name "Checksum"@en ; +# sh:property [ +# ] ; +# sh:targetClass spdx:Checksum . + +:DataService_Shape + a sh:NodeShape ; + sh:name "Data Service"@en ; + sh:property [ + sh:minCount 1 ; + sh:path dcat:servesDataset ; + sh:severity sh:Warning + ], [ + sh:minCount 1 ; + sh:path dcat:endpointDescription ; + sh:severity sh:Warning + ] ; + sh:targetClass dcat:DataService . + +:Dataset_Shape + a sh:NodeShape ; + sh:name "Dataset"@en ; + sh:property [ + sh:minCount 1 ; + sh:path dcat:contactPoint ; + sh:severity sh:Warning + ], [ + sh:minCount 1 ; + sh:path dcat:distribution ; + sh:severity sh:Warning + ], [ + sh:minCount 1 ; + sh:path dcat:keyword ; + sh:severity sh:Warning + ], [ + sh:minCount 1 ; + sh:path dct:publisher ; + sh:severity sh:Warning + ], [ + sh:minCount 1 ; + sh:path dct:spatial ; + sh:severity sh:Warning + ], [ + sh:minCount 1 ; + sh:path dct:temporal ; + sh:severity sh:Warning + ], [ + sh:minCount 1 ; + sh:path dcat:theme ; + sh:severity sh:Warning + ] ; + sh:targetClass dcat:Dataset . + +:DateOrDateTimeDataType_Shape + a sh:NodeShape ; + rdfs:comment "Date time date disjunction shape checks that a datatype property receives a date or a dateTime literal" ; + rdfs:label "Date time date disjunction" ; + sh:message "The values must be data typed as either xsd:date or xsd:dateTime" ; + sh:or ([ + sh:datatype xsd:date + ] + [ + sh:datatype xsd:dateTime + ] + ) . + +:DcatResource_Shape + a sh:NodeShape ; + rdfs:comment "the union of Catalog, Dataset and DataService" ; + rdfs:label "dcat:Resource" ; + sh:message "The node is either a Catalog, Dataset or a DataService" ; + sh:or ([ + sh:class dcat:Catalog + ] + [ + sh:class dcat:Dataset + ] + [ + sh:class dcat:DataService + ] + ) . 
+ +:Distribution_Shape + a sh:NodeShape ; + sh:name "Distribution"@en ; + sh:property [ + sh:minCount 1 ; + sh:path dct:description ; + sh:severity sh:Warning + ], [ + sh:minCount 1 ; + sh:path dcatap:availability ; + sh:severity sh:Warning + ], [ + sh:minCount 1 ; + sh:path dct:format ; + sh:severity sh:Warning + ], [ + sh:minCount 1 ; + sh:path dct:license ; + sh:severity sh:Warning + ] ; + sh:targetClass dcat:Distribution . + +#:Identifier_Shape +# a sh:NodeShape ; +# sh:name "Identifier"@en ; +# sh:property [ +# ] ; +# sh:targetClass adms:Identifier . + +:LicenceDocument_Shape + a sh:NodeShape ; + sh:name "Licence Document"@en ; + sh:property [ + sh:minCount 1 ; + sh:path dct:type ; + sh:severity sh:Warning + ] ; + sh:targetClass dct:LicenseDocument . + +:Location_Shape + a sh:NodeShape ; + sh:name "Location"@en ; + sh:property [ + sh:minCount 1 ; + sh:path dcat:bbox ; + sh:severity sh:Warning + ], [ + sh:minCount 1 ; + sh:path dcat:centroid ; + sh:severity sh:Warning + ] ; + sh:targetClass dct:Location . + +:PeriodOfTime_Shape + a sh:NodeShape ; + sh:name "PeriodOfTime"@en ; + sh:property [ + sh:minCount 1 ; + sh:path dcat:endDate ; + sh:severity sh:Warning ; + ], [ + sh:minCount 1 ; + sh:path dcat:startDate ; + sh:severity sh:Warning ; + ] ; + sh:targetClass dct:PeriodOfTime . + +#:Relationship_Shape +# a sh:NodeShape ; +# sh:name "Relationship"@en ; +# sh:property [ +# ] ; +# sh:targetClass dcat:Relationship . 
+ diff --git a/ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py b/ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py index 69ffe7d1..4ce4a2de 100644 --- a/ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py +++ b/ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py @@ -106,13 +106,9 @@ def test_dataset_all_fields(self): # Dataset extras = self._extras(dataset) - temporal_resolution_list = json.loads(extras['temporal_resolution']) - assert len(temporal_resolution_list) == 1 - assert temporal_resolution in temporal_resolution_list + assert extras['temporal_resolution'] == temporal_resolution - spatial_resolution_list = json.loads(extras['spatial_resolution_in_meters']) - assert len(spatial_resolution_list) == 1 - assert spatial_resolution_in_meters in spatial_resolution_list + assert extras['spatial_resolution_in_meters'] == spatial_resolution_in_meters isreferencedby_list = json.loads(extras['is_referenced_by']) assert len(isreferencedby_list) == 1 @@ -329,10 +325,8 @@ def test_temporal_resolution_multiple(self): dataset = URIRef('http://example.org/datasets/1') g.add((dataset, RDF.type, DCAT.Dataset)) - temporal_resolution = 'P1D' + temporal_resolution = 'PT15M' g.add((dataset, DCAT.temporalResolution, Literal(temporal_resolution, datatype=XSD.duration))) - temporal_resolution_2 = 'PT15M' - g.add((dataset, DCAT.temporalResolution, Literal(temporal_resolution_2, datatype=XSD.duration))) p = RDFParser(profiles=DCAT_AP_PROFILES) @@ -342,10 +336,7 @@ def test_temporal_resolution_multiple(self): extras = self._extras(datasets[0]) - temporal_resolution_list = json.loads(extras['temporal_resolution']) - assert len(temporal_resolution_list) == 2 - assert temporal_resolution in temporal_resolution_list - assert temporal_resolution_2 in temporal_resolution_list + assert extras['temporal_resolution'] == temporal_resolution def test_spatial_resolution_in_meters_multiple(self): g = Graph() diff --git a/ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py 
b/ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py index abf80363..816e9ace 100644 --- a/ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py +++ b/ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py @@ -3,6 +3,7 @@ from builtins import str from builtins import object import json +from decimal import Decimal import six import pytest @@ -42,8 +43,8 @@ def test_graph_from_dataset(self): 'metadata_created': '2021-06-21T15:21:09.034694', 'metadata_modified': '2021-06-21T15:21:09.075774', 'extras': [ - {'key': 'temporal_resolution', 'value': '[\"PT15M\", \"P1D\"]'}, - {'key': 'spatial_resolution_in_meters', 'value': '[30,20]'}, + {'key': 'temporal_resolution', 'value': 'PT15M'}, + {'key': 'spatial_resolution_in_meters', 'value': '30'}, {'key': 'is_referenced_by', 'value': '[\"https://doi.org/10.1038/sdata.2018.22\", \"test_isreferencedby\"]'}, ] } @@ -55,9 +56,14 @@ def test_graph_from_dataset(self): dataset_ref = s.graph_from_dataset(dataset) + # Standard values + assert self._triple( + g, dataset_ref, DCAT.temporalResolution, extras["temporal_resolution"], + data_type=XSD.duration + ) + # List for item in [ - ('temporal_resolution', DCAT.temporalResolution, [Literal, Literal], [XSD.duration, XSD.duration]), ('is_referenced_by', DCT.isReferencedBy, [URIRef, Literal], [None, None]), ]: values = json.loads(extras[item[0]]) @@ -72,11 +78,8 @@ def test_graph_from_dataset(self): assert self._triple(g, dataset_ref, item[1], _type(value), _datatype) # Spatial Resolution in Meters - values = json.loads(extras['spatial_resolution_in_meters']) - assert len([t for t in g.triples((dataset_ref, DCAT.spatialResolutionInMeters, None))]) == len(values) - - for value in values: - assert self._triple(g, dataset_ref, DCAT.spatialResolutionInMeters, Literal(float(value), + value = extras['spatial_resolution_in_meters'] + assert self._triple(g, dataset_ref, DCAT.spatialResolutionInMeters, Literal(Decimal(value), datatype=XSD.decimal)) def 
test_spatial_resolution_in_meters_single_value(self): @@ -104,7 +107,7 @@ def test_spatial_resolution_in_meters_single_value(self): assert len([t for t in g.triples((dataset_ref, DCAT.spatialResolutionInMeters, None))]) == 1 assert self._triple(g, dataset_ref, DCAT.spatialResolutionInMeters, - Literal(float(extras['spatial_resolution_in_meters']), datatype=XSD.decimal)) + Literal(Decimal(extras['spatial_resolution_in_meters']), datatype=XSD.decimal)) def test_spatial_resolution_in_meters_a_value_is_not_a_number(self): @@ -118,7 +121,7 @@ def test_spatial_resolution_in_meters_a_value_is_not_a_number(self): 'metadata_created': '2021-06-21T15:21:09.034694', 'metadata_modified': '2021-06-21T15:21:09.075774', 'extras': [ - {'key': 'spatial_resolution_in_meters', 'value': '[\"foo\",20]'} + {'key': 'spatial_resolution_in_meters', 'value': 'foo'} ] } @@ -129,11 +132,8 @@ def test_spatial_resolution_in_meters_a_value_is_not_a_number(self): dataset_ref = s.graph_from_dataset(dataset) - values = json.loads(extras['spatial_resolution_in_meters']) - assert len([t for t in g.triples((dataset_ref, DCAT.spatialResolutionInMeters, None))]) == len(values) - assert self._triple(g, dataset_ref, DCAT.spatialResolutionInMeters, Literal(values[0])) - assert self._triple(g, dataset_ref, DCAT.spatialResolutionInMeters, - Literal(float(values[1]), datatype=XSD.decimal)) + value = extras['spatial_resolution_in_meters'] + assert self._triple(g, dataset_ref, DCAT.spatialResolutionInMeters, Literal(value)) def test_spatial_resolution_value_is_invalid_json(self): @@ -188,13 +188,89 @@ def test_spatial(self): assert self._triple(g, spatial, RDF.type, DCT.Location) assert self._triple(g, spatial, SKOS.prefLabel, extras['spatial_text']) + assert len([t for t in g.triples((spatial, LOCN.geometry, None))]) == 1 + assert len([t for t in g.triples((spatial, DCAT.bbox, None))]) == 1 + assert len([t for t in g.triples((spatial, DCAT.centroid, None))]) == 1 + + # Geometry in WKT + wkt_geom = 
wkt.dumps(json.loads(extras['spatial']), decimals=4) + assert self._triple(g, spatial, LOCN.geometry, wkt_geom, GSP.wktLiteral) + wkt_bbox = wkt.dumps(json.loads(extras['spatial_bbox']), decimals=4) + assert self._triple(g, spatial, DCAT.bbox, wkt_bbox, GSP.wktLiteral) + wkt_cent = wkt.dumps(json.loads(extras['spatial_centroid']), decimals=4) + assert self._triple(g, spatial, DCAT.centroid, wkt_cent, GSP.wktLiteral) + + @pytest.mark.ckan_config("ckanext.dcat.output_spatial_format", "geojson") + def test_spatial_geojson(self): + dataset = { + 'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6', + 'name': 'test-dataset', + 'extras': [ + {'key': 'spatial_uri', 'value': 'http://sws.geonames.org/6361390/'}, + {'key': 'spatial_text', 'value': 'Tarragona'}, + {'key': 'spatial', 'value': '{"type": "Polygon", "coordinates": [[[1.1870606,41.0786393],[1.1870606,41.1655218],[1.3752339,41.1655218],[1.3752339,41.0786393],[1.1870606,41.0786393]]]}'}, + {'key': 'spatial_bbox', 'value': '{"type": "Polygon", "coordinates": [[[2.1870606,42.0786393],[2.1870606,42.1655218],[2.3752339,42.1655218],[2.3752339,42.0786393],[2.1870606,42.0786393]]]}'}, + {'key': 'spatial_centroid', 'value': '{"type": "Point", "coordinates": [2.28114725,42.12208055]}'}, + + ] + } + extras = self._extras(dataset) + + s = RDFSerializer(profiles=DCAT_AP_PROFILES) + g = s.g + + dataset_ref = s.graph_from_dataset(dataset) + + spatial = self._triple(g, dataset_ref, DCT.spatial, None)[2] + assert spatial + assert str(spatial) == extras['spatial_uri'] + assert self._triple(g, spatial, RDF.type, DCT.Location) + assert self._triple(g, spatial, SKOS.prefLabel, extras['spatial_text']) + + assert len([t for t in g.triples((spatial, LOCN.geometry, None))]) == 1 + assert len([t for t in g.triples((spatial, DCAT.bbox, None))]) == 1 + assert len([t for t in g.triples((spatial, DCAT.centroid, None))]) == 1 + + # Geometry in GeoJSON (load and dump to match the formatting) + assert self._triple(g, spatial, LOCN.geometry, 
json.dumps(json.loads(extras['spatial'])), GEOJSON_IMT) + assert self._triple(g, spatial, DCAT.bbox, json.dumps(json.loads(extras['spatial_bbox'])), GEOJSON_IMT) + assert self._triple(g, spatial, DCAT.centroid, json.dumps(json.loads(extras['spatial_centroid'])), GEOJSON_IMT) + + @pytest.mark.ckan_config("ckanext.dcat.output_spatial_format", "wkt geojson") + def test_spatial_two_formats_legacy(self): + dataset = { + 'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6', + 'name': 'test-dataset', + 'extras': [ + {'key': 'spatial_uri', 'value': 'http://sws.geonames.org/6361390/'}, + {'key': 'spatial_text', 'value': 'Tarragona'}, + {'key': 'spatial', 'value': '{"type": "Polygon", "coordinates": [[[1.1870606,41.0786393],[1.1870606,41.1655218],[1.3752339,41.1655218],[1.3752339,41.0786393],[1.1870606,41.0786393]]]}'}, + {'key': 'spatial_bbox', 'value': '{"type": "Polygon", "coordinates": [[[2.1870606,42.0786393],[2.1870606,42.1655218],[2.3752339,42.1655218],[2.3752339,42.0786393],[2.1870606,42.0786393]]]}'}, + {'key': 'spatial_centroid', 'value': '{"type": "Point", "coordinates": [2.28114725,42.12208055]}'}, + + ] + } + extras = self._extras(dataset) + + s = RDFSerializer(profiles=DCAT_AP_PROFILES) + g = s.g + + dataset_ref = s.graph_from_dataset(dataset) + + spatial = self._triple(g, dataset_ref, DCT.spatial, None)[2] + assert spatial + assert str(spatial) == extras['spatial_uri'] + assert self._triple(g, spatial, RDF.type, DCT.Location) + assert self._triple(g, spatial, SKOS.prefLabel, extras['spatial_text']) + assert len([t for t in g.triples((spatial, LOCN.geometry, None))]) == 2 assert len([t for t in g.triples((spatial, DCAT.bbox, None))]) == 2 assert len([t for t in g.triples((spatial, DCAT.centroid, None))]) == 2 - # Geometry in GeoJSON - assert self._triple(g, spatial, LOCN.geometry, extras['spatial'], GEOJSON_IMT) - assert self._triple(g, spatial, DCAT.bbox, extras['spatial_bbox'], GEOJSON_IMT) - assert self._triple(g, spatial, DCAT.centroid, extras['spatial_centroid'], 
GEOJSON_IMT) + + # Geometry in GeoJSON (load and dump to match the formatting) + assert self._triple(g, spatial, LOCN.geometry, json.dumps(json.loads(extras['spatial'])), GEOJSON_IMT) + assert self._triple(g, spatial, DCAT.bbox, json.dumps(json.loads(extras['spatial_bbox'])), GEOJSON_IMT) + assert self._triple(g, spatial, DCAT.centroid, json.dumps(json.loads(extras['spatial_centroid'])), GEOJSON_IMT) # Geometry in WKT wkt_geom = wkt.dumps(json.loads(extras['spatial']), decimals=4) @@ -204,7 +280,7 @@ def test_spatial(self): wkt_cent = wkt.dumps(json.loads(extras['spatial_centroid']), decimals=4) assert self._triple(g, spatial, DCAT.centroid, wkt_cent, GSP.wktLiteral) - def test_spatial_bad_geojson_no_wkt(self): + def test_spatial_bad_geojson_no_location(self): dataset = { 'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6', 'name': 'test-dataset', @@ -215,7 +291,6 @@ def test_spatial_bad_geojson_no_wkt(self): ] } - extras = self._extras(dataset) s = RDFSerializer(profiles=DCAT_AP_PROFILES) g = s.g @@ -225,18 +300,13 @@ def test_spatial_bad_geojson_no_wkt(self): spatial = self._triple(g, dataset_ref, DCT.spatial, None)[2] assert spatial assert isinstance(spatial, BNode) - # Geometry in GeoJSON - assert self._triple(g, spatial, LOCN.geometry, extras['spatial'], GEOJSON_IMT) - assert self._triple(g, spatial, LOCN.geometry, extras['spatial_bbox'], GEOJSON_IMT) - assert self._triple(g, spatial, LOCN.geometry, extras['spatial_centroid'], GEOJSON_IMT) - # Geometry in WKT - assert len([t for t in g.triples((spatial, LOCN.geometry, None))]) == 1 - assert len([t for t in g.triples((spatial, DCAT.bbox, None))]) == 1 - assert len([t for t in g.triples((spatial, DCAT.centroid, None))]) == 1 + assert len([t for t in g.triples((spatial, LOCN.geometry, None))]) == 0 + assert len([t for t in g.triples((spatial, DCAT.bbox, None))]) == 0 + assert len([t for t in g.triples((spatial, DCAT.centroid, None))]) == 0 - def test_spatial_bad_json_no_wkt(self): + def 
test_spatial_bad_json_no_location(self): dataset = { 'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6', 'name': 'test-dataset', @@ -247,7 +317,6 @@ def test_spatial_bad_json_no_wkt(self): ] } - extras = self._extras(dataset) s = RDFSerializer(profiles=DCAT_AP_PROFILES) g = s.g @@ -257,20 +326,14 @@ def test_spatial_bad_json_no_wkt(self): spatial = self._triple(g, dataset_ref, DCT.spatial, None)[2] assert spatial assert isinstance(spatial, BNode) - # Geometry in GeoJSON - assert self._triple(g, spatial, LOCN.geometry, extras['spatial'], GEOJSON_IMT) - assert self._triple(g, spatial, LOCN.geometry, extras['spatial_bbox'], GEOJSON_IMT) - assert self._triple(g, spatial, LOCN.geometry, extras['spatial_centroid'], GEOJSON_IMT) - # No Geometry in WKT, only one single triple for GeoJSON - assert len([t for t in g.triples((spatial, LOCN.geometry, None))]) == 1 - # Always only one single triple - assert len([t for t in g.triples((spatial, DCAT.bbox, None))]) == 1 - assert len([t for t in g.triples((spatial, DCAT.centroid, None))]) == 1 + assert len([t for t in g.triples((spatial, LOCN.geometry, None))]) == 0 + assert len([t for t in g.triples((spatial, DCAT.bbox, None))]) == 0 + assert len([t for t in g.triples((spatial, DCAT.centroid, None))]) == 0 def test_temporal(self): """ - Tests that the DCAT date properties are included in the graph in addition to schema.org dates. 
+ Tests that the DCAT date properties are included in the graph """ dataset = { @@ -288,24 +351,13 @@ def test_temporal(self): dataset_ref = s.graph_from_dataset(dataset) - temporals = self._triples(g, dataset_ref, DCT.temporal, None) - assert temporals - assert len(temporals) == 2 - - assert len([self._triple(g, temporal[2] , RDF.type, DCT.PeriodOfTime) for temporal in temporals]) == 2 - - temporal_obj_list = [temporal[2] for temporal in temporals] - for predicate in [SCHEMA.startDate, DCAT.startDate]: - triples = [] - for temporal_obj in temporal_obj_list: - triples.extend(self._triples(g, temporal_obj, predicate, extras['temporal_start'], XSD.dateTime)) - assert len(triples) == 1 - - for predicate in [SCHEMA.endDate, DCAT.endDate]: - triples = [] - for temporal_obj in temporal_obj_list: - triples.extend(self._triples(g, temporal_obj, predicate, extras['temporal_end'], XSD.date)) - assert len(triples) == 1 + temporal = self._triples(g, dataset_ref, DCT.temporal, None) + assert temporal + assert len(temporal) == 1 + temporal_ref = temporal[0][2] + assert self._triple(g, temporal_ref, RDF.type, DCT.PeriodOfTime) + assert self._triple(g, temporal_ref, DCAT.startDate, extras['temporal_start'], XSD.dateTime) + assert self._triple(g, temporal_ref, DCAT.endDate, extras['temporal_end'], XSD.date) def test_high_value_datasets(self): """ diff --git a/ckanext/dcat/tests/test_euro_dcatap_profile_serialize.py b/ckanext/dcat/tests/test_euro_dcatap_profile_serialize.py index edec0c5a..73229253 100644 --- a/ckanext/dcat/tests/test_euro_dcatap_profile_serialize.py +++ b/ckanext/dcat/tests/test_euro_dcatap_profile_serialize.py @@ -2,6 +2,7 @@ from builtins import object import json import uuid +from decimal import Decimal import pytest @@ -530,9 +531,7 @@ def test_spatial(self): assert self._triple(g, spatial, RDF.type, DCT.Location) assert self._triple(g, spatial, SKOS.prefLabel, extras['spatial_text']) - assert len([t for t in g.triples((spatial, LOCN.geometry, None))]) == 2 - # 
Geometry in GeoJSON - assert self._triple(g, spatial, LOCN.geometry, extras['spatial'], GEOJSON_IMT) + assert len([t for t in g.triples((spatial, LOCN.geometry, None))]) == 1 # Geometry in WKT wkt_geom = wkt.dumps(json.loads(extras['spatial']), decimals=4) @@ -557,11 +556,7 @@ def test_spatial_bad_geojson_no_wkt(self): spatial = self._triple(g, dataset_ref, DCT.spatial, None)[2] assert spatial assert isinstance(spatial, BNode) - # Geometry in GeoJSON - assert self._triple(g, spatial, LOCN.geometry, extras['spatial'], GEOJSON_IMT) - - # Geometry in WKT - assert len([t for t in g.triples((spatial, LOCN.geometry, None))]) == 1 + assert len([t for t in g.triples((spatial, LOCN.geometry, None))]) == 0 def test_spatial_bad_json_no_wkt(self): dataset = { @@ -582,11 +577,8 @@ def test_spatial_bad_json_no_wkt(self): spatial = self._triple(g, dataset_ref, DCT.spatial, None)[2] assert spatial assert isinstance(spatial, BNode) - # Geometry in GeoJSON - assert self._triple(g, spatial, LOCN.geometry, extras['spatial'], GEOJSON_IMT) - # Geometry in WKT - assert len([t for t in g.triples((spatial, LOCN.geometry, None))]) == 1 + assert len([t for t in g.triples((spatial, LOCN.geometry, None))]) == 0 def test_distributions(self): @@ -702,7 +694,7 @@ def test_distribution_fields(self): assert self._triple(g, distribution, DCT.modified, resource['modified'], XSD.dateTime) # Numbers - assert self._triple(g, distribution, DCAT.byteSize, float(resource['size']), XSD.decimal) + assert self._triple(g, distribution, DCAT.byteSize, Decimal(resource['size']), XSD.decimal) # Checksum checksum = self._triple(g, distribution, SPDX.checksum, None)[2] diff --git a/ckanext/dcat/tests/test_scheming_support.py b/ckanext/dcat/tests/test_scheming_support.py index 5c597dce..fc9ad9bf 100644 --- a/ckanext/dcat/tests/test_scheming_support.py +++ b/ckanext/dcat/tests/test_scheming_support.py @@ -1,5 +1,6 @@ from unittest import mock import json +from decimal import Decimal import pytest from 
rdflib.namespace import RDF @@ -19,7 +20,6 @@ XSD, VCARD, FOAF, - SCHEMA, SKOS, LOCN, GSP, @@ -101,7 +101,7 @@ def test_e2e_ckan_to_dcat(self): {"start": "1905-03-01", "end": "2013-01-05"}, {"start": "2024-04-10", "end": "2024-05-29"}, ], - "temporal_resolution": ["PT15M", "P1D"], + "temporal_resolution": "PT15M", "spatial_coverage": [ { "geom": { @@ -133,7 +133,7 @@ def test_e2e_ckan_to_dcat(self): "centroid": {"type": "Point", "coordinates": [1.26639, 41.12386]}, } ], - "spatial_resolution_in_meters": [1.5, 2.0], + "spatial_resolution_in_meters": 1.5, "resources": [ { "name": "Resource 1", @@ -195,6 +195,13 @@ def test_e2e_ckan_to_dcat(self): assert self._triple(g, dataset_ref, DCT.type, dataset["dcat_type"]) assert self._triple(g, dataset_ref, ADMS.versionNotes, dataset["version_notes"]) assert self._triple(g, dataset_ref, DCT.accessRights, dataset["access_rights"]) + assert self._triple( + g, + dataset_ref, + DCAT.spatialResolutionInMeters, + dataset["spatial_resolution_in_meters"], + data_type=XSD.decimal, + ) # Dates assert self._triple( @@ -211,6 +218,13 @@ def test_e2e_ckan_to_dcat(self): dataset["modified"], data_type=XSD.date, ) + assert self._triple( + g, + dataset_ref, + DCAT.temporalResolution, + dataset["temporal_resolution"], + data_type=XSD.duration, + ) # List fields @@ -231,10 +245,6 @@ def test_e2e_ckan_to_dcat(self): self._triples_list_values(g, dataset_ref, FOAF.page) == dataset["documentation"] ) - assert ( - self._triples_list_values(g, dataset_ref, DCAT.temporalResolution) - == dataset["temporal_resolution"] - ) assert ( self._triples_list_values(g, dataset_ref, DCT.isReferencedBy) == dataset["is_referenced_by"] @@ -244,13 +254,6 @@ def test_e2e_ckan_to_dcat(self): == dataset["applicable_legislation"] ) - assert ( - self._triples_list_python_values( - g, dataset_ref, DCAT.spatialResolutionInMeters - ) - == dataset["spatial_resolution_in_meters"] - ) - # Repeating subfields contact_details = [t for t in g.triples((dataset_ref, 
DCAT.contactPoint, None))] @@ -306,28 +309,28 @@ def test_e2e_ckan_to_dcat(self): assert self._triple( g, temporal[0][2], - SCHEMA.startDate, + DCAT.startDate, dataset_dict["temporal_coverage"][0]["start"], data_type=XSD.date, ) assert self._triple( g, temporal[0][2], - SCHEMA.endDate, + DCAT.endDate, dataset_dict["temporal_coverage"][0]["end"], data_type=XSD.date, ) assert self._triple( g, temporal[1][2], - SCHEMA.startDate, + DCAT.startDate, dataset_dict["temporal_coverage"][1]["start"], data_type=XSD.date, ) assert self._triple( g, temporal[1][2], - SCHEMA.endDate, + DCAT.endDate, dataset_dict["temporal_coverage"][1]["end"], data_type=XSD.date, ) @@ -340,15 +343,7 @@ def test_e2e_ckan_to_dcat(self): g, spatial[0][2], SKOS.prefLabel, dataset["spatial_coverage"][0]["text"] ) - assert len([t for t in g.triples((spatial[0][2], LOCN.geometry, None))]) == 2 - # Geometry in GeoJSON - assert self._triple( - g, - spatial[0][2], - LOCN.geometry, - dataset["spatial_coverage"][0]["geom"], - GEOJSON_IMT, - ) + assert len([t for t in g.triples((spatial[0][2], LOCN.geometry, None))]) == 1 # Geometry in WKT wkt_geom = wkt.dumps(dataset["spatial_coverage"][0]["geom"], decimals=4) assert self._triple(g, spatial[0][2], LOCN.geometry, wkt_geom, GSP.wktLiteral) @@ -404,7 +399,7 @@ def test_e2e_ckan_to_dcat(self): ) assert self._triple( - g, distribution_ref, DCAT.byteSize, float(resource["size"]), XSD.decimal + g, distribution_ref, DCAT.byteSize, Decimal(resource["size"]), XSD.decimal ) # Checksum checksum = self._triple(g, distribution_ref, SPDX.checksum, None)[2] @@ -621,7 +616,7 @@ def test_dcat_date(self): assert self._triple( g, temporal[0][2], - SCHEMA.endDate, + DCAT.endDate, dataset_dict["temporal_coverage"][0]["end"], data_type=XSD.date, ) @@ -634,7 +629,7 @@ def test_dcat_date(self): assert self._triple( g, temporal[0][2], - SCHEMA.startDate, + DCAT.startDate, dataset_dict["temporal_coverage"][0]["start"], data_type=XSD.dateTime, ) @@ -646,7 +641,7 @@ def 
test_dcat_date(self): assert self._triple( g, temporal[1][2], - SCHEMA.startDate, + DCAT.startDate, dataset_dict["temporal_coverage"][1]["start"], data_type=XSD.dateTime, ) @@ -659,7 +654,7 @@ def test_dcat_date(self): assert self._triple( g, temporal[2][2], - SCHEMA.startDate, + DCAT.startDate, "2024-11-24T00:00:00", data_type=XSD.dateTime, ) @@ -670,7 +665,7 @@ def test_dcat_date(self): assert self._triple( g, temporal[2][2], - SCHEMA.endDate, + DCAT.endDate, "2012-06-12T00:00:00", data_type=XSD.dateTime, ) @@ -767,6 +762,8 @@ def test_e2e_dcat_to_ckan(self): assert dataset["issued"] == u"2012-05-10" assert dataset["modified"] == u"2012-05-10T21:04:00" + assert dataset["temporal_resolution"] == "PT15M" + assert dataset["spatial_resolution_in_meters"] == "1.5" # List fields assert sorted(dataset["conforms_to"]) == ["Standard 1", "Standard 2"] @@ -784,14 +781,7 @@ def test_e2e_dcat_to_ckan(self): "http://dataset.info.org/doc1", "http://dataset.info.org/doc2", ] - assert sorted(dataset["temporal_resolution"]) == [ - "P1D", - "PT15M", - ] - assert sorted(dataset["spatial_resolution_in_meters"]) == [ - 1.5, - 2.0, - ] + assert sorted(dataset["is_referenced_by"]) == [ "https://doi.org/10.1038/sdata.2018.22", "test_isreferencedby", diff --git a/ckanext/dcat/tests/test_shacl.py b/ckanext/dcat/tests/test_shacl.py new file mode 100644 index 00000000..dd445fdd --- /dev/null +++ b/ckanext/dcat/tests/test_shacl.py @@ -0,0 +1,114 @@ +import json +import os +from random import randrange + +from pyshacl import validate +import pytest + +from ckan.tests.helpers import call_action + +from ckanext.dcat.processors import RDFSerializer +from ckanext.dcat.tests.utils import get_file_contents + + +def _get_shacl_file_path(file_name): + return os.path.join(os.path.dirname(__file__), "shacl", file_name) + + +generated_graphs = {} + + +def graph_from_dataset(file_name): + global generated_graphs + + if not generated_graphs.get(file_name): + if not file_name.startswith("ckan/"): + 
file_name = "ckan/" + file_name + dataset_dict = json.loads(get_file_contents(file_name)) + dataset_dict["name"] += "-" + str(randrange(0, 1000)) + dataset = call_action("package_create", **dataset_dict) + + s = RDFSerializer() + s.graph_from_dataset(dataset) + + generated_graphs[file_name] = s.g + + return generated_graphs[file_name] + + +@pytest.mark.usefixtures("with_plugins") +@pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets") +@pytest.mark.ckan_config( + "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_2.1_full.yaml" +) +@pytest.mark.ckan_config( + "scheming.presets", + "ckanext.scheming:presets.json ckanext.dcat.schemas:presets.yaml", +) +@pytest.mark.ckan_config( + "ckanext.dcat.rdf.profiles", "euro_dcat_ap_2 euro_dcat_ap_scheming" +) +def test_validate_dcat_ap_2_graph_shapes(): + + graph = graph_from_dataset("ckan_full_dataset_dcat_ap_2.json") + + # dcat-ap_2.1.1_shacl_shapes.ttl: constraints concerning existence, domain and + # literal range, and cardinalities. + path = _get_shacl_file_path("dcat-ap_2.1.1_shacl_shapes.ttl") + r = validate(graph, shacl_graph=path) + conforms, results_graph, results_text = r + assert conforms, results_text + + +@pytest.mark.usefixtures("with_plugins") +@pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets") +@pytest.mark.ckan_config( + "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_2.1_full.yaml" +) +@pytest.mark.ckan_config( + "scheming.presets", + "ckanext.scheming:presets.json ckanext.dcat.schemas:presets.yaml", +) +@pytest.mark.ckan_config( + "ckanext.dcat.rdf.profiles", "euro_dcat_ap_2 euro_dcat_ap_scheming" +) +def test_validate_dcat_ap_2_graph_shapes_recommended(): + + graph = graph_from_dataset("ckan_full_dataset_dcat_ap_2.json") + + # dcat-ap_2.1.1_shacl_shapes_recommended.ttl: constraints concerning existence + # of recommended properties.
+ path = _get_shacl_file_path("dcat-ap_2.1.1_shacl_shapes_recommended.ttl") + r = validate(graph, shacl_graph=path) + conforms, results_graph, results_text = r + assert conforms, results_text + + +@pytest.mark.usefixtures("with_plugins") +@pytest.mark.ckan_config("ckan.plugins", "dcat") +@pytest.mark.ckan_config("ckanext.dcat.rdf.profiles", "euro_dcat_ap_2") +def test_validate_dcat_ap_2_legacy_graph_shapes(): + + graph = graph_from_dataset("ckan_full_dataset_dcat_ap_2_legacy.json") + + # dcat-ap_2.1.1_shacl_shapes.ttl: constraints concerning existence, domain and + # literal range, and cardinalities. + path = _get_shacl_file_path("dcat-ap_2.1.1_shacl_shapes.ttl") + r = validate(graph, shacl_graph=path) + conforms, results_graph, results_text = r + assert conforms, results_text + + +@pytest.mark.usefixtures("with_plugins") +@pytest.mark.ckan_config("ckan.plugins", "dcat") +@pytest.mark.ckan_config("ckanext.dcat.rdf.profiles", "euro_dcat_ap_2") +def test_validate_dcat_ap_2_legacy_graph_shapes_recommended(): + + graph = graph_from_dataset("ckan_full_dataset_dcat_ap_2_legacy.json") + + # dcat-ap_2.1.1_shacl_shapes_recommended.ttl: constraints concerning existence + # of recommended properties.
+ path = _get_shacl_file_path("dcat-ap_2.1.1_shacl_shapes_recommended.ttl") + r = validate(graph, shacl_graph=path) + conforms, results_graph, results_text = r + assert conforms, results_text diff --git a/dev-requirements.txt b/dev-requirements.txt index 5cd11822..56aefb67 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,4 +1,5 @@ responses>=0.25.2 +pyshacl mock pytest-ckan pytest-cov diff --git a/examples/ckan/ckan_full_dataset_dcat_ap_2.json b/examples/ckan/ckan_full_dataset_dcat_ap_2.json new file mode 100644 index 00000000..381804a1 --- /dev/null +++ b/examples/ckan/ckan_full_dataset_dcat_ap_2.json @@ -0,0 +1,185 @@ +{ + "name": "test-dataset-shacl", + "title": "Test DCAT dataset", + "notes": "Lorem ipsum", + "url": "http://example.org/ds1", + "version": "1.0b", + "tags": [ + { + "name": "Tag 1" + }, + { + "name": "Tag 2" + } + ], + "issued": "2024-05-01", + "modified": "2024-05-05", + "identifier": "xx-some-dataset-id-yy", + "frequency": "monthly", + "provenance": "Statement about provenance", + "dcat_type": "test-type", + "version_notes": "Some version notes", + "access_rights": "Statement about access rights", + "alternate_identifier": [ + "alt-id-1", + "alt-id-2" + ], + "theme": [ + "https://example.org/uri/theme1", + "https://example.org/uri/theme2", + "https://example.org/uri/theme3" + ], + "language": [ + "en", + "ca", + "es" + ], + "documentation": [ + "https://example.org/some-doc.html" + ], + "conforms_to": [ + "Standard 1", + "Standard 2" + ], + "is_referenced_by": [ + "https://doi.org/10.1038/sdata.2018.22", + "test_isreferencedby" + ], + "applicable_legislation": [ + "http://data.europa.eu/eli/reg_impl/2023/138/oj", + "http://data.europa.eu/eli/reg_impl/2023/138/oj_alt" + ], + "contact": [ + { + "name": "Contact 1", + "email": "contact1@example.org" + }, + { + "name": "Contact 2", + "email": "contact2@example.org" + } + ], + "publisher": [ + { + "name": "Test Publisher", + "email": "publisher@example.org", + "url": 
"https://example.org", + "type": "public_body" + } + ], + "temporal_coverage": [ + { + "start": "1905-03-01", + "end": "2013-01-05" + } + ], + "temporal_resolution": "PT15M", + "spatial_coverage": [ + { + "geom": { + "type": "Polygon", + "coordinates": [ + [ + [ + 11.9936, + 54.0486 + ], + [ + 11.9936, + 54.2466 + ], + [ + 12.3045, + 54.2466 + ], + [ + 12.3045, + 54.0486 + ], + [ + 11.9936, + 54.0486 + ] + ] + ] + }, + "text": "Tarragona", + "uri": "https://sws.geonames.org/6361390/", + "bbox": { + "type": "Polygon", + "coordinates": [ + [ + [ + -2.1604, + 42.7611 + ], + [ + -2.0938, + 42.7611 + ], + [ + -2.0938, + 42.7931 + ], + [ + -2.1604, + 42.7931 + ], + [ + -2.1604, + 42.7611 + ] + ] + ] + }, + "centroid": { + "type": "Point", + "coordinates": [ + 1.26639, + 41.12386 + ] + } + } + ], + "spatial_resolution_in_meters": 1.5, + "resources": [ + { + "name": "Resource 1", + "description": "Some description", + "url": "https://example.com/data.csv", + "format": "CSV", + "availability": "http://publications.europa.eu/resource/authority/planned-availability/EXPERIMENTAL", + "compress_format": "http://www.iana.org/assignments/media-types/application/gzip", + "package_format": "http://publications.europa.eu/resource/authority/file-type/TAR", + "size": 12323, + "hash": "4304cf2e751e6053c90b1804c89c0ebb758f395a", + "hash_algorithm": "http://spdx.org/rdf/terms#checksumAlgorithm_sha1", + "status": "http://purl.org/adms/status/Completed", + "access_url": "https://example.com/data.csv", + "download_url": "https://example.com/data.csv", + "issued": "2024-05-01T01:20:33", + "modified": "2024-05-05T09:33:20", + "license": "http://creativecommons.org/licenses/by/3.0/", + "rights": "Some stament about rights", + "language": [ + "en", + "ca", + "es" + ], + "access_services": [ + { + "title": "Access Service 1", + "endpoint_description": "https://example.org/endpoint_description", + "endpoint_url": [ + "https://example.org/access_service/1", + "https://example.org/access_service/2" 
+ ], + "serves_dataset": [ + "https://example.org/dataset/1", + "https://example.org/dataset/2" + ] + } + ] + } + ] +} diff --git a/examples/ckan/ckan_full_dataset_dcat_ap_2_legacy.json b/examples/ckan/ckan_full_dataset_dcat_ap_2_legacy.json new file mode 100644 index 00000000..358b2c40 --- /dev/null +++ b/examples/ckan/ckan_full_dataset_dcat_ap_2_legacy.json @@ -0,0 +1,160 @@ +{ + "name": "test-dataset-shacl", + "title": "Test DCAT dataset", + "notes": "Lorem ipsum", + "url": "http://example.org/ds1", + "version": "1.0b", + "tags": [ + { + "name": "Tag 1" + }, + { + "name": "Tag 2" + } + ], + "extras": [ + { + "key": "issued", + "value": "2024-05-01" + }, + { + "key": "modified", + "value": "2024-05-05" + }, + { + "key": "identifier", + "value": "xx-some-dataset-id-yy" + }, + { + "key": "frequency", + "value": "monthly" + }, + { + "key": "provenance", + "value": "Statement about provenance" + }, + { + "key": "dcat_type", + "value": "test-type" + }, + { + "key": "version_notes", + "value": "Some version notes" + }, + { + "key": "access_rights", + "value": "Statement about access rights" + }, + { + "key": "alternate_identifier", + "value": "[\"alt-id-1\",\"alt-id-2\"]" + }, + { + "key": "theme", + "value": "[\"https://example.org/uri/theme1\",\"https://example.org/uri/theme2\",\"https://example.org/uri/theme3\"]" + }, + { + "key": "language", + "value": "[\"en\",\"ca\",\"es\"]" + }, + { + "key": "documentation", + "value": "[\"https://example.org/some-doc.html\"]" + }, + { + "key": "conforms_to", + "value": "[\"Standard1\",\"Standard2\"]" + }, + { + "key": "is_referenced_by", + "value": "[\"https://doi.org/10.1038/sdata.2018.22\",\"test_isreferencedby\"]" + }, + { + "key": "applicable_legislation", + "value": "[\"http://data.europa.eu/eli/reg_impl/2023/138/oj\",\"http://data.europa.eu/eli/reg_impl/2023/138/oj_alt\"]" + }, + { + "key": "contact_name", + "value": "Contact 1" + }, + { + "key": "contact_email", + "value": "contact1@example.org" + }, + { + "key": 
"publisher_name", + "value": "Test Publisher" + }, + { + "key": "publisher_email", + "value": "publisher@example.org" + }, + { + "key": "publisher_url", + "value": "https://example.org" + }, + { + "key": "publisher_type", + "value": "public_body" + }, + { + "key": "temporal_start", + "value": "1905-03-01" + }, + { + "key": "temporal_end", + "value": "2013-01-05" + }, + { + "key": "temporal_resolution", + "value": "PT15M" + }, + { + "key": "spatial", + "value": "{\"type\":\"Polygon\",\"coordinates\":[[[11.9936,54.0486],[11.9936,54.2466],[12.3045,54.2466],[12.3045,54.0486],[11.9936,54.0486]]]}" + }, + { + "key": "spatial_text", + "value": "Tarragona" + }, + { + "key": "spatial_uri", + "value": "https://sws.geonames.org/6361390/" + }, + { + "key": "spatial_bbox", + "value": "{\"type\":\"Polygon\",\"coordinates\":[[[11.9936,54.0486],[11.9936,54.2466],[12.3045,54.2466],[12.3045,54.0486],[11.9936,54.0486]]]}" + }, + { + "key": "spatial_centroid", + "value": "{\"type\":\"Point\",\"coordinates\":[1.26639,41.12386]}" + }, + { + "key": "spatial_resolution_in_meters", + "value": 1.5 + } + ], + "resources": [ + { + "name": "Resource 1", + "description": "Some description", + "url": "https://example.com/data.csv", + "format": "CSV", + "availability": "http://publications.europa.eu/resource/authority/planned-availability/EXPERIMENTAL", + "compress_format": "http://www.iana.org/assignments/media-types/application/gzip", + "package_format": "http://publications.europa.eu/resource/authority/file-type/TAR", + "size": 12323, + "hash": "4304cf2e751e6053c90b1804c89c0ebb758f395a", + "hash_algorithm": "http://spdx.org/rdf/terms#checksumAlgorithm_sha1", + "status": "http://purl.org/adms/status/Completed", + "access_url": "https://example.com/data.csv", + "download_url": "https://example.com/data.csv", + "issued": "2024-05-01T01:20:33", + "modified": "2024-05-05T09:33:20", + "license": "http://creativecommons.org/licenses/by/3.0/", + "rights": "Some stament about rights", + "language": 
"[\"en\",\"ca\",\"es\"]", + "access_services": "[{\"title\": \"Access Service 1\", \"endpoint_description\": \"https://example.org/endpoint_description\", \"endpoint_url\": [\"https://example.org/access_service/1\", \"https://example.org/access_service/2\"], \"serves_dataset\": [\"https://example.org/dataset/1\", \"https://example.org/dataset/2\"]}]" + } + ] +} diff --git a/examples/dcat/dataset.rdf b/examples/dcat/dataset.rdf index f7db02db..b2f925c8 100644 --- a/examples/dcat/dataset.rdf +++ b/examples/dcat/dataset.rdf @@ -38,7 +38,6 @@ 1.5 - 2.0 public @@ -64,7 +63,6 @@ PT15M - P1D Point of Contact