From 8c1dd8522fc0144fc25cc694557033e5a45efc27 Mon Sep 17 00:00:00 2001 From: Horst Fellenberg <40174618+horstf@users.noreply.github.com> Date: Wed, 13 Apr 2022 10:17:22 +0200 Subject: [PATCH 01/26] add modularized validation --- .../emodul/emodul_validation.py | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 usecases/Concrete/knowledgeGraph/emodul/emodul_validation.py diff --git a/usecases/Concrete/knowledgeGraph/emodul/emodul_validation.py b/usecases/Concrete/knowledgeGraph/emodul/emodul_validation.py new file mode 100644 index 000000000..f9be06ff0 --- /dev/null +++ b/usecases/Concrete/knowledgeGraph/emodul/emodul_validation.py @@ -0,0 +1,78 @@ +from pyshacl import validate +from rdflib import Graph, URIRef, Namespace +from rdflib.util import guess_format +from rdflib.namespace import SH, RDF + +""" +baseDir0 = Path(__file__).resolve().parents[0] +baseDir1 = Path(__file__).resolve().parents[1] +baseDir2 = Path(__file__).resolve().parents[2] +ontologyPath = os.path.join(baseDir2,'ConcreteOntology') +metadataPath = os.path.join(baseDir0,'E-modul-processed-data/emodul_metadata.csv') +graphPath = os.path.join(baseDir0,'E-modul-processed-data/EM_Graph.ttl') +processedDataPath = os.path.join(baseDir0,'E-modul-processed-data') +""" + +SCHEMA = Namespace('http://schema.org/') + +""" +Given a path to a shacl shape and a path to an rdf file, this function tests the rdf data against the specified shacl shapes. +The result is an rdflib graph containing the validation report, if it is empty the validation was successful. 
+""" +def test_graph(rdf_graph: Graph, shapes_graph: Graph) -> Graph: + + conforms, result_graph, _ = validate( + rdf_graph, + shapes_graph, + ont_graph=None, # can use a Web URL for a graph containing extra ontological information + inference='none', + abort_on_first=False, + allow_infos=False, + allow_warnings=False, + meta_shacl=False, + advanced=False, + js=False, + debug=False) + + # only add other graphs if any violations occurred + if not conforms: + # also add nodes from data and shacl shapes to graph to be able to search backwards for the violated shapes + result_graph += shapes_graph + result_graph += rdf_graph + + return result_graph + +""" +Returns true if the given shape is violated in the report. +""" +def violates_shape(validation_report: Graph, shape: URIRef) -> bool: + + # get the class that is targeted by the specified shape + target_class = validation_report.value(shape, SH.targetClass, None, any=False) + if target_class is None: + raise ValueError(f'The shapes graph does not contain a {shape} shape.') + + + # get all classes that have been violated + # check if any of the violated classes is the class that is targeted by the specified shape + for o in validation_report.objects(None, SH.focusNode): + if target_class in validation_report.objects(o, RDF.type): + return True + + # no violated class is targeted by the specified shape, thus the shape is not violated + return False + +""" +Reads a graph from a file into a Graph object. 
+""" +def read_graph_from_file(filepath: str) -> Graph: + with open(filepath, 'r') as f: + graph = Graph() + graph.parse(file=f, format=guess_format(filepath)) + return graph + + +# assert that certain violations occurred / did not occur: +# assert violates_shape(g, SCHEMA.InformationBearingEntityShape) +# assert not violates_shape(g, SCHEMA.InformationBearingEntityShape) + From d181201f11acb9eebb2107d0b07d8d78739315c5 Mon Sep 17 00:00:00 2001 From: Horst Fellenberg <40174618+horstf@users.noreply.github.com> Date: Wed, 13 Apr 2022 10:19:26 +0200 Subject: [PATCH 02/26] add shape to test --- usecases/Concrete/shape.ttl | 83 +++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 usecases/Concrete/shape.ttl diff --git a/usecases/Concrete/shape.ttl b/usecases/Concrete/shape.ttl new file mode 100644 index 000000000..95d6e3e58 --- /dev/null +++ b/usecases/Concrete/shape.ttl @@ -0,0 +1,83 @@ +@prefix dash: . +@prefix schema: . +@prefix sh: . +@prefix ns1: . +@prefix ns2: . +@prefix ns3: . +@prefix ns4: . +@prefix ns5: . +@prefix ns6: . +@prefix ns7: . +@prefix ns8: . +@prefix ns9: . +@prefix rdf: . +@prefix rdfs: . +@prefix xsd: . +@prefix mseo: . +@prefix cco: . +@prefix obo: . +@prefix con: . +@prefix tt: . 
+ +# for a documentation of shacl see https://www.w3.org/TR/shacl/ + +# this shape tests properties about the diameter +schema:SpecimenDiameterShape + a sh:NodeShape ; + sh:targetClass cco:Diameter ; + # there is exactly one InformationBearingEntity attached to the Diameter + sh:property [ + sh:path obo:RO_0010001 ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:class cco:InformationBearingEntity ; + ] ; + # the InformationBearingEntity has a decimal value + sh:property [ + sh:path (obo:RO_0010001 cco:has_decimal_value) ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:dataytpe xsd:decimal ; + ] ; + # the InformationBearingEntity has a measurement unit specified + sh:property [ + sh:path (obo:RO_0010001 cco:uses_measurement_unit) ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:class cco:MeasurementUnitOfLength ; + ] . + +# this shape tests properties about the Specimen +schema:SpecimenShape + a sh:NodeShape ; + sh:targetClass mseo:Specimen ; + # the Specimen is input of one (and only one) YoungsModulusTest + sh:property [ + sh:path cco:is_input_of ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:node tt:YoungsModulusTest ; + ] ; + # the Specimen has a quality (shape) attached to it + sh:property [ + sh:path (obo:BFO_0000051 obo:RO_0000086) ; + sh:minCount 1 ; + sh:maxCount 1 ; + ] ; + # the Specimen is designated by one Identifyier + sh:property [ + sh:path cco:designated_by ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:class cco:NonNameIdentifier ; + ] . + +# THIS IS A SHAPE THAT MUST FAIL! +schema:InformationBearingEntityShape + a sh:NodeShape ; + sh:targetClass cco:InformationBearingEntity ; + # each InformationBearingEntity must use at least two measurement units (nonsensical) + sh:property [ + sh:path cco:uses_measurement_unit ; + sh:minCount 2 ; + ] . 
\ No newline at end of file From fc37a5e9ee49edb9daf9be767945a72bdc02db4b Mon Sep 17 00:00:00 2001 From: Horst Fellenberg <40174618+horstf@users.noreply.github.com> Date: Wed, 13 Apr 2022 10:24:20 +0200 Subject: [PATCH 03/26] add old dodo.py code from ont_dev branch (have to update) --- usecases/Concrete/dodo.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/usecases/Concrete/dodo.py b/usecases/Concrete/dodo.py index 50be66167..02e3d37c1 100644 --- a/usecases/Concrete/dodo.py +++ b/usecases/Concrete/dodo.py @@ -51,6 +51,10 @@ def task_emodul(): 'basename': 'map emodul ontology and metadata', 'actions': ['python knowledgeGraph/emodul/emodul_mapping.py'] } + yield { + 'basename': 'validate rdf files against shacl shape', + 'actions': ['python knowledgeGraph/emodul/emodul_validation.py'] + } yield { 'basename': 'run emodul query script', 'actions': ['python knowledgeGraph/emodul/emodul_query.py'] From 68aed7989767d2bbc0f8f91933a0c84dccb3ef30 Mon Sep 17 00:00:00 2001 From: Horst Fellenberg <40174618+horstf@users.noreply.github.com> Date: Wed, 13 Apr 2022 10:26:45 +0200 Subject: [PATCH 04/26] update requirements and environment --- environment.yml | 2 ++ usecases/Concrete/knowledgeGraph/requirements.txt | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 3dd6a1925..d3a6bbbc1 100644 --- a/environment.yml +++ b/environment.yml @@ -11,6 +11,8 @@ dependencies: - pyyaml - mkl - mkl-service + - rdflib + - pyshacl # - flake8 # - sphinx # - sphinx_rtd_theme diff --git a/usecases/Concrete/knowledgeGraph/requirements.txt b/usecases/Concrete/knowledgeGraph/requirements.txt index 865c5f501..5406faae8 100644 --- a/usecases/Concrete/knowledgeGraph/requirements.txt +++ b/usecases/Concrete/knowledgeGraph/requirements.txt @@ -2,8 +2,9 @@ Cython==0.29.15 numpy==1.19.2 pandas==0.25.1 Owlready2==0.33 -rdflib==5.0.0 +rdflib==6.1.1 SPARQLWrapper==1.8.5 requests==2.22.0 GitPython==3.1.24 probeye==1.0.6 +pyshacl==0.9.5 From 
f49e04025c3174d65f1014612e8bd59b563bf29a Mon Sep 17 00:00:00 2001 From: Horst Fellenberg <40174618+horstf@users.noreply.github.com> Date: Tue, 3 May 2022 10:27:07 +0200 Subject: [PATCH 05/26] add __init__.py for modularization of code --- usecases/Concrete/dodo.py | 20 ++++++++++++++++--- .../knowledgeGraph/emodul/__init__.py | 0 2 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 usecases/Concrete/knowledgeGraph/emodul/__init__.py diff --git a/usecases/Concrete/dodo.py b/usecases/Concrete/dodo.py index 02e3d37c1..ad46a73fd 100644 --- a/usecases/Concrete/dodo.py +++ b/usecases/Concrete/dodo.py @@ -1,17 +1,30 @@ +import graphlib import os from pathlib import Path +from emodul import emodul_validation as validation baseDir = Path(__file__).resolve().parents[0] -emodulFolder = os.path.join(os.path.join(os.path.join(baseDir,'knowledgeGraph'),'emodul'),'E-modul-processed-data') +emodulFolder = os.path.join(os.path.join(baseDir,'knowledgeGraph'),'emodul') emodulRawdataFolder = os.path.join(emodulFolder,'rawdata') -emodulProcesseddataFolder = os.path.join(emodulFolder,'processeddata') +emodulProcesseddataFolder = os.path.join(emodulFolder,'E-modul-processed-data') compressionFolder = os.path.join(os.path.join(os.path.join(baseDir,'knowledgeGraph'),'compression'),'compression-processed-data') compressionRawdataFolder = os.path.join(compressionFolder,'rawdata') compressionProcesseddataFolder = os.path.join(compressionFolder,'processeddata') +graph_path = os.path.join(emodulProcesseddataFolder, 'EM_Graph.ttl') +shapes_path = os.path.join(os.path.join(baseDir, 'knowledgeGraph'), 'shape.ttl') + DOIT_CONFIG = {'verbosity': 2} +def validate_graph(graph_path, shapes_path): + g = validation.read_graph_from_file(graph_path) + s = validation.read_graph_from_file(shapes_path) + r = validation.test_graph(g, s) + assert validation.violates_shape(r, validation.SCHEMA.InformationBearingEntityShape) + assert not validation.violates_shape(r, 
validation.SCHEMA.SpecimenDiameterShape) + assert not validation.violates_shape(r, validation.SCHEMA.SpecimenShape) + def task_installation(): yield { 'basename': 'install python packages', @@ -53,7 +66,8 @@ def task_emodul(): } yield { 'basename': 'validate rdf files against shacl shape', - 'actions': ['python knowledgeGraph/emodul/emodul_validation.py'] + 'actions': [(validate_graph, [graph_path, shapes_path])], + 'file_dep': [graph_path] } yield { 'basename': 'run emodul query script', diff --git a/usecases/Concrete/knowledgeGraph/emodul/__init__.py b/usecases/Concrete/knowledgeGraph/emodul/__init__.py new file mode 100644 index 000000000..e69de29bb From 644a4c9d1bc035b80b13222a0487653b3bc7b789 Mon Sep 17 00:00:00 2001 From: Horst Fellenberg <40174618+horstf@users.noreply.github.com> Date: Tue, 3 May 2022 10:40:25 +0200 Subject: [PATCH 06/26] rename emodul_validation.py to validation.py to improve python modularization naming schema --- .../knowledgeGraph/emodul/validation.py | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 usecases/Concrete/knowledgeGraph/emodul/validation.py diff --git a/usecases/Concrete/knowledgeGraph/emodul/validation.py b/usecases/Concrete/knowledgeGraph/emodul/validation.py new file mode 100644 index 000000000..f9be06ff0 --- /dev/null +++ b/usecases/Concrete/knowledgeGraph/emodul/validation.py @@ -0,0 +1,78 @@ +from pyshacl import validate +from rdflib import Graph, URIRef, Namespace +from rdflib.util import guess_format +from rdflib.namespace import SH, RDF + +""" +baseDir0 = Path(__file__).resolve().parents[0] +baseDir1 = Path(__file__).resolve().parents[1] +baseDir2 = Path(__file__).resolve().parents[2] +ontologyPath = os.path.join(baseDir2,'ConcreteOntology') +metadataPath = os.path.join(baseDir0,'E-modul-processed-data/emodul_metadata.csv') +graphPath = os.path.join(baseDir0,'E-modul-processed-data/EM_Graph.ttl') +processedDataPath = os.path.join(baseDir0,'E-modul-processed-data') +""" + +SCHEMA = 
Namespace('http://schema.org/') + +""" +Given a path to a shacl shape and a path to an rdf file, this function tests the rdf data against the specified shacl shapes. +The result is an rdflib graph containing the validation report, if it is empty the validation was successful. +""" +def test_graph(rdf_graph: Graph, shapes_graph: Graph) -> Graph: + + conforms, result_graph, _ = validate( + rdf_graph, + shapes_graph, + ont_graph=None, # can use a Web URL for a graph containing extra ontological information + inference='none', + abort_on_first=False, + allow_infos=False, + allow_warnings=False, + meta_shacl=False, + advanced=False, + js=False, + debug=False) + + # only add other graphs if any violations occurred + if not conforms: + # also add nodes from data and shacl shapes to graph to be able to search backwards for the violated shapes + result_graph += shapes_graph + result_graph += rdf_graph + + return result_graph + +""" +Returns true if the given shape is violated in the report. +""" +def violates_shape(validation_report: Graph, shape: URIRef) -> bool: + + # get the class that is targeted by the specified shape + target_class = validation_report.value(shape, SH.targetClass, None, any=False) + if target_class is None: + raise ValueError(f'The shapes graph does not contain a {shape} shape.') + + + # get all classes that have been violated + # check if any of the violated classes is the class that is targeted by the specified shape + for o in validation_report.objects(None, SH.focusNode): + if target_class in validation_report.objects(o, RDF.type): + return True + + # no violated class is targeted by the specified shape, thus the shape is not violated + return False + +""" +Reads a graph from a file into a Graph object. 
+""" +def read_graph_from_file(filepath: str) -> Graph: + with open(filepath, 'r') as f: + graph = Graph() + graph.parse(file=f, format=guess_format(filepath)) + return graph + + +# assert that certain violations occurred / did not occur: +# assert violates_shape(g, SCHEMA.InformationBearingEntityShape) +# assert not violates_shape(g, SCHEMA.InformationBearingEntityShape) + From f752d1797a33094ef65d41d22567da8f9f6fd896 Mon Sep 17 00:00:00 2001 From: Horst Fellenberg <40174618+horstf@users.noreply.github.com> Date: Tue, 3 May 2022 10:40:41 +0200 Subject: [PATCH 07/26] add validation to dodo.py --- usecases/Concrete/dodo.py | 4 +- .../emodul/emodul_validation.py | 78 ------------------- 2 files changed, 2 insertions(+), 80 deletions(-) delete mode 100644 usecases/Concrete/knowledgeGraph/emodul/emodul_validation.py diff --git a/usecases/Concrete/dodo.py b/usecases/Concrete/dodo.py index ad46a73fd..6a75d54f0 100644 --- a/usecases/Concrete/dodo.py +++ b/usecases/Concrete/dodo.py @@ -1,7 +1,7 @@ import graphlib import os from pathlib import Path -from emodul import emodul_validation as validation +from knowledgeGraph.emodul import validation baseDir = Path(__file__).resolve().parents[0] emodulFolder = os.path.join(os.path.join(baseDir,'knowledgeGraph'),'emodul') @@ -13,7 +13,7 @@ compressionProcesseddataFolder = os.path.join(compressionFolder,'processeddata') graph_path = os.path.join(emodulProcesseddataFolder, 'EM_Graph.ttl') -shapes_path = os.path.join(os.path.join(baseDir, 'knowledgeGraph'), 'shape.ttl') +shapes_path = os.path.join(baseDir, 'shape.ttl') DOIT_CONFIG = {'verbosity': 2} diff --git a/usecases/Concrete/knowledgeGraph/emodul/emodul_validation.py b/usecases/Concrete/knowledgeGraph/emodul/emodul_validation.py deleted file mode 100644 index f9be06ff0..000000000 --- a/usecases/Concrete/knowledgeGraph/emodul/emodul_validation.py +++ /dev/null @@ -1,78 +0,0 @@ -from pyshacl import validate -from rdflib import Graph, URIRef, Namespace -from rdflib.util import 
guess_format -from rdflib.namespace import SH, RDF - -""" -baseDir0 = Path(__file__).resolve().parents[0] -baseDir1 = Path(__file__).resolve().parents[1] -baseDir2 = Path(__file__).resolve().parents[2] -ontologyPath = os.path.join(baseDir2,'ConcreteOntology') -metadataPath = os.path.join(baseDir0,'E-modul-processed-data/emodul_metadata.csv') -graphPath = os.path.join(baseDir0,'E-modul-processed-data/EM_Graph.ttl') -processedDataPath = os.path.join(baseDir0,'E-modul-processed-data') -""" - -SCHEMA = Namespace('http://schema.org/') - -""" -Given a path to a shacl shape and a path to an rdf file, this function tests the rdf data against the specified shacl shapes. -The result is an rdflib graph containing the validation report, if it is empty the validation was successful. -""" -def test_graph(rdf_graph: Graph, shapes_graph: Graph) -> Graph: - - conforms, result_graph, _ = validate( - rdf_graph, - shapes_graph, - ont_graph=None, # can use a Web URL for a graph containing extra ontological information - inference='none', - abort_on_first=False, - allow_infos=False, - allow_warnings=False, - meta_shacl=False, - advanced=False, - js=False, - debug=False) - - # only add other graphs if any violations occurred - if not conforms: - # also add nodes from data and shacl shapes to graph to be able to search backwards for the violated shapes - result_graph += shapes_graph - result_graph += rdf_graph - - return result_graph - -""" -Returns true if the given shape is violated in the report. 
-""" -def violates_shape(validation_report: Graph, shape: URIRef) -> bool: - - # get the class that is targeted by the specified shape - target_class = validation_report.value(shape, SH.targetClass, None, any=False) - if target_class is None: - raise ValueError(f'The shapes graph does not contain a {shape} shape.') - - - # get all classes that have been violated - # check if any of the violated classes is the class that is targeted by the specified shape - for o in validation_report.objects(None, SH.focusNode): - if target_class in validation_report.objects(o, RDF.type): - return True - - # no violated class is targeted by the specified shape, thus the shape is not violated - return False - -""" -Reads a graph from a file into a Graph object. -""" -def read_graph_from_file(filepath: str) -> Graph: - with open(filepath, 'r') as f: - graph = Graph() - graph.parse(file=f, format=guess_format(filepath)) - return graph - - -# assert that certain violations occurred / did not occur: -# assert violates_shape(g, SCHEMA.InformationBearingEntityShape) -# assert not violates_shape(g, SCHEMA.InformationBearingEntityShape) - From c4dd67b40c3bf9d7458c507711ab22e4b272c739 Mon Sep 17 00:00:00 2001 From: Horst Fellenberg <40174618+horstf@users.noreply.github.com> Date: Tue, 3 May 2022 13:08:44 +0200 Subject: [PATCH 08/26] add shape as dep to dodo.py --- usecases/Concrete/dodo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/usecases/Concrete/dodo.py b/usecases/Concrete/dodo.py index 6a75d54f0..35e7f2a5b 100644 --- a/usecases/Concrete/dodo.py +++ b/usecases/Concrete/dodo.py @@ -67,7 +67,7 @@ def task_emodul(): yield { 'basename': 'validate rdf files against shacl shape', 'actions': [(validate_graph, [graph_path, shapes_path])], - 'file_dep': [graph_path] + 'file_dep': [graph_path, shapes_path] } yield { 'basename': 'run emodul query script', From 230ac90cab80a0379fec7c75c0b4e04ad0f023bf Mon Sep 17 00:00:00 2001 From: Horst Fellenberg 
<40174618+horstf@users.noreply.github.com> Date: Tue, 3 May 2022 13:22:06 +0200 Subject: [PATCH 09/26] move and rename shape --- usecases/Concrete/dodo.py | 2 +- usecases/Concrete/shape.ttl | 83 ------------------------------------- 2 files changed, 1 insertion(+), 84 deletions(-) delete mode 100644 usecases/Concrete/shape.ttl diff --git a/usecases/Concrete/dodo.py b/usecases/Concrete/dodo.py index 35e7f2a5b..6a2c33211 100644 --- a/usecases/Concrete/dodo.py +++ b/usecases/Concrete/dodo.py @@ -13,7 +13,7 @@ compressionProcesseddataFolder = os.path.join(compressionFolder,'processeddata') graph_path = os.path.join(emodulProcesseddataFolder, 'EM_Graph.ttl') -shapes_path = os.path.join(baseDir, 'shape.ttl') +shapes_path = os.path.join(emodulFolder, 'shape_ym.ttl') DOIT_CONFIG = {'verbosity': 2} diff --git a/usecases/Concrete/shape.ttl b/usecases/Concrete/shape.ttl deleted file mode 100644 index 95d6e3e58..000000000 --- a/usecases/Concrete/shape.ttl +++ /dev/null @@ -1,83 +0,0 @@ -@prefix dash: . -@prefix schema: . -@prefix sh: . -@prefix ns1: . -@prefix ns2: . -@prefix ns3: . -@prefix ns4: . -@prefix ns5: . -@prefix ns6: . -@prefix ns7: . -@prefix ns8: . -@prefix ns9: . -@prefix rdf: . -@prefix rdfs: . -@prefix xsd: . -@prefix mseo: . -@prefix cco: . -@prefix obo: . -@prefix con: . -@prefix tt: . 
- -# for a documentation of shacl see https://www.w3.org/TR/shacl/ - -# this shape tests properties about the diameter -schema:SpecimenDiameterShape - a sh:NodeShape ; - sh:targetClass cco:Diameter ; - # there is exactly one InformationBearingEntity attached to the Diameter - sh:property [ - sh:path obo:RO_0010001 ; - sh:minCount 1 ; - sh:maxCount 1 ; - sh:class cco:InformationBearingEntity ; - ] ; - # the InformationBearingEntity has a decimal value - sh:property [ - sh:path (obo:RO_0010001 cco:has_decimal_value) ; - sh:minCount 1 ; - sh:maxCount 1 ; - sh:dataytpe xsd:decimal ; - ] ; - # the InformationBearingEntity has a measurement unit specified - sh:property [ - sh:path (obo:RO_0010001 cco:uses_measurement_unit) ; - sh:minCount 1 ; - sh:maxCount 1 ; - sh:class cco:MeasurementUnitOfLength ; - ] . - -# this shape tests properties about the Specimen -schema:SpecimenShape - a sh:NodeShape ; - sh:targetClass mseo:Specimen ; - # the Specimen is input of one (and only one) YoungsModulusTest - sh:property [ - sh:path cco:is_input_of ; - sh:minCount 1 ; - sh:maxCount 1 ; - sh:node tt:YoungsModulusTest ; - ] ; - # the Specimen has a quality (shape) attached to it - sh:property [ - sh:path (obo:BFO_0000051 obo:RO_0000086) ; - sh:minCount 1 ; - sh:maxCount 1 ; - ] ; - # the Specimen is designated by one Identifyier - sh:property [ - sh:path cco:designated_by ; - sh:minCount 1 ; - sh:maxCount 1 ; - sh:class cco:NonNameIdentifier ; - ] . - -# THIS IS A SHAPE THAT MUST FAIL! -schema:InformationBearingEntityShape - a sh:NodeShape ; - sh:targetClass cco:InformationBearingEntity ; - # each InformationBearingEntity must use at least two measurement units (nonsensical) - sh:property [ - sh:path cco:uses_measurement_unit ; - sh:minCount 2 ; - ] . 
\ No newline at end of file From 1e3697e8b34ba6569b245d35a0a34c540d77f45d Mon Sep 17 00:00:00 2001 From: Horst Fellenberg <40174618+horstf@users.noreply.github.com> Date: Tue, 26 Jul 2022 11:18:29 +0200 Subject: [PATCH 10/26] move validation into own lebedigital submodule --- lebedigital/validation/__init__.py | 0 lebedigital/validation/emodul_validation.py | 78 +++++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 lebedigital/validation/__init__.py create mode 100644 lebedigital/validation/emodul_validation.py diff --git a/lebedigital/validation/__init__.py b/lebedigital/validation/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lebedigital/validation/emodul_validation.py b/lebedigital/validation/emodul_validation.py new file mode 100644 index 000000000..f9be06ff0 --- /dev/null +++ b/lebedigital/validation/emodul_validation.py @@ -0,0 +1,78 @@ +from pyshacl import validate +from rdflib import Graph, URIRef, Namespace +from rdflib.util import guess_format +from rdflib.namespace import SH, RDF + +""" +baseDir0 = Path(__file__).resolve().parents[0] +baseDir1 = Path(__file__).resolve().parents[1] +baseDir2 = Path(__file__).resolve().parents[2] +ontologyPath = os.path.join(baseDir2,'ConcreteOntology') +metadataPath = os.path.join(baseDir0,'E-modul-processed-data/emodul_metadata.csv') +graphPath = os.path.join(baseDir0,'E-modul-processed-data/EM_Graph.ttl') +processedDataPath = os.path.join(baseDir0,'E-modul-processed-data') +""" + +SCHEMA = Namespace('http://schema.org/') + +""" +Given a path to a shacl shape and a path to an rdf file, this function tests the rdf data against the specified shacl shapes. +The result is an rdflib graph containing the validation report, if it is empty the validation was successful. 
+""" +def test_graph(rdf_graph: Graph, shapes_graph: Graph) -> Graph: + + conforms, result_graph, _ = validate( + rdf_graph, + shapes_graph, + ont_graph=None, # can use a Web URL for a graph containing extra ontological information + inference='none', + abort_on_first=False, + allow_infos=False, + allow_warnings=False, + meta_shacl=False, + advanced=False, + js=False, + debug=False) + + # only add other graphs if any violations occurred + if not conforms: + # also add nodes from data and shacl shapes to graph to be able to search backwards for the violated shapes + result_graph += shapes_graph + result_graph += rdf_graph + + return result_graph + +""" +Returns true if the given shape is violated in the report. +""" +def violates_shape(validation_report: Graph, shape: URIRef) -> bool: + + # get the class that is targeted by the specified shape + target_class = validation_report.value(shape, SH.targetClass, None, any=False) + if target_class is None: + raise ValueError(f'The shapes graph does not contain a {shape} shape.') + + + # get all classes that have been violated + # check if any of the violated classes is the class that is targeted by the specified shape + for o in validation_report.objects(None, SH.focusNode): + if target_class in validation_report.objects(o, RDF.type): + return True + + # no violated class is targeted by the specified shape, thus the shape is not violated + return False + +""" +Reads a graph from a file into a Graph object. 
+""" +def read_graph_from_file(filepath: str) -> Graph: + with open(filepath, 'r') as f: + graph = Graph() + graph.parse(file=f, format=guess_format(filepath)) + return graph + + +# assert that certain violations occurred / did not occur: +# assert violates_shape(g, SCHEMA.InformationBearingEntityShape) +# assert not violates_shape(g, SCHEMA.InformationBearingEntityShape) + From f35255b28e109fa32acd3eabf2ed2a4e205ec0b5 Mon Sep 17 00:00:00 2001 From: Horst Fellenberg <40174618+horstf@users.noreply.github.com> Date: Tue, 26 Jul 2022 11:30:05 +0200 Subject: [PATCH 11/26] validation didnt need additional submodule structure --- lebedigital/{validation/emodul_validation.py => validation.py} | 0 lebedigital/validation/__init__.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename lebedigital/{validation/emodul_validation.py => validation.py} (100%) delete mode 100644 lebedigital/validation/__init__.py diff --git a/lebedigital/validation/emodul_validation.py b/lebedigital/validation.py similarity index 100% rename from lebedigital/validation/emodul_validation.py rename to lebedigital/validation.py diff --git a/lebedigital/validation/__init__.py b/lebedigital/validation/__init__.py deleted file mode 100644 index e69de29bb..000000000 From 379c1076ceff123c0b0c39cfc82c7298c1efeab9 Mon Sep 17 00:00:00 2001 From: Horst Fellenberg <40174618+horstf@users.noreply.github.com> Date: Tue, 26 Jul 2022 11:35:42 +0200 Subject: [PATCH 12/26] add comments --- lebedigital/validation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lebedigital/validation.py b/lebedigital/validation.py index f9be06ff0..aab35f425 100644 --- a/lebedigital/validation.py +++ b/lebedigital/validation.py @@ -55,6 +55,7 @@ def violates_shape(validation_report: Graph, shape: URIRef) -> bool: # get all classes that have been violated # check if any of the violated classes is the class that is targeted by the specified shape + # return any((True for o in validation_report.objects(None, SH.focusNode) if 
target_class in validation_report.objects(o, RDF.type))) for o in validation_report.objects(None, SH.focusNode): if target_class in validation_report.objects(o, RDF.type): return True From 2a3e61c5d46a5f55700dc9639582ca45969f2485 Mon Sep 17 00:00:00 2001 From: Horst Fellenberg <40174618+horstf@users.noreply.github.com> Date: Thu, 4 Aug 2022 13:42:39 +0200 Subject: [PATCH 13/26] add comments --- lebedigital/validation.py | 62 ++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 20 deletions(-) diff --git a/lebedigital/validation.py b/lebedigital/validation.py index aab35f425..263208814 100644 --- a/lebedigital/validation.py +++ b/lebedigital/validation.py @@ -3,24 +3,25 @@ from rdflib.util import guess_format from rdflib.namespace import SH, RDF -""" -baseDir0 = Path(__file__).resolve().parents[0] -baseDir1 = Path(__file__).resolve().parents[1] -baseDir2 = Path(__file__).resolve().parents[2] -ontologyPath = os.path.join(baseDir2,'ConcreteOntology') -metadataPath = os.path.join(baseDir0,'E-modul-processed-data/emodul_metadata.csv') -graphPath = os.path.join(baseDir0,'E-modul-processed-data/EM_Graph.ttl') -processedDataPath = os.path.join(baseDir0,'E-modul-processed-data') -""" - SCHEMA = Namespace('http://schema.org/') -""" -Given a path to a shacl shape and a path to an rdf file, this function tests the rdf data against the specified shacl shapes. -The result is an rdflib graph containing the validation report, if it is empty the validation was successful. -""" + def test_graph(rdf_graph: Graph, shapes_graph: Graph) -> Graph: + """ + Tests an RDF graph against a SHACL shapes graph. + Parameters + ---------- + rdf_graph + An rdflib Graph object containing the triples to test against. + shapes_graph + An rdflib Graph object containing the shapes to test. + + Returns + ------- + result_graph + An rdflib Graph object containing the SHACL validation report (which is empty if no SHACl shapes were violated). 
+ """ conforms, result_graph, _ = validate( rdf_graph, shapes_graph, @@ -42,11 +43,21 @@ def test_graph(rdf_graph: Graph, shapes_graph: Graph) -> Graph: return result_graph -""" -Returns true if the given shape is violated in the report. -""" def violates_shape(validation_report: Graph, shape: URIRef) -> bool: + """ + Returns true if the given shape is violated in the report. + Parameters + ---------- + validation_report + An rdflib Graph object containing a validation report from the test_graph function. + shape + A URIRef object containing the URI of a shape. + + Returns + ------- + True, if the specified shape appears as violated in the validation report, False otherwise. + """ # get the class that is targeted by the specified shape target_class = validation_report.value(shape, SH.targetClass, None, any=False) if target_class is None: @@ -63,10 +74,21 @@ def violates_shape(validation_report: Graph, shape: URIRef) -> bool: # no violated class is targeted by the specified shape, thus the shape is not violated return False -""" -Reads a graph from a file into a Graph object. -""" + def read_graph_from_file(filepath: str) -> Graph: + """ + Reads a file containing an RDF graph into an rdflib Graph object. + + Parameters + ---------- + filepath + The path to the file containing the graph. + + Returns + ------- + graph + The rdflib Graph object containing the triples from the file. 
+ """ with open(filepath, 'r') as f: graph = Graph() graph.parse(file=f, format=guess_format(filepath)) From b9b31759c010219c3e2462af53d2252752a1fa7d Mon Sep 17 00:00:00 2001 From: Horst Fellenberg <40174618+horstf@users.noreply.github.com> Date: Thu, 4 Aug 2022 13:45:15 +0200 Subject: [PATCH 14/26] remove superfluous comments --- lebedigital/validation.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/lebedigital/validation.py b/lebedigital/validation.py index 263208814..86259ecf1 100644 --- a/lebedigital/validation.py +++ b/lebedigital/validation.py @@ -92,10 +92,4 @@ def read_graph_from_file(filepath: str) -> Graph: with open(filepath, 'r') as f: graph = Graph() graph.parse(file=f, format=guess_format(filepath)) - return graph - - -# assert that certain violations occurred / did not occur: -# assert violates_shape(g, SCHEMA.InformationBearingEntityShape) -# assert not violates_shape(g, SCHEMA.InformationBearingEntityShape) - + return graph \ No newline at end of file From c150d152e81027c46fb043d2a0b4bf64f03bf6ef Mon Sep 17 00:00:00 2001 From: Horst Fellenberg <40174618+horstf@users.noreply.github.com> Date: Mon, 5 Sep 2022 12:11:52 +0200 Subject: [PATCH 15/26] remove unused file from concrete --- .../knowledgeGraph/emodul/validation.py | 78 ------------------- 1 file changed, 78 deletions(-) delete mode 100644 usecases/Concrete/knowledgeGraph/emodul/validation.py diff --git a/usecases/Concrete/knowledgeGraph/emodul/validation.py b/usecases/Concrete/knowledgeGraph/emodul/validation.py deleted file mode 100644 index f9be06ff0..000000000 --- a/usecases/Concrete/knowledgeGraph/emodul/validation.py +++ /dev/null @@ -1,78 +0,0 @@ -from pyshacl import validate -from rdflib import Graph, URIRef, Namespace -from rdflib.util import guess_format -from rdflib.namespace import SH, RDF - -""" -baseDir0 = Path(__file__).resolve().parents[0] -baseDir1 = Path(__file__).resolve().parents[1] -baseDir2 = Path(__file__).resolve().parents[2] -ontologyPath = 
os.path.join(baseDir2,'ConcreteOntology') -metadataPath = os.path.join(baseDir0,'E-modul-processed-data/emodul_metadata.csv') -graphPath = os.path.join(baseDir0,'E-modul-processed-data/EM_Graph.ttl') -processedDataPath = os.path.join(baseDir0,'E-modul-processed-data') -""" - -SCHEMA = Namespace('http://schema.org/') - -""" -Given a path to a shacl shape and a path to an rdf file, this function tests the rdf data against the specified shacl shapes. -The result is an rdflib graph containing the validation report, if it is empty the validation was successful. -""" -def test_graph(rdf_graph: Graph, shapes_graph: Graph) -> Graph: - - conforms, result_graph, _ = validate( - rdf_graph, - shapes_graph, - ont_graph=None, # can use a Web URL for a graph containing extra ontological information - inference='none', - abort_on_first=False, - allow_infos=False, - allow_warnings=False, - meta_shacl=False, - advanced=False, - js=False, - debug=False) - - # only add other graphs if any violations occurred - if not conforms: - # also add nodes from data and shacl shapes to graph to be able to search backwards for the violated shapes - result_graph += shapes_graph - result_graph += rdf_graph - - return result_graph - -""" -Returns true if the given shape is violated in the report. 
-""" -def violates_shape(validation_report: Graph, shape: URIRef) -> bool: - - # get the class that is targeted by the specified shape - target_class = validation_report.value(shape, SH.targetClass, None, any=False) - if target_class is None: - raise ValueError(f'The shapes graph does not contain a {shape} shape.') - - - # get all classes that have been violated - # check if any of the violated classes is the class that is targeted by the specified shape - for o in validation_report.objects(None, SH.focusNode): - if target_class in validation_report.objects(o, RDF.type): - return True - - # no violated class is targeted by the specified shape, thus the shape is not violated - return False - -""" -Reads a graph from a file into a Graph object. -""" -def read_graph_from_file(filepath: str) -> Graph: - with open(filepath, 'r') as f: - graph = Graph() - graph.parse(file=f, format=guess_format(filepath)) - return graph - - -# assert that certain violations occurred / did not occur: -# assert violates_shape(g, SCHEMA.InformationBearingEntityShape) -# assert not violates_shape(g, SCHEMA.InformationBearingEntityShape) - From 8b253312eae784d31aff6dffdb8a120385cf877e Mon Sep 17 00:00:00 2001 From: Horst Fellenberg <40174618+horstf@users.noreply.github.com> Date: Wed, 7 Sep 2022 16:34:59 +0200 Subject: [PATCH 16/26] add files to test validation --- .../emodul/validation_test/graph.ttl | 37 +++++++++ .../emodul/validation_test/shape.ttl | 83 +++++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 usecases/MinimumWorkingExample/emodul/validation_test/graph.ttl create mode 100644 usecases/MinimumWorkingExample/emodul/validation_test/shape.ttl diff --git a/usecases/MinimumWorkingExample/emodul/validation_test/graph.ttl b/usecases/MinimumWorkingExample/emodul/validation_test/graph.ttl new file mode 100644 index 000000000..d987cee22 --- /dev/null +++ b/usecases/MinimumWorkingExample/emodul/validation_test/graph.ttl @@ -0,0 +1,37 @@ +@prefix ex: . +@prefix rdf: . 
+@prefix rdfs: . +@prefix schema: . +@prefix xsd: . +@prefix dash: . +@prefix schema: . +@prefix sh: . +@prefix ns1: . +@prefix ns2: . +@prefix ns3: . +@prefix ns4: . +@prefix ns5: . +@prefix ns6: . +@prefix ns7: . +@prefix ns8: . +@prefix ns9: . +@prefix rdf: . +@prefix rdfs: . +@prefix xsd: . +@prefix mseo: . +@prefix cco: . +@prefix obo: . +@prefix con: . +@prefix tt: . + +tt:Diam a cco:Diameter ; + obo:RO_0010001 tt:DiamVal . + +tt:DiamVal a cco:InformationBearingEntity ; + cco:has_decimal_value 12.5 ; + cco:uses_measurement_unit [ a cco:MeasurementUnitOfLength ] . + +tt:Spec a mseo:Specimen ; + cco:is_input_of [ a cco:YoungsModulusTest ] ; + cco:designated_by [ a cco:NonNameIdentifier ] ; + obo:BFO_0000051 [ obo:RO_0000086 [] ] . diff --git a/usecases/MinimumWorkingExample/emodul/validation_test/shape.ttl b/usecases/MinimumWorkingExample/emodul/validation_test/shape.ttl new file mode 100644 index 000000000..95d6e3e58 --- /dev/null +++ b/usecases/MinimumWorkingExample/emodul/validation_test/shape.ttl @@ -0,0 +1,83 @@ +@prefix dash: . +@prefix schema: . +@prefix sh: . +@prefix ns1: . +@prefix ns2: . +@prefix ns3: . +@prefix ns4: . +@prefix ns5: . +@prefix ns6: . +@prefix ns7: . +@prefix ns8: . +@prefix ns9: . +@prefix rdf: . +@prefix rdfs: . +@prefix xsd: . +@prefix mseo: . +@prefix cco: . +@prefix obo: . +@prefix con: . +@prefix tt: . 
+ +# for documentation of SHACL, see https://www.w3.org/TR/shacl/ + +# this shape tests properties about the diameter +schema:SpecimenDiameterShape + a sh:NodeShape ; + sh:targetClass cco:Diameter ; + # there is exactly one InformationBearingEntity attached to the Diameter + sh:property [ + sh:path obo:RO_0010001 ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:class cco:InformationBearingEntity ; + ] ; + # the InformationBearingEntity has a decimal value + sh:property [ + sh:path (obo:RO_0010001 cco:has_decimal_value) ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:datatype xsd:decimal ; + ] ; + # the InformationBearingEntity has a measurement unit specified + sh:property [ + sh:path (obo:RO_0010001 cco:uses_measurement_unit) ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:class cco:MeasurementUnitOfLength ; + ] . + +# this shape tests properties about the Specimen +schema:SpecimenShape + a sh:NodeShape ; + sh:targetClass mseo:Specimen ; + # the Specimen is input of one (and only one) YoungsModulusTest + sh:property [ + sh:path cco:is_input_of ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:node tt:YoungsModulusTest ; + ] ; + # the Specimen has a quality (shape) attached to it + sh:property [ + sh:path (obo:BFO_0000051 obo:RO_0000086) ; + sh:minCount 1 ; + sh:maxCount 1 ; + ] ; + # the Specimen is designated by one Identifier + sh:property [ + sh:path cco:designated_by ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:class cco:NonNameIdentifier ; + ] . + +# THIS IS A SHAPE THAT MUST FAIL! +schema:InformationBearingEntityShape + a sh:NodeShape ; + sh:targetClass cco:InformationBearingEntity ; + # each InformationBearingEntity must use at least two measurement units (nonsensical) + sh:property [ + sh:path cco:uses_measurement_unit ; + sh:minCount 2 ; + ] . 
\ No newline at end of file From da2cebf9ac575ecb6881e28ed1546026a78cfff4 Mon Sep 17 00:00:00 2001 From: Horst Fellenberg <40174618+horstf@users.noreply.github.com> Date: Wed, 7 Sep 2022 16:35:20 +0200 Subject: [PATCH 17/26] add validation tests --- lebedigital/validation.py | 11 ++- usecases/Concrete/knowledgeGraph/shape.ttl | 83 ---------------------- 2 files changed, 5 insertions(+), 89 deletions(-) delete mode 100644 usecases/Concrete/knowledgeGraph/shape.ttl diff --git a/lebedigital/validation.py b/lebedigital/validation.py index 86259ecf1..5e4974fa5 100644 --- a/lebedigital/validation.py +++ b/lebedigital/validation.py @@ -24,7 +24,7 @@ def test_graph(rdf_graph: Graph, shapes_graph: Graph) -> Graph: """ conforms, result_graph, _ = validate( rdf_graph, - shapes_graph, + shacl_graph=shapes_graph, ont_graph=None, # can use a Web URL for a graph containing extra ontological information inference='none', abort_on_first=False, @@ -35,11 +35,8 @@ def test_graph(rdf_graph: Graph, shapes_graph: Graph) -> Graph: js=False, debug=False) - # only add other graphs if any violations occurred - if not conforms: - # also add nodes from data and shacl shapes to graph to be able to search backwards for the violated shapes - result_graph += shapes_graph - result_graph += rdf_graph + result_graph += shapes_graph + result_graph += rdf_graph return result_graph @@ -75,6 +72,8 @@ def violates_shape(validation_report: Graph, shape: URIRef) -> bool: return False + + def read_graph_from_file(filepath: str) -> Graph: """ Reads a file containing an RDF graph into an rdflib Graph object. diff --git a/usecases/Concrete/knowledgeGraph/shape.ttl b/usecases/Concrete/knowledgeGraph/shape.ttl deleted file mode 100644 index 95d6e3e58..000000000 --- a/usecases/Concrete/knowledgeGraph/shape.ttl +++ /dev/null @@ -1,83 +0,0 @@ -@prefix dash: . -@prefix schema: . -@prefix sh: . -@prefix ns1: . -@prefix ns2: . -@prefix ns3: . -@prefix ns4: . -@prefix ns5: . -@prefix ns6: . -@prefix ns7: . 
-@prefix ns8: . -@prefix ns9: . -@prefix rdf: . -@prefix rdfs: . -@prefix xsd: . -@prefix mseo: . -@prefix cco: . -@prefix obo: . -@prefix con: . -@prefix tt: . - -# for a documentation of shacl see https://www.w3.org/TR/shacl/ - -# this shape tests properties about the diameter -schema:SpecimenDiameterShape - a sh:NodeShape ; - sh:targetClass cco:Diameter ; - # there is exactly one InformationBearingEntity attached to the Diameter - sh:property [ - sh:path obo:RO_0010001 ; - sh:minCount 1 ; - sh:maxCount 1 ; - sh:class cco:InformationBearingEntity ; - ] ; - # the InformationBearingEntity has a decimal value - sh:property [ - sh:path (obo:RO_0010001 cco:has_decimal_value) ; - sh:minCount 1 ; - sh:maxCount 1 ; - sh:dataytpe xsd:decimal ; - ] ; - # the InformationBearingEntity has a measurement unit specified - sh:property [ - sh:path (obo:RO_0010001 cco:uses_measurement_unit) ; - sh:minCount 1 ; - sh:maxCount 1 ; - sh:class cco:MeasurementUnitOfLength ; - ] . - -# this shape tests properties about the Specimen -schema:SpecimenShape - a sh:NodeShape ; - sh:targetClass mseo:Specimen ; - # the Specimen is input of one (and only one) YoungsModulusTest - sh:property [ - sh:path cco:is_input_of ; - sh:minCount 1 ; - sh:maxCount 1 ; - sh:node tt:YoungsModulusTest ; - ] ; - # the Specimen has a quality (shape) attached to it - sh:property [ - sh:path (obo:BFO_0000051 obo:RO_0000086) ; - sh:minCount 1 ; - sh:maxCount 1 ; - ] ; - # the Specimen is designated by one Identifyier - sh:property [ - sh:path cco:designated_by ; - sh:minCount 1 ; - sh:maxCount 1 ; - sh:class cco:NonNameIdentifier ; - ] . - -# THIS IS A SHAPE THAT MUST FAIL! -schema:InformationBearingEntityShape - a sh:NodeShape ; - sh:targetClass cco:InformationBearingEntity ; - # each InformationBearingEntity must use at least two measurement units (nonsensical) - sh:property [ - sh:path cco:uses_measurement_unit ; - sh:minCount 2 ; - ] . 
\ No newline at end of file From de56554a6d926346bffd606dca50d1b4ae345513 Mon Sep 17 00:00:00 2001 From: Horst Fellenberg <40174618+horstf@users.noreply.github.com> Date: Wed, 7 Sep 2022 16:35:35 +0200 Subject: [PATCH 18/26] add validation tests --- lebedigital/validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lebedigital/validation.py b/lebedigital/validation.py index 5e4974fa5..515fa061f 100644 --- a/lebedigital/validation.py +++ b/lebedigital/validation.py @@ -22,7 +22,7 @@ def test_graph(rdf_graph: Graph, shapes_graph: Graph) -> Graph: result_graph An rdflib Graph object containing the SHACL validation report (which is empty if no SHACl shapes were violated). """ - conforms, result_graph, _ = validate( + _, result_graph, _ = validate( rdf_graph, shacl_graph=shapes_graph, ont_graph=None, # can use a Web URL for a graph containing extra ontological information From 1a49eab4c110b9d360f6ea009525490d3451e522 Mon Sep 17 00:00:00 2001 From: Horst Fellenberg <40174618+horstf@users.noreply.github.com> Date: Wed, 7 Sep 2022 16:36:23 +0200 Subject: [PATCH 19/26] remove empty lines --- lebedigital/validation.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lebedigital/validation.py b/lebedigital/validation.py index 515fa061f..11fcd39b6 100644 --- a/lebedigital/validation.py +++ b/lebedigital/validation.py @@ -5,7 +5,6 @@ SCHEMA = Namespace('http://schema.org/') - def test_graph(rdf_graph: Graph, shapes_graph: Graph) -> Graph: """ Tests an RDF graph against a SHACL shapes graph. @@ -71,9 +70,6 @@ def violates_shape(validation_report: Graph, shape: URIRef) -> bool: # no violated class is targeted by the specified shape, thus the shape is not violated return False - - - def read_graph_from_file(filepath: str) -> Graph: """ Reads a file containing an RDF graph into an rdflib Graph object. 
From 26e0925e3b8af04b8b414625cdfb2e5aa7f6470b Mon Sep 17 00:00:00 2001 From: Horst Fellenberg <40174618+horstf@users.noreply.github.com> Date: Wed, 7 Sep 2022 16:39:08 +0200 Subject: [PATCH 20/26] add validation tests --- tests/validation/test_validation.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 tests/validation/test_validation.py diff --git a/tests/validation/test_validation.py b/tests/validation/test_validation.py new file mode 100644 index 000000000..ac84cf9bd --- /dev/null +++ b/tests/validation/test_validation.py @@ -0,0 +1,21 @@ +import pytest +import os + +from lebedigital.validation import SCHEMA, read_graph_from_file, test_graph, violates_shape + +def test_graph_against_shape(): + """ + Testing a data graph against a shapes graph and checking that the appropriate shapes fail. + """ + + shapes_location = "../usecases/MinimumWorkingExample/emodul/validation_test/shape.ttl" + data_location = "../usecases/MinimumWorkingExample/emodul/validation_test/graph.ttl" + + g = read_graph_from_file(data_location) + s = read_graph_from_file(shapes_location) + + res = test_graph(g, s) + + assert not violates_shape(res, SCHEMA.SpecimenDiameterShape) + assert not violates_shape(res, SCHEMA.SpecimenShape) + assert violates_shape(res, SCHEMA.InformationBearingEntityShape) \ No newline at end of file From 9e92d7082e5888fd5dbeadac12e8a41491da3d3d Mon Sep 17 00:00:00 2001 From: Horst Fellenberg <40174618+horstf@users.noreply.github.com> Date: Thu, 8 Sep 2022 14:13:59 +0200 Subject: [PATCH 21/26] undo changes in requirements.txt --- usecases/Concrete/knowledgeGraph/requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/usecases/Concrete/knowledgeGraph/requirements.txt b/usecases/Concrete/knowledgeGraph/requirements.txt index 3f4ada73d..76665f0b2 100644 --- a/usecases/Concrete/knowledgeGraph/requirements.txt +++ b/usecases/Concrete/knowledgeGraph/requirements.txt @@ -8,6 +8,5 @@ SPARQLWrapper==1.8.5 requests==2.22.0 
GitPython==3.1.24 probeye==1.0.6 -pyshacl==0.9.5 pyaml==21.10.1 doit==0.33.1 From 3133735ac0aa11b9bba2069908916268f5c0d8af Mon Sep 17 00:00:00 2001 From: Horst Fellenberg <40174618+horstf@users.noreply.github.com> Date: Thu, 8 Sep 2022 14:14:27 +0200 Subject: [PATCH 22/26] undo changes in requirements.txt --- usecases/Concrete/knowledgeGraph/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/usecases/Concrete/knowledgeGraph/requirements.txt b/usecases/Concrete/knowledgeGraph/requirements.txt index 76665f0b2..c393e89ff 100644 --- a/usecases/Concrete/knowledgeGraph/requirements.txt +++ b/usecases/Concrete/knowledgeGraph/requirements.txt @@ -9,4 +9,4 @@ requests==2.22.0 GitPython==3.1.24 probeye==1.0.6 pyaml==21.10.1 -doit==0.33.1 +doit==0.33.1 \ No newline at end of file From 0d501bc1e78c84f0f4019bb6a132138fa2d5fdd6 Mon Sep 17 00:00:00 2001 From: Horst Fellenberg <40174618+horstf@users.noreply.github.com> Date: Fri, 7 Oct 2022 12:51:41 +0200 Subject: [PATCH 23/26] add some dodo code --- usecases/MinimumWorkingExample/dodo.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/usecases/MinimumWorkingExample/dodo.py b/usecases/MinimumWorkingExample/dodo.py index 64c7f5950..76cfb407e 100644 --- a/usecases/MinimumWorkingExample/dodo.py +++ b/usecases/MinimumWorkingExample/dodo.py @@ -13,6 +13,9 @@ from doit import get_var +from lebedigital import validation +from lebedigital.validation import SCHEMA + # set a variable to define a cheap or full run # the default "doit" is set to "doit mode=cheap" # any other mode value runs the expensive version i.e. 
"doit mode=full" @@ -120,4 +123,22 @@ def task_export_knowledgeGraph_emodul(): 'file_dep': [metadata_file_path, processed_data_file_path], 'targets': [knowledge_graph_file], 'clean': [clean_targets] - } \ No newline at end of file + } + +#validate +@create_after(executed='expord_knowledgeGraph_emodul') +def task_validate_graph(): + + graphs = os.scandir(knowledge_graphs_directory) + + s = validation.read_graph_from_file(Path(emodul_output_directory, 'validation_test', 'shape.ttl')) + + for f in graphs: + if f.is_file() and Path(f).suffix == '.ttl': + # do some validation + g = validation.read_graph_from_file(g) + res = validation.test_graph(g, s) + + assert not validation.violates_shape(res, SCHEMA.SpecimenDiameterShape) + assert not validation.violates_shape(res, SCHEMA.SpecimenShape) + assert validation.violates_shape(res, SCHEMA.InformationBearingEntityShape) \ No newline at end of file From 3cd8a72038ad101efe6cd7d69b873495a8f51e00 Mon Sep 17 00:00:00 2001 From: Horst Fellenberg <40174618+horstf@users.noreply.github.com> Date: Fri, 7 Oct 2022 13:20:36 +0200 Subject: [PATCH 24/26] add output code --- usecases/MinimumWorkingExample/dodo.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/usecases/MinimumWorkingExample/dodo.py b/usecases/MinimumWorkingExample/dodo.py index 76cfb407e..5692860c8 100644 --- a/usecases/MinimumWorkingExample/dodo.py +++ b/usecases/MinimumWorkingExample/dodo.py @@ -141,4 +141,10 @@ def task_validate_graph(): assert not validation.violates_shape(res, SCHEMA.SpecimenDiameterShape) assert not validation.violates_shape(res, SCHEMA.SpecimenShape) - assert validation.violates_shape(res, SCHEMA.InformationBearingEntityShape) \ No newline at end of file + assert validation.violates_shape(res, SCHEMA.InformationBearingEntityShape) + + out = open(Path(emodul_output_directory, 'validation_result.txt'), 'wx') + out.write(f'{f.name}:') + out.write(repr(SCHEMA.SpecimenDiameterShape) + ('failed' if validation.violates_shape(res, 
SCHEMA.SpecimenDiameterShape) else 'passed')) + out.write(repr(SCHEMA.SpecimenShape) + ('failed' if validation.violates_shape(res, SCHEMA.SpecimenShape) else 'passed')) + out.write(repr(SCHEMA.InformationBearingEntityShape) + ('failed' if validation.violates_shape(res, SCHEMA.InformationBearingEntityShape) else 'passed')) From a3917f954498024a5afef4242931b8e6356bc6e6 Mon Sep 17 00:00:00 2001 From: Horst Fellenberg <40174618+horstf@users.noreply.github.com> Date: Fri, 7 Oct 2022 13:21:59 +0200 Subject: [PATCH 25/26] add output code --- usecases/MinimumWorkingExample/dodo.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/usecases/MinimumWorkingExample/dodo.py b/usecases/MinimumWorkingExample/dodo.py index 5692860c8..7e4a97d3c 100644 --- a/usecases/MinimumWorkingExample/dodo.py +++ b/usecases/MinimumWorkingExample/dodo.py @@ -145,6 +145,11 @@ def task_validate_graph(): out = open(Path(emodul_output_directory, 'validation_result.txt'), 'wx') out.write(f'{f.name}:') - out.write(repr(SCHEMA.SpecimenDiameterShape) + ('failed' if validation.violates_shape(res, SCHEMA.SpecimenDiameterShape) else 'passed')) - out.write(repr(SCHEMA.SpecimenShape) + ('failed' if validation.violates_shape(res, SCHEMA.SpecimenShape) else 'passed')) - out.write(repr(SCHEMA.InformationBearingEntityShape) + ('failed' if validation.violates_shape(res, SCHEMA.InformationBearingEntityShape) else 'passed')) + + for shape in [ + SCHEMA.SpecimenDiameterShape, + SCHEMA.SpecimenShape, + SCHEMA.InformationBearingEntityShape + ]: + out.write(repr(shape) + ('failed' if validation.violates_shape(res, shape) else 'passed')) + \ No newline at end of file From 267802304fa047ad39b7e71df005f80f9d6f538f Mon Sep 17 00:00:00 2001 From: Horst Fellenberg <40174618+horstf@users.noreply.github.com> Date: Mon, 16 Jan 2023 14:39:18 +0100 Subject: [PATCH 26/26] update dodo --- usecases/MinimumWorkingExample/dodo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/usecases/MinimumWorkingExample/dodo.py b/usecases/MinimumWorkingExample/dodo.py index 7e4a97d3c..5ffc472ab 100644 --- a/usecases/MinimumWorkingExample/dodo.py +++ b/usecases/MinimumWorkingExample/dodo.py @@ -126,7 +126,7 @@ def task_export_knowledgeGraph_emodul(): } #validate -@create_after(executed='expord_knowledgeGraph_emodul') +@create_after(executed='export_knowledgeGraph_emodul') def task_validate_graph(): graphs = os.scandir(knowledge_graphs_directory) @@ -144,12 +144,12 @@ def task_validate_graph(): assert validation.violates_shape(res, SCHEMA.InformationBearingEntityShape) out = open(Path(emodul_output_directory, 'validation_result.txt'), 'wx') - out.write(f'{f.name}:') + out.write(f'{f.name}:\n') for shape in [ SCHEMA.SpecimenDiameterShape, SCHEMA.SpecimenShape, SCHEMA.InformationBearingEntityShape ]: - out.write(repr(shape) + ('failed' if validation.violates_shape(res, shape) else 'passed')) + out.write(f'{shape} {"failed" if validation.violates_shape(res, shape) else "passed"}\n') \ No newline at end of file