BAMresearch · horstf · Apr 13, 2022 · Apr 13, 2022 · Apr 13, 2022 · Apr 13, 2022
diff --git a/lebedigital/validation.py b/lebedigital/validation.py
@@ -0,0 +1,95 @@
+from pyshacl import validate
+from rdflib import Graph, URIRef, Namespace
+from rdflib.util import guess_format
+from rdflib.namespace import SH, RDF
+
+SCHEMA = Namespace('http://schema.org/')
+
+
+def test_graph(rdf_graph: Graph, shapes_graph: Graph) -> Graph:
+    """
+    Tests an RDF graph against a SHACL shapes graph.
+
+    Parameters
+    ----------
+    rdf_graph
+        An rdflib Graph object containing the triples to test.
+    shapes_graph
+        An rdflib Graph object containing the shapes to test against.
+
+    Returns
+    -------
+    result_graph
+        An rdflib Graph object containing the SHACL validation report; on violations the shapes and data triples are added to it.
+    """
+    conforms, result_graph, _ = validate(
+            rdf_graph,
+            shapes_graph,
+            ont_graph=None,  # can use a Web URL for a graph containing extra ontological information
+            inference='none',
+            abort_on_first=False,
+            allow_infos=False,
+            allow_warnings=False,
+            meta_shacl=False,
+            advanced=False,
+            js=False,
+            debug=False)
+
+    # only add other graphs if any violations occurred
+    if not conforms:
+        # also add nodes from data and shacl shapes to graph to be able to search backwards for the violated shapes
+        result_graph += shapes_graph
+        result_graph += rdf_graph
+
+    return result_graph
+
+def violates_shape(validation_report: Graph, shape: URIRef) -> bool:
+    """
+    Returns true if the given shape is violated in the report.
+
+    Parameters
+    ----------
+    validation_report
+        An rdflib Graph object containing a validation report from the test_graph function.
+    shape
+        A URIRef object containing the URI of a shape.
+
+    Returns
+    -------
+    violated
+        True if a reported focus node is an instance of the shape's target class, False otherwise.
+
+    Raises
+    ------
+    ValueError
+        If violations were reported but the report contains no sh:targetClass for the shape.
+    """
+    # a report without focus nodes lists no violations; test_graph does not merge the shapes then
+    focus_nodes = list(validation_report.objects(None, SH.focusNode))
+    if not focus_nodes:
+        return False
+    target_class = validation_report.value(shape, SH.targetClass, None, any=False)
+    if target_class is None:
+        raise ValueError(f'The shapes graph does not contain a {shape} shape.')
+    # the shape counts as violated if any focus node is typed with its target class
+    return any(target_class in validation_report.objects(node, RDF.type) for node in focus_nodes)
+
+
+def read_graph_from_file(filepath: str) -> Graph:
+    """
+    Reads a file containing an RDF graph into an rdflib Graph object.
+
+    Parameters
+    ----------
+    filepath
+        The path to the file containing the graph; the RDF format is guessed from the file name.
+
+    Returns
+    -------
+    graph
+        The rdflib Graph object containing the triples from the file.
+    """
+    with open(filepath, 'rb') as f:  # binary mode: the parser decodes the bytes, not the locale's default codec
+        graph = Graph()
+        graph.parse(file=f, format=guess_format(filepath))
+    return graph
diff --git a/usecases/Concrete/dodo.py b/usecases/Concrete/dodo.py
@@ -1,8 +1,10 @@
+import graphlib
 import os
 from pathlib import Path
+from knowledgeGraph.emodul import validation
 
 baseDir = Path(__file__).resolve().parents[0]
-emodulFolder = os.path.join(os.path.join(os.path.join(baseDir,'knowledgeGraph'),'emodul'),'E-modul-processed-data')
+emodulFolder = os.path.join(os.path.join(baseDir,'knowledgeGraph'),'emodul')
 emodulRawdataFolder = os.path.join(emodulFolder,'rawdata')
 emodulProcesseddataFolder = os.path.join(emodulFolder,'processeddata')
 emodulYAMLmetadataFolder = os.path.join(emodulFolder,'metadata_yaml_files')
@@ -11,8 +13,19 @@
 compressionRawdataFolder = os.path.join(compressionFolder,'rawdata')
 compressionProcesseddataFolder = os.path.join(compressionFolder,'processeddata')
 
+graph_path = os.path.join(emodulProcesseddataFolder, 'EM_Graph.ttl')
+shapes_path = os.path.join(emodulFolder, 'shape_ym.ttl')
+
 DOIT_CONFIG = {'verbosity': 2}
 
+def validate_graph(graph_path, shapes_path):
+    """Validate the RDF file against the SHACL file; raise AssertionError if a shape's outcome is unexpected."""
+    r = validation.test_graph(validation.read_graph_from_file(graph_path), validation.read_graph_from_file(shapes_path))
+    # raise explicitly (a bare assert is stripped under `python -O`); True marks a shape expected to be violated
+    for name, expected in (('InformationBearingEntityShape', True), ('SpecimenDiameterShape', False), ('SpecimenShape', False)):
+        if validation.violates_shape(r, validation.SCHEMA[name]) != expected:
+            raise AssertionError(f'{name}: expected violated={expected}')
+
 def task_installation():
     yield {
         'basename': 'install python packages',
@@ -71,6 +84,11 @@ def task_emodul():
     #     'basename': 'validate rdf files against shacl shape',
     #     'actions': ['python knowledgeGraph/emodul/emodul_validation.py']
     # }
+    yield {
+        'basename': 'validate rdf files against shacl shape',
+        'actions': [(validate_graph, [graph_path, shapes_path])],
+        'file_dep': [graph_path, shapes_path]
+    }
     yield {
         'basename': 'run emodul query script',
         'actions': ['python knowledgeGraph/emodul/emodul_query.py'],

diff --git a/usecases/Concrete/knowledgeGraph/emodul/__init__.py b/usecases/Concrete/knowledgeGraph/emodul/__init__.py
diff --git a/usecases/Concrete/knowledgeGraph/emodul/validation.py b/usecases/Concrete/knowledgeGraph/emodul/validation.py
@@ -0,0 +1,78 @@
+from pyshacl import validate
+from rdflib import Graph, URIRef, Namespace
+from rdflib.util import guess_format
+from rdflib.namespace import SH, RDF
+
+"""
+baseDir0 = Path(__file__).resolve().parents[0]
+baseDir1 = Path(__file__).resolve().parents[1]
+baseDir2 = Path(__file__).resolve().parents[2]
+ontologyPath = os.path.join(baseDir2,'ConcreteOntology')
+metadataPath = os.path.join(baseDir0,'E-modul-processed-data/emodul_metadata.csv')
+graphPath = os.path.join(baseDir0,'E-modul-processed-data/EM_Graph.ttl')
+processedDataPath = os.path.join(baseDir0,'E-modul-processed-data')
+"""
+
+SCHEMA = Namespace('http://schema.org/')
+
+"""
+Given an rdflib graph with RDF data and an rdflib graph with SHACL shapes, this function tests the data against the shapes.
+The result is an rdflib graph containing the validation report; on violations the shapes and the data are added to it.
+"""
+def test_graph(rdf_graph: Graph, shapes_graph: Graph) -> Graph:
+    """Tests rdf_graph against the SHACL shapes in shapes_graph and returns the validation report graph."""
+    conforms, result_graph, _ = validate(
+            rdf_graph,
+            shapes_graph,
+            ont_graph=None,  # can use a Web URL for a graph containing extra ontological information
+            inference='none',
+            abort_on_first=False,
+            allow_infos=False,
+            allow_warnings=False,
+            meta_shacl=False,
+            advanced=False,
+            js=False,
+            debug=False)
+
+    # only add other graphs if any violations occurred
+    if not conforms:
+        # also add nodes from data and shacl shapes to graph to be able to search backwards for the violated shapes
+        result_graph += shapes_graph
+        result_graph += rdf_graph
+
+    return result_graph
+
+"""
+Returns true if the given shape is violated in the report.
+"""
+def violates_shape(validation_report: Graph, shape: URIRef) -> bool:
+    """
+    Return True if a reported focus node is an instance of the shape's target class.
+
+    Raises ValueError if violations were reported but the shape has no sh:targetClass in the report.
+    """
+    # a report without focus nodes lists no violations; test_graph does not merge the shapes then
+    focus_nodes = list(validation_report.objects(None, SH.focusNode))
+    if not focus_nodes:
+        return False
+    target_class = validation_report.value(shape, SH.targetClass, None, any=False)
+    if target_class is None:
+        raise ValueError(f'The shapes graph does not contain a {shape} shape.')
+
+    # the shape counts as violated if any focus node is typed with its target class
+    return any(target_class in validation_report.objects(node, RDF.type) for node in focus_nodes)
+
+"""
+Reads a graph from a file into a Graph object.
+"""
+def read_graph_from_file(filepath: str) -> Graph:
+    """Reads the RDF file at filepath into a new rdflib Graph; the format is guessed from the file name."""
+    with open(filepath, 'rb') as f:  # binary mode: the parser decodes the bytes, not the locale's default codec
+        graph = Graph()
+        graph.parse(file=f, format=guess_format(filepath))
+    return graph
+
+# assert that certain violations occurred / did not occur:
+# assert violates_shape(g, SCHEMA.InformationBearingEntityShape)
+# assert not violates_shape(g, SCHEMA.InformationBearingEntityShape)
+
diff --git a/usecases/Concrete/knowledgeGraph/requirements.txt b/usecases/Concrete/knowledgeGraph/requirements.txt
@@ -8,5 +8,6 @@ SPARQLWrapper==1.8.5
 requests==2.22.0
 GitPython==3.1.24
 probeye==1.0.6
+pyshacl==0.9.5
 pyaml==21.10.1
-doit==0.33.1
+doit==0.33.1