From 70de03d471dbd010b1eff54a38537f0584c81633 Mon Sep 17 00:00:00 2001 From: marcel Date: Mon, 4 Nov 2024 19:52:57 +0100 Subject: [PATCH] Improve SHACL2Flink SPARQL query performance SPARQL is used by the tools intensively to transform SHACL expressions into SQL expressions. This becomes slow when large knowledge graphs or large SHACL models are used. In this PR, two main goals are achieved: (1) Move from the rdflib native representation to the faster Oxigraph representation where possible (2) Replace owlrl to be able to update rdflib, pyshacl etc. to the most recent versions. To achieve that, a simplified transitive-closure algorithm is applied, because transitive closure is needed to make "single-step" rdfs:subClassOf evaluations in Flink. This also led to a correction of the demo knowledge.ttl, since the class definitions were not correct (i.e. rdfs:class instead of rdfs:Class) Signed-off-by: marcel --- .../shacl2flink/create_knowledge_closure.py | 5 +- .../shacl2flink/create_ngsild_models.py | 10 ++- .../shacl2flink/create_ngsild_tables.py | 8 +-- .../shacl2flink/create_rdf_table.py | 7 +- .../shacl2flink/lib/bgp_translation_utils.py | 28 ++++---- .../shacl2flink/lib/shacl_sparql_to_sql.py | 7 +- .../shacl2flink/lib/sparql_to_sql.py | 14 +++- semantic-model/shacl2flink/lib/utils.py | 67 +++++++++++++++++++ semantic-model/shacl2flink/requirements.txt | 9 +-- .../kms-constraints/test2/knowledge.ttl | 8 +-- .../sql-tests/kms-constraints/test2/shacl.ttl | 2 +- .../kms-constraints/test5/knowledge.ttl | 8 +-- .../tests/test_create_knowledge_closure.py | 7 +- .../tests/test_create_ngsild_tables.py | 10 +-- .../tests/test_create_rdf_table.py | 7 +- .../tests/test_lib_bgp_translation_utils.py | 23 +++---- .../tests/test_lib_shacl_sparql_to_sql.py | 4 +- .../shacl2flink/tests/test_utils.py | 49 ++++++++++++++ 18 files changed, 199 insertions(+), 74 deletions(-) diff --git a/semantic-model/shacl2flink/create_knowledge_closure.py b/semantic-model/shacl2flink/create_knowledge_closure.py index 
bc956366..0fc9e2d1 100644 --- a/semantic-model/shacl2flink/create_knowledge_closure.py +++ b/semantic-model/shacl2flink/create_knowledge_closure.py @@ -17,8 +17,8 @@ import sys import os import rdflib -import owlrl import argparse +import lib.utils as utils def parse_args(args=sys.argv[1:]): @@ -33,8 +33,7 @@ def parse_args(args=sys.argv[1:]): def main(knowledgefile, outputfile): h = rdflib.Graph() h.parse(knowledgefile) - owlrl.DeductiveClosure(owlrl.OWLRL_Extension, rdfs_closure=True, - axiomatic_triples=True, datatype_axioms=True).expand(h) + h = utils.transitive_closure(h) filename = os.path.dirname(os.path.abspath(knowledgefile)) + '/' + outputfile h.serialize(destination=filename, format='turtle') diff --git a/semantic-model/shacl2flink/create_ngsild_models.py b/semantic-model/shacl2flink/create_ngsild_models.py index 55fca940..aea8e9fa 100644 --- a/semantic-model/shacl2flink/create_ngsild_models.py +++ b/semantic-model/shacl2flink/create_ngsild_models.py @@ -20,7 +20,6 @@ import argparse import lib.utils as utils import lib.configs as configs -import owlrl def parse_args(args=sys.argv[1:]): @@ -112,11 +111,11 @@ def main(shaclfile, knowledgefile, modelfile, output_folder='output'): utils.create_output_folder(output_folder) with open(os.path.join(output_folder, "ngsild-models.sqlite"), "w")\ as sqlitef: - g = Graph() + g = Graph(store="Oxigraph") g.parse(shaclfile) - model = Graph() + model = Graph(store="Oxigraph") model.parse(modelfile) - knowledge = Graph() + knowledge = Graph(store="Oxigraph") knowledge.parse(knowledgefile) attributes_model = model + g + knowledge @@ -161,8 +160,7 @@ def main(shaclfile, knowledgefile, modelfile, output_folder='output'): print(";", file=sqlitef) # Create ngsild tables by sparql - owlrl.DeductiveClosure(owlrl.OWLRL_Extension, rdfs_closure=True, axiomatic_triples=True, - datatype_axioms=True).expand(knowledge) + knowledge = utils.transitive_closure(knowledge) table_model = model + knowledge + g qres = 
table_model.query(ngsild_tables_query_noinference) tables = {} diff --git a/semantic-model/shacl2flink/create_ngsild_tables.py b/semantic-model/shacl2flink/create_ngsild_tables.py index df18d7c9..06556aa2 100644 --- a/semantic-model/shacl2flink/create_ngsild_tables.py +++ b/semantic-model/shacl2flink/create_ngsild_tables.py @@ -23,7 +23,6 @@ import lib.utils as utils import lib.configs as configs from ruamel.yaml.scalarstring import (SingleQuotedScalarString as sq) -import owlrl field_query = """ @@ -65,12 +64,11 @@ def parse_args(args=sys.argv[1:]): def main(shaclfile, knowledgefile, output_folder='output'): yaml = ruamel.yaml.YAML() utils.create_output_folder(output_folder) - g = Graph() + g = Graph(store="Oxigraph") g.parse(shaclfile) - h = Graph() + h = Graph(store="Oxigraph") h.parse(knowledgefile) - owlrl.DeductiveClosure(owlrl.OWLRL_Extension, rdfs_closure=True, axiomatic_triples=True, - datatype_axioms=True).expand(h) + h = utils.transitive_closure(h) g += h tables = {} qres = g.query(field_query) diff --git a/semantic-model/shacl2flink/create_rdf_table.py b/semantic-model/shacl2flink/create_rdf_table.py index 2bef82c7..73df7c22 100644 --- a/semantic-model/shacl2flink/create_rdf_table.py +++ b/semantic-model/shacl2flink/create_rdf_table.py @@ -19,7 +19,6 @@ import sys import math import hashlib -import owlrl import ruamel.yaml import rdflib from lib import utils @@ -124,11 +123,9 @@ def main(knowledgefile, namespace, output_folder='output'): primary_key = ['subject', 'predicate', 'index'] # Create RDF statements to insert data - g = rdflib.Graph() + g = rdflib.Graph(store="Oxigraph") g.parse(knowledgefile) - owlrl.DeductiveClosure(owlrl.OWLRL_Extension, rdfs_closure=True, axiomatic_triples=True, - datatype_axioms=True).expand(g) - + g = utils.transitive_closure(g) statementsets = create_statementset(g) sqlstatements = '' for statementset in statementsets: diff --git a/semantic-model/shacl2flink/lib/bgp_translation_utils.py 
b/semantic-model/shacl2flink/lib/bgp_translation_utils.py index d29b21ba..610bbeba 100644 --- a/semantic-model/shacl2flink/lib/bgp_translation_utils.py +++ b/semantic-model/shacl2flink/lib/bgp_translation_utils.py @@ -352,37 +352,37 @@ def create_ngsild_mappings(ctx, sorted_graph): equivalence = [] variables = [] for key, value in ctx['classes'].items(): - sparqlvalidationquery += f'?{key} rdfs:subClassOf <{value.toPython()}> .\n' - sparqlvalidationquery += f'<{value.toPython()}> rdfs:subClassOf ?{key} .\n' + sparqlvalidationquery += f'{{?{key} rdfs:subClassOf <{value.toPython()}> .\n' + sparqlvalidationquery += f'<{value.toPython()}> rdfs:subClassOf ?{key} .}}\n' for entity in entity_variables.keys(): - sparqlvalidationquery += f'?{entity}shapex sh:targetClass/rdfs:subClassOf* ?{entity} .\n' - sparqlvalidationquery += f'?{entity}shape sh:targetClass ?{entity} .\n' + sparqlvalidationquery += f'{{?{entity}shapex sh:targetClass/rdfs:subClassOf ?{entity} .\n' + sparqlvalidationquery += f'?{entity}shape sh:targetClass ?{entity} .}}\n' variables.append(entity) for s, p, o in sorted_graph.triples((entity, None, None)): property_class = sorted_graph.value(o, ngsild['hasObject']) if property_class is not None: - sparqlvalidationquery += f'?{s}shape sh:property [ sh:path <{p}> ; sh:property \ -[ sh:path ngsild:hasObject; sh:class ?{property_class} ] ] .\n' + sparqlvalidationquery += f'{{?{s}shape sh:property [ sh:path <{p}> ; sh:property \ +[ sh:path ngsild:hasObject; sh:class ?{property_class} ] ] .}}\n' for property in property_variables: variables.append(property) - sparqlvalidationquery += f'?{property}shapex sh:targetClass/rdfs:subClassOf* ?{property} .\n' - sparqlvalidationquery += f'?{property}shape sh:targetClass ?{property} .\n' + sparqlvalidationquery += f'{{?{property}shapex sh:targetClass/rdfs:subClassOf ?{property} .\n' + sparqlvalidationquery += f'?{property}shape sh:targetClass ?{property} .}}\n' for s, p, o in sorted_graph.triples((None, ngsild['hasValue'], 
property)): for p in sorted_graph.predicates(object=s): - sparqlvalidationquery += f'?{property}shape sh:property [ sh:path <{p}> ; ] .\n' + sparqlvalidationquery += f'{{?{property}shape sh:property [ sh:path <{p}> ; ] .}}\n' for subj in sorted_graph.subjects(predicate=p, object=s): if isinstance(subj, Variable): - sparqlvalidationquery += f'{subj.toPython()} rdfs:subClassOf* ?{property} .\n' + sparqlvalidationquery += f'{{{subj.toPython()} rdfs:subClassOf ?{property} .}}\n' for property in time_variables: variables.append(property) - sparqlvalidationquery += f'?{property}shapex sh:targetClass/rdfs:subClassOf* ?{property} .\n' - sparqlvalidationquery += f'?{property}shape sh:targetClass ?{property} .\n' + sparqlvalidationquery += f'{{?{property}shapex sh:targetClass/rdfs:subClassOf ?{property} .\n' + sparqlvalidationquery += f'?{property}shape sh:targetClass ?{property} .}}\n' for s, p, o in sorted_graph.triples((None, ngsild['observedAt'], property)): for p in sorted_graph.predicates(object=s): - sparqlvalidationquery += f'?{property}shape sh:property [ sh:path <{p}> ; ] .\n' + sparqlvalidationquery += f'{{?{property}shape sh:property [ sh:path <{p}> ; ] .}}\n' for subj in sorted_graph.subjects(predicate=p, object=s): if isinstance(subj, Variable): - sparqlvalidationquery += f'{subj.toPython()} rdfs:subClassOf ?{property}' + sparqlvalidationquery += f'{{{subj.toPython()} rdfs:subClassOf ?{property}}}' query = basequery for variable in variables: diff --git a/semantic-model/shacl2flink/lib/shacl_sparql_to_sql.py b/semantic-model/shacl2flink/lib/shacl_sparql_to_sql.py index 62ee3fed..3a0d5acf 100644 --- a/semantic-model/shacl2flink/lib/shacl_sparql_to_sql.py +++ b/semantic-model/shacl2flink/lib/shacl_sparql_to_sql.py @@ -1,5 +1,4 @@ from rdflib import Graph -import owlrl import os import sys import re @@ -89,12 +88,12 @@ def translate(shaclfile, knowledgefile, prefixes): (statementset, tables, views): statementset in yaml format """ - g = Graph() - h = Graph() + g = 
Graph(store="Oxigraph") + h = Graph(store="Oxigraph") g.parse(shaclfile) h.parse(knowledgefile) g += h - owlrl.RDFSClosure.RDFS_Semantics(g, axioms=True, daxioms=False, rdfs=True).closure() + g = utils.transitive_closure(g) tables_all = [] statementsets = [] sqlite = '' diff --git a/semantic-model/shacl2flink/lib/sparql_to_sql.py b/semantic-model/shacl2flink/lib/sparql_to_sql.py index 1d188ea4..c50c173b 100644 --- a/semantic-model/shacl2flink/lib/sparql_to_sql.py +++ b/semantic-model/shacl2flink/lib/sparql_to_sql.py @@ -207,8 +207,7 @@ def translate(ctx, elem): elif elem.name == 'Builtin_NOTEXISTS': return translate_notexists(ctx, elem) elif elem.name == 'Distinct': - ctx['target_modifiers'].append('Distinct') - translate(ctx, elem.p) + return translate_distinct(ctx, elem) elif elem.name == 'LeftJoin': return translate_left_join(ctx, elem) elif elem.name == 'Extend': @@ -240,6 +239,13 @@ def translate(ctx, elem): supported!') +def translate_distinct(ctx, elem): + ctx['target_modifiers'].append('Distinct') + translate(ctx, elem.p) + elem['target_sql'] = elem.p['target_sql'] + elem['where'] = elem.p['where'] + + def translate_unary_not(ctx, elem): expression = translate(ctx, elem.expr) return f" NOT ({expression}) " @@ -608,7 +614,11 @@ def remap_join_constraint_to_where(node): def copy_context(ctx): + # avoid deep copy of graph. 
it is not needed and creates problems with oxigraph + graph = ctx['g'] ctx_copy = copy.deepcopy(ctx) + # copy graph manually into the new structure + ctx_copy['g'] = graph ctx_copy['target_sql'] = '' ctx_copy['target_modifiers'] = [] ctx_copy['sql_tables'] = ctx['sql_tables'] diff --git a/semantic-model/shacl2flink/lib/utils.py b/semantic-model/shacl2flink/lib/utils.py index bba5b75b..c427f2ca 100644 --- a/semantic-model/shacl2flink/lib/utils.py +++ b/semantic-model/shacl2flink/lib/utils.py @@ -17,8 +17,10 @@ import os import re import rdflib +from rdflib import RDFS, OWL, RDF, Graph, Literal, XSD from urllib.parse import urlparse from enum import Enum +from collections import deque class WrongSparqlStructure(Exception): @@ -594,3 +596,68 @@ def split_statementsets(statementsets, max_map_size): grouped_strings.append(current_group) return grouped_strings + + +# This creates a transitive closure of all OWL.TransitiveProperty elements given in the ontology +# plus rdfs:subClassOf. In addition is makes sure that every rdfs:Class and owl:Class are reflexive +def transitive_closure(g): + closure_graph = Graph() + closure_graph += g + + # Ensure rdfs:subClassOf is defined as an OWL.TransitiveProperty if it is not already defined + if (RDFS.subClassOf, RDF.type, OWL.TransitiveProperty) not in closure_graph: + closure_graph.add((RDFS.subClassOf, RDF.type, OWL.TransitiveProperty)) + + # Handle subClassOf separately + # Add reflexive subClassOf relationships for all classes + for s in closure_graph.subjects(predicate=RDFS.subClassOf): + if (s, RDFS.subClassOf, s) not in closure_graph: + closure_graph.add((s, RDFS.subClassOf, s)) + + # Add reflexive subClassOf relationships for every element of type rdfs:Class and owl:Class + for s in closure_graph.subjects(predicate=RDF.type, object=RDFS.Class): + if (s, RDFS.subClassOf, s) not in closure_graph: + closure_graph.add((s, RDFS.subClassOf, s)) + for s in closure_graph.subjects(predicate=RDF.type, object=OWL.Class): + if (s, 
RDFS.subClassOf, s) not in closure_graph: + closure_graph.add((s, RDFS.subClassOf, s)) + + # Handle other transitive properties + transitive_properties = set(closure_graph.subjects(predicate=RDF.type, object=OWL.TransitiveProperty)) + for prop in transitive_properties: + # Use a queue for BFS for each transitive property + queue = deque(closure_graph.triples((None, prop, None))) + visited = set(queue) + + while queue: + s1, _, o1 = queue.popleft() + + # Find all objects that o1 is related to via the same property + for _, _, o2 in closure_graph.triples((o1, prop, None)): + if (s1, prop, o2) not in visited: + # Add new inferred triple + closure_graph.add((s1, prop, o2)) + queue.append((s1, prop, o2)) + visited.add((s1, prop, o2)) + + # Handle generalization of rdf:Bag/rdf:Container + for bag in closure_graph.subjects(predicate=RDF.type, object=RDF.Bag): + # Add rdf:Bag and rdfs:Container types + closure_graph.add((bag, RDF.type, RDFS.Container)) + + # Collect all rdf:_n properties (e.g., rdf:_1, rdf:_2, etc.) 
+ members = [] + for p, o in closure_graph.predicate_objects(subject=bag): + if p.startswith(str(RDF) + "_"): + members.append(o) + # Ensure all values are xsd:string literals + if not isinstance(o, Literal) or o.datatype != XSD.string: + closure_graph.set((bag, p, Literal(str(o), datatype=XSD.string))) + + # Add rdfs:member relationships + if members: + closure_graph.add((bag, RDFS.member, Literal(members[0], datatype=XSD.string))) + for member in members[1:]: + closure_graph.add((bag, RDFS.member, Literal(member, datatype=XSD.string))) + + return closure_graph diff --git a/semantic-model/shacl2flink/requirements.txt b/semantic-model/shacl2flink/requirements.txt index 138c3105..6d4b1a3c 100644 --- a/semantic-model/shacl2flink/requirements.txt +++ b/semantic-model/shacl2flink/requirements.txt @@ -1,8 +1,9 @@ -rdflib==6.2.0 -owlrl==6.0.2 -pyshacl==0.20.0 +rdflib==7.1.1 +pyshacl==0.29.0 ruamel.yaml==0.17.21 click==8.1.3 Jinja2==3.1.3 setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability -apache-flink==1.17.1 +apache-flink==1.17.2 +oxrdflib==0.4.0 + diff --git a/semantic-model/shacl2flink/tests/sql-tests/kms-constraints/test2/knowledge.ttl b/semantic-model/shacl2flink/tests/sql-tests/kms-constraints/test2/knowledge.ttl index 124a3118..b3df1de0 100644 --- a/semantic-model/shacl2flink/tests/sql-tests/kms-constraints/test2/knowledge.ttl +++ b/semantic-model/shacl2flink/tests/sql-tests/kms-constraints/test2/knowledge.ttl @@ -3,9 +3,9 @@ PREFIX rdfs: PREFIX iff: iff:entity a iff:class ; - a rdfs:class . + a rdfs:Class . iff:machine a iff:class ; - a rdfs:class . + a rdfs:Class . iff:filter rdfs:subClassOf iff:machine ; a iff:class . iff:plasmacutter rdfs:subClassOf iff:cutter ; @@ -22,7 +22,7 @@ iff:operationSchedule rdfs:subClassOf iff:linkedEntity ; a iff:class . iff:maintenanceInterval rdfs:subClassOf iff:linkedEntity ; a iff:class . -iff:machineState a rdfs:class . +iff:machineState a rdfs:Class . iff:state_OFF a iff:machineState . 
iff:state_OFF iff:stateValidFor iff:filter, iff:cutter . iff:state_ON a iff:machineState . @@ -50,7 +50,7 @@ iff:WC1 iff:moreExpensiveThan iff:WC0 . iff:WC1 rdf:_n ("1.4301" "1.4302" "1.4303" "1.4304" "1.4305") . iff:WC2 rdf:_n ("1.3301" "1.3302" "1.3303" "1.3304" "1.3305") . iff:WC3 rdf:_n ("1.5301" "1.5302" "1.5303" "1.5304" "1.5305") . -iff:Severity a rdfs:class . +iff:Severity a rdfs:Class . iff:severityWarning a iff:Severity . iff:severityWarning rdfs:label 'warning' . iff:severityMajor a iff:Severity . diff --git a/semantic-model/shacl2flink/tests/sql-tests/kms-constraints/test2/shacl.ttl b/semantic-model/shacl2flink/tests/sql-tests/kms-constraints/test2/shacl.ttl index 390bca0d..aab28748 100644 --- a/semantic-model/shacl2flink/tests/sql-tests/kms-constraints/test2/shacl.ttl +++ b/semantic-model/shacl2flink/tests/sql-tests/kms-constraints/test2/shacl.ttl @@ -325,7 +325,7 @@ iff:StateOnFilterShape PREFIX iff: PREFIX rdfs: PREFIX rdf: -SELECT $this ?v1 ?pc ?v2 +SELECT DISTINCT $this ?v1 ?pc ?v2 where { $this iff:state [ ?v1 ] . ?pc rdf:type iff:plasmacutter . diff --git a/semantic-model/shacl2flink/tests/sql-tests/kms-constraints/test5/knowledge.ttl b/semantic-model/shacl2flink/tests/sql-tests/kms-constraints/test5/knowledge.ttl index fef4ae6f..d6d51164 100644 --- a/semantic-model/shacl2flink/tests/sql-tests/kms-constraints/test5/knowledge.ttl +++ b/semantic-model/shacl2flink/tests/sql-tests/kms-constraints/test5/knowledge.ttl @@ -12,9 +12,9 @@ iff:plasmacutter_test rdfs:subClassOf iff:cutter_test ; ### End of Test Content iff:entity a iff:class ; - a rdfs:class . + a rdfs:Class . iff:machine a iff:class ; - a rdfs:class . + a rdfs:Class . iff:filter rdfs:subClassOf iff:machine ; a iff:class . iff:plasmacutter rdfs:subClassOf iff:cutter ; @@ -29,11 +29,11 @@ iff:filterCartridge rdfs:subClassOf iff:entity ; a iff:class . -iff:scheduleEntity a iff:class . +iff:scheduleEntity a iff:class, rdfs:Class . iff:oeeTemplate rdfs:subClassOf iff:scheduleEntity . 
### Machine states -iff:machineState a rdfs:class . +iff:machineState a rdfs:Class . iff:state_OFF a iff:machineState . iff:state_OFF iff:stateValidFor iff:filter, iff:cutter . iff:state_ON a iff:machineState . diff --git a/semantic-model/shacl2flink/tests/test_create_knowledge_closure.py b/semantic-model/shacl2flink/tests/test_create_knowledge_closure.py index db2110a5..5e0d96db 100644 --- a/semantic-model/shacl2flink/tests/test_create_knowledge_closure.py +++ b/semantic-model/shacl2flink/tests/test_create_knowledge_closure.py @@ -19,6 +19,9 @@ @patch('create_knowledge_closure.rdflib') -@patch('create_knowledge_closure.owlrl') -def test_main(mock_owlrl, mock_rdflib, tmp_path): +@patch('create_knowledge_closure.utils') +def test_main(mock_utils, mock_rdflib, tmp_path, monkeypatch): + def identity(val): + return val + monkeypatch.setattr(mock_utils, "transitive_closure", identity) create_knowledge_closure.main('kms/knowledge.ttl', 'knowledge_closure.ttl') diff --git a/semantic-model/shacl2flink/tests/test_create_ngsild_tables.py b/semantic-model/shacl2flink/tests/test_create_ngsild_tables.py index b33e2f74..cfbbe09b 100644 --- a/semantic-model/shacl2flink/tests/test_create_ngsild_tables.py +++ b/semantic-model/shacl2flink/tests/test_create_ngsild_tables.py @@ -34,9 +34,10 @@ class dotdict(dict): @patch('create_ngsild_tables.Graph') @patch('create_ngsild_tables.configs') @patch('create_ngsild_tables.utils') -@patch('create_ngsild_tables.owlrl') -def test_main(mock_owlrl, mock_utils, mock_configs, mock_graph, - mock_yaml, tmp_path): +def test_main(mock_utils, mock_configs, mock_graph, + mock_yaml, tmp_path, monkeypatch): + def identity(val): + return val mock_configs.kafka_topic_ngsi_prefix = 'ngsild_prefix' mock_configs.kafka_bootstrap = 'bootstrap' mock_utils.create_sql_table.return_value = "sqltable" @@ -44,6 +45,8 @@ def test_main(mock_owlrl, mock_utils, mock_configs, mock_graph, mock_utils.create_sql_view.return_value = "sqlview" 
mock_utils.create_yaml_view.return_value = "yamlview" mock_utils.camelcase_to_snake_case.return_value = 'shacltype' + monkeypatch.setattr(mock_utils, "transitive_closure", identity) + mock_yaml.dump.return_value = "dump" g = mock_graph.return_value g.__contains__.return_value = True @@ -56,7 +59,6 @@ def test_main(mock_owlrl, mock_utils, mock_configs, mock_graph, row = {'shacltype': shacltype, 'path': 'path'} row = dotdict(row) g.query.return_value = [row] - mock_owlrl.DeductiveClosure.expand.return_value = True create_ngsild_tables.main('kms/shacl.ttl', 'kms/knowledge.ttl', tmp_path) assert os.path.exists(os.path.join(tmp_path, 'ngsild.yaml')) is True diff --git a/semantic-model/shacl2flink/tests/test_create_rdf_table.py b/semantic-model/shacl2flink/tests/test_create_rdf_table.py index e0c93543..e729301a 100644 --- a/semantic-model/shacl2flink/tests/test_create_rdf_table.py +++ b/semantic-model/shacl2flink/tests/test_create_rdf_table.py @@ -62,17 +62,20 @@ def format_node_type(x): @patch('create_rdf_table.ruamel.yaml') -@patch('create_rdf_table.owlrl') @patch('create_rdf_table.rdflib') @patch('create_rdf_table.create_table') @patch('create_rdf_table.configs') @patch('create_rdf_table.utils') def test_main(mock_utils, mock_configs, mock_create_table, mock_rdflib, - mock_owlrl, mock_yaml, tmp_path): + mock_yaml, tmp_path, monkeypatch): + def identity(val): + return val mock_utils.create_sql_table.return_value = "sqltable" mock_utils.create_yaml_table.return_value = "yamltable" mock_utils.create_statementset.return_value = "statementset" mock_yaml.dump.return_value = "dump" + monkeypatch.setattr(mock_utils, "transitive_closure", identity) + create_rdf_table.main('kms/knowledge.ttl', 'namespace', tmp_path) assert os.path.exists(os.path.join(tmp_path, 'rdf.yaml')) is True diff --git a/semantic-model/shacl2flink/tests/test_lib_bgp_translation_utils.py b/semantic-model/shacl2flink/tests/test_lib_bgp_translation_utils.py index 98d2aef9..3c7c6cab 100644 --- 
a/semantic-model/shacl2flink/tests/test_lib_bgp_translation_utils.py +++ b/semantic-model/shacl2flink/tests/test_lib_bgp_translation_utils.py @@ -32,14 +32,14 @@ def test_create_ngsild_mappings(monkeypatch): class Graph: def query(self, sparql): - assert "?this rdfs:subClassOf .\n" in sparql - assert "?thisshape sh:targetClass ?this .\n?thisshape sh:property [ sh:path \ - ; sh:property [ sh:path \ -ngsild:hasObject; sh:class ?f ] ] ." in sparql - assert "?v2shape sh:targetClass ?v2 .\n?v2shape sh:property [ sh:path \ - ; ] ." in sparql - assert "?v1shape sh:targetClass ?v1 .\n?v1shape sh:property [ sh:path \ - ; ] ." in sparql + assert "{?this rdfs:subClassOf .\n\ + rdfs:subClassOf ?this .}" in sparql + assert "{?fshapex sh:targetClass/rdfs:subClassOf ?f .\n\ +?fshape sh:targetClass ?f .}" in sparql + assert "{?thisshape sh:property [ sh:path ; \ +sh:property [ sh:path ngsild:hasObject; sh:class ?f ] ] .}" in sparql + assert "{?v2shapex sh:targetClass/rdfs:subClassOf ?v2 .\n\ +?v2shape sh:targetClass ?v2 .}" in sparql return ['row'] relationships = { "https://industry-fusion.com/types/v0.9/hasFilter": True @@ -97,10 +97,9 @@ def query(self, sparql): assert "?this rdfs:subClassOf ." in sparql assert "pcshape sh:property [ sh:path ; sh:property \ [ sh:path ngsild:hasObject; sh:class ?this ] ]" in sparql - assert "?v2shape sh:targetClass ?v2 .\n?v2shape sh:property [ sh:path \ - ; ] ." in sparql - assert "?v1shape sh:targetClass ?v1 .\n?v1shape sh:property [ sh:path \ - ; ] ." 
in sparql + assert "{?v2shapex sh:targetClass/rdfs:subClassOf ?v2 .\n\ +?v2shape sh:targetClass ?v2 .}" in sparql + assert "{?v1shape sh:property [ sh:path ; ] .}" in sparql return ['row'] relationships = { diff --git a/semantic-model/shacl2flink/tests/test_lib_shacl_sparql_to_sql.py b/semantic-model/shacl2flink/tests/test_lib_shacl_sparql_to_sql.py index 7e4d2768..805bbf72 100644 --- a/semantic-model/shacl2flink/tests/test_lib_shacl_sparql_to_sql.py +++ b/semantic-model/shacl2flink/tests/test_lib_shacl_sparql_to_sql.py @@ -20,12 +20,11 @@ @patch('lib.shacl_sparql_to_sql.Graph') -@patch('lib.shacl_sparql_to_sql.owlrl') @patch('lib.shacl_sparql_to_sql.translate_sparql') @patch('lib.shacl_sparql_to_sql.add_variables_to_message') @patch('lib.shacl_sparql_to_sql.utils') def test_translate(mock_utils, mock_add_variables_to_message, mock_translate_sparql, - mock_owlrl, mock_graph, monkeypatch): + mock_graph, monkeypatch): def mock_add_variables_to_message(message): return message g = mock_graph.return_value @@ -37,6 +36,7 @@ def mock_strip_class(klass): monkeypatch.setattr(mock_utils, "strip_class", mock_strip_class) monkeypatch.setattr(mock_utils, "class_to_obj_name", mock_strip_class) monkeypatch.setattr(mock_utils, "camelcase_to_snake_case", mock_strip_class) + monkeypatch.setattr(mock_utils, "transitive_closure", mock_strip_class) message = MagicMock() message.toPython.return_value = 'message' diff --git a/semantic-model/shacl2flink/tests/test_utils.py b/semantic-model/shacl2flink/tests/test_utils.py index ae2dbd7f..72ccc44d 100644 --- a/semantic-model/shacl2flink/tests/test_utils.py +++ b/semantic-model/shacl2flink/tests/test_utils.py @@ -18,6 +18,7 @@ from unittest.mock import patch import lib.utils as utils import rdflib +from rdflib import Graph, Namespace, RDF, RDFS, OWL, Literal, XSD def test_check_dns_name(): @@ -316,3 +317,51 @@ def test_process_sql_dialect(): isSqlite = True result_expression = utils.process_sql_dialect(expression, isSqlite) assert 
result_expression == 'trim(CAST(julianday(ltrim(rtrim(test, \'>\'), \'<\')) * 86400000 as INTEGER), \'\\"\')' + + +def test_transitive_closure(): + # Define a custom namespace for testing + TEST = Namespace("http://example.org/test#") + + # Create an RDF graph for testing + g = Graph() + g.bind("test", TEST) + + # Add some initial triples + g.add((TEST.A, RDF.type, RDFS.Class)) + g.add((TEST.B, RDF.type, RDFS.Class)) + g.add((TEST.A, RDFS.subClassOf, TEST.B)) + + g.add((TEST.C, RDF.type, OWL.Class)) + g.add((TEST.C, RDFS.subClassOf, TEST.A)) + + # Add a transitive property + g.add((TEST.transitiveProp, RDF.type, OWL.TransitiveProperty)) + g.add((TEST.X, TEST.transitiveProp, TEST.Y)) + g.add((TEST.Y, TEST.transitiveProp, TEST.Z)) + + # Add a container type + g.add((TEST.container, RDF.type, RDF.Bag)) + g.add((TEST.container, RDF._1, Literal("value1"))) + g.add((TEST.container, RDF._2, Literal("value2"))) + + # Apply the transitive_closure function to the graph + closure_graph = utils.transitive_closure(g) + for s, p, o in closure_graph: + print((s, p, o)) + # Check if the reflexive relationships are added + assert (TEST.A, RDFS.subClassOf, TEST.A) in closure_graph + assert (TEST.B, RDFS.subClassOf, TEST.B) in closure_graph + assert (TEST.C, RDFS.subClassOf, TEST.C) in closure_graph + + # Check if the transitive relationships are added + assert (TEST.A, RDFS.subClassOf, TEST.B) in closure_graph + assert (TEST.C, RDFS.subClassOf, TEST.B) in closure_graph + + # Check transitive property propagation + assert (TEST.X, TEST.transitiveProp, TEST.Z) in closure_graph + + # Check container relationships + assert (TEST.container, RDF.type, RDFS.Container) in closure_graph + assert (TEST.container, RDFS.member, Literal("value1", datatype=XSD.string)) in closure_graph + assert (TEST.container, RDFS.member, Literal("value2", datatype=XSD.string)) in closure_graph