diff --git a/.binder/environment.yml b/.binder/environment.yml
index 42d4747..61dc0e9 100644
--- a/.binder/environment.yml
+++ b/.binder/environment.yml
@@ -6,6 +6,10 @@ dependencies:
 - owlready2 =0.47
 - pandas =2.2.3
 - pint =0.24.4
+- semantikon =0.0.9
+- pyiron_workflow =0.11.2
+- rdflib =7.1.1
+- owlrl =7.1.2
 - pyiron_atomistics =0.6.19
-- pyiron-data =0.0.29
+- pyiron-data =0.0.30
 - lammps =2024.02.07=*_openmpi_*
diff --git a/.ci_support/environment.yml b/.ci_support/environment.yml
index ef41085..c23037e 100644
--- a/.ci_support/environment.yml
+++ b/.ci_support/environment.yml
@@ -5,4 +5,8 @@ dependencies:
 - openjdk
 - owlready2 =0.47
 - pandas =2.2.3
-- pint =0.24.4
\ No newline at end of file
+- pint =0.24.4
+- semantikon =0.0.9
+- pyiron_workflow =0.11.2
+- rdflib =7.1.1
+- owlrl =7.1.2
diff --git a/.ci_support/lower_bound.yml b/.ci_support/lower_bound.yml
index 1704139..db342dd 100644
--- a/.ci_support/lower_bound.yml
+++ b/.ci_support/lower_bound.yml
@@ -4,7 +4,11 @@ dependencies:
 - numpy =1.26.3
 - openjdk
 - owlready2 =0.46
-- pandas =2.1.4
-- pint =0.23
-- pyiron_atomistics =0.5.0
-- pyiron-data =0.0.26
\ No newline at end of file
+- pandas =2.2.0
+- pint =0.24
+- pyiron_atomistics =0.6.19
+- pyiron-data =0.0.26
+- semantikon =0.0.9
+- pyiron_workflow =0.11.0
+- rdflib =7.1.1
+- owlrl =7.1.2
diff --git a/.github/workflows/push-pull.yml b/.github/workflows/push-pull.yml
index 55368fc..63fac5f 100644
--- a/.github/workflows/push-pull.yml
+++ b/.github/workflows/push-pull.yml
@@ -12,7 +12,8 @@ jobs:
     uses: pyiron/actions/.github/workflows/push-pull.yml@actions-3.3.3
     secrets: inherit
     with:
+      python-version-alt3: 'exclude' # No python 3.9
       docs-env-files: .ci_support/environment.yml .ci_support/environment-docs.yml
       notebooks-env-files: .ci_support/environment.yml .ci_support/environment-pyiron_atomistics.yml .ci_support/environment-lammps.yml
       tests-env-files: .ci_support/environment.yml .ci_support/environment-pyiron_atomistics.yml
-      alternate-tests-env-files: .ci_support/lower_bound.yml
\ No newline at end of file
+      alternate-tests-env-files: .ci_support/lower_bound.yml
diff --git a/docs/environment.yml b/docs/environment.yml
index c507644..5a59693 100644
--- a/docs/environment.yml
+++ b/docs/environment.yml
@@ -12,4 +12,8 @@ dependencies:
 - owlready2 =0.47
 - pandas =2.2.3
 - pint =0.24.4
+- semantikon =0.0.9
+- pyiron_workflow =0.11.2
+- rdflib =7.1.1
+- owlrl =7.1.2
 - sphinxcontrib-mermaid
diff --git a/pyiron_ontology/parser.py b/pyiron_ontology/parser.py
new file mode 100644
index 0000000..1bb54fc
--- /dev/null
+++ b/pyiron_ontology/parser.py
@@ -0,0 +1,270 @@
+from semantikon.converter import parse_input_args, parse_output_args
+from rdflib import Graph, Literal, RDF, RDFS, URIRef, OWL
+from pyiron_workflow import NOT_DATA
+
+
+def get_source_output(var):
+    """
+    Get the label of the output connected to an input
+
+    Args:
+        var: input of a node, providing `connected` and `connections`
+
+    Returns:
+        (str|None): "<owner label>.outputs.<label>" of the first
+            connection or None if there is no connection
+    """
+    if not var.connected:
+        return None
+    connection = var.connections[0]
+    return f"{connection.owner.label}.outputs.{connection.label}"
+
+
+def get_inputs_and_outputs(node):
+    """
+    Read input and output arguments with their type hints and return a
+    dictionary containing all input output information
+
+    Args:
+        node (pyiron_workflow.nodes.Node): node to be parsed
+
+    Returns:
+        (dict): dictionary containing input output args, type hints, values
+            and variable names
+    """
+    inputs = parse_input_args(node.node_function)
+    outputs = parse_output_args(node.node_function)
+    if isinstance(outputs, dict):
+        outputs = (outputs,)
+    elif outputs is None:
+        # One dict per output: `n * ({},)` would repeat one shared dict object
+        outputs = tuple({} for _ in node.outputs.labels)
+    outputs = dict(zip(node.outputs.labels, outputs))
+    for key, value in node.inputs.items():
+        if inputs[key] is None:
+            inputs[key] = {}
+        inputs[key]["value"] = value.value
+        inputs[key]["var_name"] = key
+        inputs[key]["connection"] = get_source_output(value)
+    for key, value in node.outputs.to_value_dict().items():
+        outputs[key]["value"] = value
+        outputs[key]["var_name"] = key
+    return {
+        "inputs": inputs,
+        "outputs": outputs,
+        "function": node.node_function.__name__,
+        "label": node.label,
+    }
+
+
+def get_triples(
+    data,
+    EX,
+    hasSourceFunction=None,
+    hasUnits=None,
+    inheritsPropertiesFrom=None,
+    update_query=True,
+):
+    """
+    Translate the output of `get_inputs_and_outputs` into an RDF graph
+
+    Args:
+        data (dict): dictionary with the keys "inputs", "outputs",
+            "function" and "label"
+        EX (rdflib.Namespace): namespace used to create the URIs
+        hasSourceFunction (rdflib.URIRef): predicate linking the node to
+            its function (default: EX.hasSourceFunction)
+        hasUnits (rdflib.URIRef): predicate linking an argument to its
+            units (default: EX.hasUnits)
+        inheritsPropertiesFrom (rdflib.URIRef): predicate linking an input
+            to its connection (default: EX.inheritsPropertiesFrom). Note
+            that `inherit_properties` is always called with EX and thus
+            only follows EX.inheritsPropertiesFrom
+        update_query (bool): whether to call `inherit_properties` on the
+            graph before returning it
+
+    Returns:
+        (rdflib.Graph): graph containing the triples of the node
+    """
+    if hasSourceFunction is None:
+        hasSourceFunction = EX.hasSourceFunction
+    if hasUnits is None:
+        hasUnits = EX.hasUnits
+    if inheritsPropertiesFrom is None:
+        inheritsPropertiesFrom = EX.inheritsPropertiesFrom
+    graph = Graph()
+    node_label = EX[data["label"]]
+    graph.add((node_label, RDF.type, OWL.NamedIndividual))
+    graph.add((node_label, hasSourceFunction, EX[data["function"]]))
+    for io_ in ["inputs", "outputs"]:
+        for key, d in data[io_].items():
+            full_key = data["label"] + f".{io_}." + key
+            label = EX[full_key]
+            graph.add((label, RDFS.label, Literal(full_key)))
+            if d.get("uri", None) is not None:
+                graph.add((label, RDF.type, d["uri"]))
+            if d.get("value", NOT_DATA) is not NOT_DATA:
+                graph.add((label, RDF.value, Literal(d["value"])))
+            # "inputs" -> EX.inputOf, "outputs" -> EX.outputOf
+            graph.add((label, EX[io_[:-1] + "Of"], node_label))
+            if d.get("units", None) is not None:
+                graph.add((label, hasUnits, EX[d["units"]]))
+            if d.get("connection", None) is not None:
+                graph.add((label, inheritsPropertiesFrom, EX[d["connection"]]))
+            for t in _get_triples_from_restrictions(d, EX):
+                graph.add(_parse_triple(t, EX, label=label, data=data))
+    if update_query:
+        inherit_properties(graph, EX)
+    return graph
+
+
+def _get_triples_from_restrictions(data, EX):
+    """
+    Collect the triples given by "restriction" and "triple" of an argument
+
+    Args:
+        data (dict): metadata of a single input or output
+        EX (rdflib.Namespace): currently unused
+
+    Returns:
+        (list): triples with 2 or 3 elements (cf. `_parse_triple`)
+    """
+    triples = []
+    if data.get("restriction", None) is not None:
+        triples = restriction_to_triple(data["restriction"])
+    if data.get("triple", None) is not None:
+        # Tuple of types instead of `tuple | list`, which needs python >= 3.10
+        if isinstance(data["triple"][0], (tuple, list)):
+            triples.extend(data["triple"])
+        else:
+            triples.append(data["triple"])
+    return triples
+
+
+def restriction_to_triple(restriction):
+    """
+    Convert OWL restrictions into triples
+
+    Args:
+        restriction (tuple): either one restriction, i.e. a tuple of
+            (predicate, object) pairs, or a tuple of such restrictions. The
+            object of the first pair is used to name the restriction
+
+    Returns:
+        (list): triples defining each restriction, each followed by the pair
+            (RDF.type, restriction name)
+
+    Raises:
+        ValueError: if restriction is not a non-empty tuple of tuples or if
+            the first entry of a restriction does not have two elements
+    """
+    if not (
+        isinstance(restriction, tuple)
+        and len(restriction) > 0
+        and isinstance(restriction[0], tuple)
+    ):
+        raise ValueError("restriction must be a non-empty tuple of tuples")
+    if not isinstance(restriction[0][0], tuple):
+        restriction = (restriction,)
+    triples = []
+    for r in restriction:
+        if len(r[0]) != 2:
+            raise ValueError("restriction entries must have two elements")
+        label = r[0][1] + "Restriction"
+        triples.append((label, RDF.type, OWL.Restriction))
+        triples.extend((label, rr[0], rr[1]) for rr in r)
+        triples.append((RDF.type, label))
+    return triples
+
+
+def _parse_triple(triple, EX, label=None, data=None):
+    """
+    Complete a triple and convert its object to a URI
+
+    Args:
+        triple (tuple): (predicate, object) or (subject, predicate, object)
+        EX (rdflib.Namespace): namespace for objects that are not URIRef
+        label (rdflib.URIRef): subject used if triple has two elements
+        data (dict): node dictionary; its "label" is prepended to objects
+            starting with "inputs." or "outputs."
+
+    Returns:
+        (tuple): subject, predicate, object
+
+    Raises:
+        ValueError: if triple has neither 2 nor 3 elements
+    """
+    if len(triple) == 2:
+        subj, pred, obj = label, triple[0], triple[1]
+    elif len(triple) == 3:
+        subj, pred, obj = triple
+    else:
+        raise ValueError("Triple must have 2 or 3 elements")
+    if obj.startswith(("inputs.", "outputs.")):
+        obj = data["label"] + "." + obj
+    if not isinstance(obj, URIRef):
+        obj = EX[obj]
+    return subj, pred, obj
+
+
+def inherit_properties(graph, NS, n=None):
+    """
+    Copy the triples of every target to the subjects that point to it via
+    inheritsPropertiesFrom
+
+    The predicates inheritsPropertiesFrom, rdfs:label, rdf:value and rdf:type
+    are not copied
+
+    Args:
+        graph (rdflib.Graph): graph to be updated in place
+        NS (rdflib.Namespace): namespace of inheritsPropertiesFrom
+        n (int): number of times the update is run (default: number of
+            inheritsPropertiesFrom triples in the graph)
+    """
+    update_query = "\n".join(
+        (
+            f"PREFIX ns: <{NS}>",
+            f"PREFIX rdfs: <{RDFS}>",
+            f"PREFIX rdf: <{RDF}>",
+            "",
+            "INSERT {",
+            " ?subject ?p ?o .",
+            "}",
+            "WHERE {",
+            " ?subject ns:inheritsPropertiesFrom ?target .",
+            " ?target ?p ?o .",
+            " FILTER(?p != ns:inheritsPropertiesFrom)",
+            " FILTER(?p != rdfs:label)",
+            " FILTER(?p != rdf:value)",
+            " FILTER(?p != rdf:type)",
+            "}",
+        )
+    )
+    if n is None:
+        n = len(list(graph.triples((None, NS.inheritsPropertiesFrom, None))))
+    for _ in range(n):
+        graph.update(update_query)
+
+
+def validate_values(graph):
+    """
+    Find instances violating the someValuesFrom restrictions of their class
+
+    Args:
+        graph (rdflib.Graph): graph to be validated
+
+    Returns:
+        (list): triples (instance, property, value) missing in the graph
+    """
+    missing_triples = []
+    for restriction in graph.subjects(RDF.type, OWL.Restriction):
+        on_property = graph.value(restriction, OWL.onProperty)
+        some_values_from = graph.value(restriction, OWL.someValuesFrom)
+        if not (on_property and some_values_from):
+            continue
+        for cls in graph.subjects(OWL.equivalentClass, restriction):
+            for instance in graph.subjects(RDF.type, cls):
+                triple = (instance, on_property, some_values_from)
+                if triple not in graph:
+                    missing_triples.append(triple)
+    return missing_triples
diff --git a/tests/unit/test_parser.py b/tests/unit/test_parser.py
new file mode 100644
index 0000000..c8c70ff
--- /dev/null
+++ b/tests/unit/test_parser.py
@@ -0,0 +1,126 @@
+import unittest
+from owlrl import DeductiveClosure, OWLRL_Semantics
+from rdflib import Graph, OWL, RDF
+from pyiron_ontology.parser import get_inputs_and_outputs, get_triples, inherit_properties, validate_values
+from pyiron_workflow import Workflow
+from semantikon.typing import u
+from rdflib import Namespace
+
+
+EX = Namespace("http://example.org/")
+
+
+@Workflow.wrap.as_function_node("speed")
+def calculate_speed(
+    distance: u(float, units="meter") = 10.0,
+    time: u(float, units="second") = 2.0,
+) -> u(
+    float,
+    units="meter/second",
+    triple=(
+        (EX.isOutputOf, "inputs.time"),
+        (EX.subject, EX.predicate, EX.object)
+    )
+):
+    return distance / time
+
+
+@Workflow.wrap.as_function_node("result")
+def add(a: float, b: float) -> u(float, triple=(EX.HasOperation, EX.Addition)):
+    return a + b
+
+
+@Workflow.wrap.as_function_node("result")
+def multiply(a: float, b: float) -> u(
+    float,
+    triple=(
+        (EX.HasOperation, EX.Multiplication),
+        (EX.inheritsPropertiesFrom, "inputs.a")
+    )
+):
+    return a * b
+
+
+@Workflow.wrap.as_function_node("result")
+def correct_analysis(
+    a: u(
+        float,
+        restriction=(
+            (OWL.onProperty, EX.HasOperation),
+            (OWL.someValuesFrom, EX.Addition)
+        )
+    )
+) -> float:
+    return a
+
+
+@Workflow.wrap.as_function_node("result")
+def wrong_analysis(
+    a: u(
+        float,
+        restriction=(
+            (OWL.onProperty, EX.HasOperation),
+            (OWL.someValuesFrom, EX.Division)
+        )
+    )
+) -> float:
+    return a
+
+
+class TestParser(unittest.TestCase):
+    def test_parser(self):
+        c = calculate_speed()
+        output_dict = get_inputs_and_outputs(c)
+        for label in ["inputs", "outputs", "function", "label"]:
+            self.assertIn(label, output_dict)
+
+    def test_triples(self):
+        speed = calculate_speed()
+        data = get_inputs_and_outputs(speed)
+        graph = get_triples(data, EX)
+        self.assertGreater(
+            len(list(graph.triples((None, EX.hasUnits, EX["meter/second"])))), 0
+        )
+        self.assertEqual(
+            len(
+                list(
+                    graph.triples(
+                        (None, EX.isOutputOf, EX["calculate_speed.inputs.time"])
+                    )
+                )
+            ),
+            1
+        )
+        self.assertEqual(
+            len(list(graph.triples((EX.subject, EX.predicate, EX.object)))),
+            1
+        )
+
+    def test_correct_analysis(self):
+        def get_graph(wf):
+            graph = Graph()
+            graph.add((EX.HasOperation, RDF.type, RDF.Property))
+            graph.add((EX.Addition, RDF.type, OWL.Class))
+            graph.add((EX.Multiplication, RDF.type, OWL.Class))
+            for value in wf.children.values():
+                data = get_inputs_and_outputs(value)
+                graph += get_triples(data, EX)
+            inherit_properties(graph, EX)
+            DeductiveClosure(OWLRL_Semantics).expand(graph)
+            return graph
+        wf = Workflow("correct_analysis")
+        wf.addition = add(a=1., b=2.)
+        wf.multiply = multiply(a=wf.addition, b=3.)
+        wf.analysis = correct_analysis(a=wf.multiply)
+        graph = get_graph(wf)
+        self.assertEqual(len(validate_values(graph)), 0)
+        wf = Workflow("wrong_analysis")
+        wf.addition = add(a=1., b=2.)
+        wf.multiply = multiply(a=wf.addition, b=3.)
+        wf.analysis = wrong_analysis(a=wf.multiply)
+        graph = get_graph(wf)
+        self.assertEqual(len(validate_values(graph)), 1)
+
+
+if __name__ == "__main__":
+    unittest.main()