diff --git a/rdflib/plugins/sparql/_contrib/valuesToTheLeftOfTheJoins.py b/rdflib/plugins/sparql/_contrib/valuesToTheLeftOfTheJoins.py
new file mode 100644
index 000000000..f2193bb8c
--- /dev/null
+++ b/rdflib/plugins/sparql/_contrib/valuesToTheLeftOfTheJoins.py
@@ -0,0 +1,45 @@
+"""
+Move a VALUES clause to the left of the join.
+This is normally smart as this is often a much shorter list than what is generated
+by the other expression.
+"""
+
+from typing import Any
+
+from rdflib.plugins.sparql.sparql import Query
+
+
+class ValuesToTheLeftOfTheJoin:
+    """Query translator that moves VALUES to the left operand of a ``Join``.
+
+    Wherever a ``Join`` node has a ``ToMultiSet`` node (the VALUES clause this
+    module targets) as its right operand but not as its left one, the two
+    operands are swapped. The algebra tree is modified in place.
+    """
+
+    @classmethod
+    def translate(cls, query: Query) -> Query:
+        """Reorder the joins of ``query.algebra`` and return ``query`` itself."""
+        query.algebra = cls._optimize_node(query.algebra)
+        return query
+
+    @classmethod
+    def _optimize_node(cls, cv: Any) -> Any:
+        """Reorder every ``Join`` at or below ``cv`` and return ``cv``.
+
+        Nodes are updated in place, so the object returned is always the one
+        that was passed in.
+        """
+        if getattr(cv, "name", None) == "Join":
+            left, right = cv.p1, cv.p2
+            if left.name != "ToMultiSet" and right.name == "ToMultiSet":
+                # Keyword arguments are required: update() is called like
+                # dict.update(), which takes at most one positional argument.
+                cv.update(p1=right, p2=left)
+        # Visit whichever operand slots this node has. Children are changed in
+        # place, so nothing needs to be assigned back to ``cv``.
+        for slot in ("p", "p1", "p2"):
+            child = getattr(cv, slot, None)
+            if child is not None:
+                cls._optimize_node(child)
+        return cv
diff --git a/rdflib/plugins/sparql/optimizer.py b/rdflib/plugins/sparql/optimizer.py
new file mode 100644
index 000000000..2556543f6
--- /dev/null
+++ b/rdflib/plugins/sparql/optimizer.py
@@ -0,0 +1,24 @@
+"""
+This contains standard optimizers for sparql
+"""
+
+from __future__ import annotations
+
+import re
+from typing import Any, Callable
+
+from rdflib import Literal
+from rdflib.plugins.sparql.algebra import CompValue, Expr, Join, Values
+from rdflib.plugins.sparql.operators import Builtin_CONTAINS, Builtin_REGEX
+from rdflib.plugins.sparql.sparql import Query
+
+
+class SPARQLOptimizer:
+    """
+    An interface for optimizers that transform a query algebra, hopefully
+    into a version that is faster to evaluate.
+    """
+
+    def optimize(self, query: Query) -> Query:
+        """Return ``query`` as is; this base implementation changes nothing."""
+        return query
diff --git a/rdflib/plugins/sparql/processor.py b/rdflib/plugins/sparql/processor.py
index de97d80bd..a8677d340 100644
--- a/rdflib/plugins/sparql/processor.py
+++ b/rdflib/plugins/sparql/processor.py
@@ -7,7 +7,7 @@
 
 from __future__ import annotations
 
-from typing import Any, Mapping, Optional, Union
+from typing import Any, Callable, List, Mapping, Optional, Union
 
 from rdflib.graph import Graph
 from rdflib.plugins.sparql.algebra import translateQuery, translateUpdate
@@ -106,9 +106,15 @@ def update(
         return evalUpdate(self.graph, strOrQuery, initBindings)
 
 
+# A query translator receives a Query and returns the Query to evaluate.
+_QueryTranslatorType = Callable[[Query], Query]
+
+
 class SPARQLProcessor(Processor):
-    def __init__(self, graph):
+    def __init__(self, graph, translators: Optional[List[_QueryTranslatorType]] = None):
         self.graph = graph
+        # Normalise ``None`` to an empty list so that query() can always iterate.
+        self.translators: List[_QueryTranslatorType] = list(translators or [])
 
     # NOTE on type error: this is because the super type constructor does not
     # accept base argument and thie position of the DEBUG argument is
@@ -144,4 +150,8 @@ def query(  # type: ignore[override]
         if isinstance(strOrQuery, str):
             strOrQuery = translateQuery(parseQuery(strOrQuery), base, initNs)
 
+        # Let each registered translator rewrite the query, in order.
+        for translator in self.translators:
+            strOrQuery = translator(strOrQuery)
+
         return evalQuery(self.graph, strOrQuery, initBindings, base)
diff --git a/test/test_sparql/test_contrib_query_translators.py b/test/test_sparql/test_contrib_query_translators.py
new file mode 100644
index 000000000..f9de73cb3
--- /dev/null
+++ b/test/test_sparql/test_contrib_query_translators.py
@@ -0,0 +1,83 @@
+from rdflib import Graph
+from rdflib.plugins.sparql._contrib.valuesToTheLeftOfTheJoins import (
+    ValuesToTheLeftOfTheJoin,
+)
+from rdflib.plugins.sparql.parser import *
+
+# from rdflib.plugins.sparql.processor import prepareQuery
+from rdflib.plugins.sparql.processor import parseQuery, translateQuery
+
+query_slow = """
+PREFIX ex: <http://example.org/>
+
+SELECT ?x {
+  ?x ?y ?z .
+  VALUES (?x) {
+    (ex:1)
+    (ex:2)
+    (ex:3)
+  }
+}
+"""
+
+query_fast = """
+PREFIX ex: <http://example.org/>
+
+SELECT ?x {
+  VALUES (?x) {
+    (ex:1)
+    (ex:2)
+    (ex:3)
+  }
+  ?x ?y ?z .
+}
+"""
+
+query_regex = """
+PREFIX ex: <http://example.org/>
+
+SELECT ?x {
+  ?x ?y ?z .
+  FILTER(regex("?z", "hi"))
+}
+"""
+
+query_contains = """
+PREFIX ex: <http://example.org/>
+
+SELECT ?x {
+  ?x ?y ?z .
+  FILTER(contains("?z", "hi"))
+}
+"""
+
+
+def test_values_to_left():
+    """A VALUES clause on the right of a join is moved to the left."""
+    qs = _prepare_query(query_slow)
+    qf = _prepare_query(query_fast)
+    # Compare the algebras rather than the Query wrappers, so this check is
+    # about the same thing as the final assertion.
+    assert qs.algebra != qf.algebra
+    qso = ValuesToTheLeftOfTheJoin.translate(qs)
+
+    assert qso.algebra == qf.algebra
+
+
+def test_values_already_left():
+    """A query whose VALUES clause is already on the left is not changed."""
+    expected = _prepare_query(query_fast)
+    actual = ValuesToTheLeftOfTheJoin.translate(_prepare_query(query_fast))
+
+    assert actual.algebra == expected.algebra
+
+
+def _prepare_query(str_or_query):
+    """Parse a SPARQL query string and return its translated form."""
+    parse_tree = parseQuery(str_or_query)
+    return translateQuery(parse_tree, None, {})
+
+
+if __name__ == "__main__":
+    test_values_to_left()
+    test_values_already_left()