Fix explicit dataset (FROM and FROM NAMED clauses) (#2794)
* Fix explicit dataset: FROM and FROM NAMED clauses

When a FROM or FROM NAMED clause is used, the query's RDF dataset is redefined entirely:
only the graphs listed in FROM clauses are included in the query's default graph,
only the graphs listed in FROM NAMED clauses are included in the query's named graphs,
and external graphs are loaded only if they don't already exist in the given ConjunctiveGraph.
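
A minimal sketch of this behaviour through the public query API, using the same illustrative graph names as the new tests (exclusive dataset, one triple in the default graph and one in <urn:g1>):

from rdflib import Dataset, Graph, URIRef

ds = Dataset(default_union=False)
ds.add((URIRef("urn:s0"), URIRef("urn:p0"), URIRef("urn:o0")))  # default graph
ds.add(
    (
        URIRef("urn:s1"),
        URIRef("urn:p1"),
        URIRef("urn:o1"),
        Graph(identifier=URIRef("urn:g1")),  # named graph urn:g1
    )
)

# FROM <urn:g1> makes the dataset explicit: the query's default graph now
# contains only urn:g1, so urn:s0 is no longer visible.
rows = list(ds.query("SELECT ?s FROM <urn:g1> WHERE { ?s ?p ?o }"))
assert rows == [(URIRef("urn:s1"),)]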

* Formatting with black and flake8 for commit d6858e0

Using rdflib's formatting rules

* Fix import order on test_dataset_exclusive

* Use Dataset instead of ConjunctiveGraph in test_dataset_exclusive and test_dataset_inclusive

ConjunctiveGraph has been deprecated. Also define whether the dataset is inclusive or exclusive at Dataset init (via default_union) instead of using the global parameter SPARQL_DEFAULT_GRAPH_UNION.
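
For reference, the two modes as they are set up in the new tests; default_union is the Dataset constructor flag that replaces the global switch here:

from rdflib import Dataset

# Exclusive (the SPARQL default): without FROM / FROM NAMED, a query only
# sees the dataset's default graph.
exclusive_dataset = Dataset(default_union=False)

# Inclusive: without FROM / FROM NAMED, the query's default graph is the
# union of the default graph and all named graphs.
inclusive_dataset = Dataset(default_union=True)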

* Fix graph name definition in test_dataset_inclusive and test_dataset_exclusive

* Only call get_context on default and named graphs once, and use Dataset instead of ConjunctiveGraph

* Update rdflib/plugins/sparql/sparql.py

Co-authored-by: Ashley Sommer <[email protected]>

---------

Co-authored-by: Nicholas Car <[email protected]>
Co-authored-by: Ashley Sommer <[email protected]>
3 people authored Jul 31, 2024
1 parent d7b2d25 commit 5876266
Showing 4 changed files with 264 additions and 33 deletions.
30 changes: 4 additions & 26 deletions rdflib/plugins/sparql/evaluate.py
@@ -672,36 +672,14 @@ def evalQuery(
     :doc:`Security Considerations </security_considerations>`
     documentation.
     """
+    main = query.algebra
 
     initBindings = dict((Variable(k), v) for k, v in (initBindings or {}).items())
 
-    ctx = QueryContext(graph, initBindings=initBindings)
+    ctx = QueryContext(
+        graph, initBindings=initBindings, datasetClause=main.datasetClause
+    )
 
     ctx.prologue = query.prologue
-    main = query.algebra
-
-    if main.datasetClause:
-        if ctx.dataset is None:
-            raise Exception(
-                "Non-conjunctive-graph doesn't know about "
-                + "graphs! Try a query without FROM (NAMED)."
-            )
-
-        ctx = ctx.clone()  # or push/pop?
-
-        firstDefault = False
-        for d in main.datasetClause:
-            if d.default:
-                if firstDefault:
-                    # replace current default graph
-                    dg = ctx.dataset.get_context(BNode())
-                    ctx = ctx.pushGraph(dg)
-                    firstDefault = True
-
-                ctx.load(d.default, default=True)
-
-            elif d.named:
-                g = d.named
-                ctx.load(g, default=False)
 
     return evalPart(ctx, main)
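
For orientation, a small sketch of where datasetClause comes from; prepareQuery is rdflib's public helper, the query text is illustrative, and the algebra attribute is what evalQuery reads as main above:

from rdflib.plugins.sparql import prepareQuery

# The parser records FROM / FROM NAMED clauses on the query algebra;
# evalQuery now forwards them to QueryContext instead of handling them here.
q = prepareQuery("SELECT ?s FROM <urn:g1> WHERE { ?s ?p ?o }")
print(q.algebra.datasetClause)  # one entry per FROM / FROM NAMED clause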
33 changes: 26 additions & 7 deletions rdflib/plugins/sparql/sparql.py
@@ -22,7 +22,7 @@
 import isodate
 
 import rdflib.plugins.sparql
-from rdflib.graph import ConjunctiveGraph, Graph
+from rdflib.graph import ConjunctiveGraph, Dataset, Graph
 from rdflib.namespace import NamespaceManager
 from rdflib.plugins.sparql.parserutils import CompValue
 from rdflib.term import BNode, Identifier, Literal, Node, URIRef, Variable
@@ -255,20 +255,39 @@ def __init__(
         graph: Optional[Graph] = None,
         bindings: Optional[Union[Bindings, FrozenBindings, List[Any]]] = None,
         initBindings: Optional[Mapping[str, Identifier]] = None,
+        datasetClause=None,
     ):
         self.initBindings = initBindings
         self.bindings = Bindings(d=bindings or [])
         if initBindings:
             self.bindings.update(initBindings)
 
         self.graph: Optional[Graph]
-        self._dataset: Optional[ConjunctiveGraph]
-        if isinstance(graph, ConjunctiveGraph):
-            self._dataset = graph
-            if rdflib.plugins.sparql.SPARQL_DEFAULT_GRAPH_UNION:
-                self.graph = self.dataset
+        self._dataset: Optional[Union[Dataset, ConjunctiveGraph]]
+        if isinstance(graph, (Dataset, ConjunctiveGraph)):
+            if datasetClause:
+                self._dataset = Dataset()
+                self.graph = Graph()
+                for d in datasetClause:
+                    if d.default:
+                        from_graph = graph.get_context(d.default)
+                        self.graph += from_graph
+                        if not from_graph:
+                            self.load(d.default, default=True)
+                    elif d.named:
+                        namedGraphs = Graph(
+                            store=self.dataset.store, identifier=d.named
+                        )
+                        from_named_graphs = graph.get_context(d.named)
+                        namedGraphs += from_named_graphs
+                        if not from_named_graphs:
+                            self.load(d.named, default=False)
             else:
-                self.graph = self.dataset.default_context
+                self._dataset = graph
+                if rdflib.plugins.sparql.SPARQL_DEFAULT_GRAPH_UNION:
+                    self.graph = self.dataset
+                else:
+                    self.graph = self.dataset.default_context
         else:
             self._dataset = None
             self.graph = graph
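
The constructor relies on get_context returning a graph object even when nothing has been stored under that identifier; an empty graph is falsy, which is what triggers the fallback load of an external graph. A minimal sketch (the identifier is illustrative):

from rdflib import Dataset, URIRef

ds = Dataset()
g = ds.get_context(URIRef("urn:not-loaded-yet"))
print(len(g), bool(g))  # 0 False -> QueryContext would fall back to self.load(...)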
84 changes: 84 additions & 0 deletions test/test_sparql/test_dataset_exclusive.py
@@ -0,0 +1,84 @@
from rdflib.graph import Dataset, Graph
from rdflib.term import URIRef

dataset = Dataset(default_union=False)
# Adding into default graph
dataset.add((URIRef("urn:s0"), URIRef("urn:p0"), URIRef("urn:o0")))
# Adding into named graphs
dataset.add(
    (
        URIRef("urn:s1"),
        URIRef("urn:p1"),
        URIRef("urn:o1"),
        Graph(identifier=URIRef("urn:g1")),
    )
)

dataset.add(
    (
        URIRef("urn:s2"),
        URIRef("urn:p2"),
        URIRef("urn:o2"),
        Graph(identifier=URIRef("urn:g2")),
    )
)

dataset.add(
    (
        URIRef("urn:s3"),
        URIRef("urn:p3"),
        URIRef("urn:o3"),
        Graph(identifier=URIRef("urn:g3")),
    )
)


# Test implicit exclusive dataset
def test_exclusive():
    results = list(dataset.query("SELECT ?s ?p ?o WHERE {?s ?p ?o}"))
    assert results == [(URIRef("urn:s0"), URIRef("urn:p0"), URIRef("urn:o0"))]


# Test explicit default graph with exclusive dataset
def test_from():
    query = """
        SELECT ?s ?p ?o
        FROM <urn:g1>
        WHERE {?s ?p ?o}
    """
    results = list(dataset.query(query))
    assert results == [(URIRef("urn:s1"), URIRef("urn:p1"), URIRef("urn:o1"))]


# Test explicit named graphs with exclusive dataset
def test_from_named():
    query = """
        SELECT
            ?g ?s ?p ?o
        FROM NAMED <urn:g1>
        WHERE {
            graph ?g {?s ?p ?o}
        }
    """
    results = list(dataset.query(query))
    assert results == [
        (URIRef("urn:g1"), URIRef("urn:s1"), URIRef("urn:p1"), URIRef("urn:o1"))
    ]


# Test that FROM and FROM NAMED can be used in the same query
def test_from_and_from_named():
    query = """
        SELECT ?g ?s ?p ?o
        FROM <urn:g1>
        FROM NAMED <urn:g2>
        WHERE {
            {?s ?p ?o}
            UNION {graph ?g {?s ?p ?o}}
        } ORDER BY ?s
    """
    results = list(dataset.query(query))
    assert results == [
        (None, URIRef("urn:s1"), URIRef("urn:p1"), URIRef("urn:o1")),
        (URIRef("urn:g2"), URIRef("urn:s2"), URIRef("urn:p2"), URIRef("urn:o2")),
    ]
150 changes: 150 additions & 0 deletions test/test_sparql/test_dataset_inclusive.py
@@ -0,0 +1,150 @@
from rdflib.graph import Dataset, Graph
from rdflib.term import URIRef

dataset = Dataset(default_union=True)
# Adding into default graph
dataset.add((URIRef("urn:s0"), URIRef("urn:p0"), URIRef("urn:o0")))
# Adding into named graphs
dataset.add(
    (
        URIRef("urn:s1"),
        URIRef("urn:p1"),
        URIRef("urn:o1"),
        Graph(identifier=URIRef("urn:g1")),
    )
)

dataset.add(
    (
        URIRef("urn:s2"),
        URIRef("urn:p2"),
        URIRef("urn:o2"),
        Graph(identifier=URIRef("urn:g2")),
    )
)

dataset.add(
    (
        URIRef("urn:s3"),
        URIRef("urn:p3"),
        URIRef("urn:o3"),
        Graph(identifier=URIRef("urn:g3")),
    )
)


# Test implicit inclusive dataset
# The query's default graph should contain a merge of all graphs:
# The service's default graph + all the service's named graphs
def test_inclusive():
    query = """
        SELECT ?s ?p ?o
        WHERE {?s ?p ?o}
        ORDER BY ?s
    """
    results = list(dataset.query(query))
    assert results == [
        (URIRef("urn:s0"), URIRef("urn:p0"), URIRef("urn:o0")),
        (URIRef("urn:s1"), URIRef("urn:p1"), URIRef("urn:o1")),
        (URIRef("urn:s2"), URIRef("urn:p2"), URIRef("urn:o2")),
        (URIRef("urn:s3"), URIRef("urn:p3"), URIRef("urn:o3")),
    ]


# Test explicit default graph with inclusive dataset
def test_default_from_1():
    query = """
        SELECT ?s ?p ?o
        FROM <urn:g1>
        WHERE {?s ?p ?o}
    """
    results = list(dataset.query(query))
    assert results == [(URIRef("urn:s1"), URIRef("urn:p1"), URIRef("urn:o1"))]


# Test that more than one graph can be included in the default graph
def test_default_from_2():
    query = """
        SELECT ?s ?p ?o
        FROM <urn:g1>
        FROM <urn:g2>
        WHERE {?s ?p ?o}
        ORDER BY ?s
    """
    results = list(dataset.query(query))
    assert results == [
        (URIRef("urn:s1"), URIRef("urn:p1"), URIRef("urn:o1")),
        (URIRef("urn:s2"), URIRef("urn:p2"), URIRef("urn:o2")),
    ]


# Since there is a FROM clause, the RDF dataset is considered explicit;
# thus, if no FROM NAMED is given, the set of named graphs is empty.
def test_named_from():
    query = """
        SELECT ?s ?p ?o
        FROM <urn:g1>
        WHERE {
            graph ?g {?s ?p ?o}
        } ORDER BY ?s
    """
    results = list(dataset.query(query))
    assert results == [], "no result expected"


# Test explicit named graphs with inclusive dataset
def test_named_from_named_1():
    query = """
        SELECT ?g ?s ?p ?o
        FROM NAMED <urn:g1>
        WHERE {
            graph ?g {?s ?p ?o}
        }
    """
    results = list(dataset.query(query))
    assert results == [
        (URIRef("urn:g1"), URIRef("urn:s1"), URIRef("urn:p1"), URIRef("urn:o1"))
    ]


# Test that more than one graph can be included in the named graphs
def test_named_from_named_2():
    query = """
        SELECT ?g ?s ?p ?o
        FROM NAMED <urn:g1>
        FROM NAMED <urn:g2>
        WHERE {
            graph ?g {?s ?p ?o}
        } ORDER BY ?g
    """
    results = list(dataset.query(query))
    assert results == [
        (URIRef("urn:g1"), URIRef("urn:s1"), URIRef("urn:p1"), URIRef("urn:o1")),
        (URIRef("urn:g2"), URIRef("urn:s2"), URIRef("urn:p2"), URIRef("urn:o2")),
    ]


# Since there is a FROM NAMED clause, the RDF dataset is considered explicit;
# thus, if no FROM is given, the default graph is empty.
def test_default_from_named():
    results = list(
        dataset.query("SELECT ?g ?s ?p ?o FROM NAMED <urn:g1> WHERE {?s ?p ?o}")
    )
    assert results == [], "no result expected"


def test_from_and_from_named():
    query = """
        SELECT ?g ?s ?p ?o
        FROM <urn:g1>
        FROM NAMED <urn:g2>
        WHERE {
            {?s ?p ?o}
            UNION {graph ?g {?s ?p ?o}}
        } ORDER BY ?s
    """
    results = list(dataset.query(query))
    assert results == [
        (None, URIRef("urn:s1"), URIRef("urn:p1"), URIRef("urn:o1")),
        (URIRef("urn:g2"), URIRef("urn:s2"), URIRef("urn:p2"), URIRef("urn:o2")),
    ]
