Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix explicit dataset (FROM and FROM NAMED clauses) #2794

Merged
merged 14 commits into from
Jul 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 4 additions & 26 deletions rdflib/plugins/sparql/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -672,36 +672,14 @@ def evalQuery(
:doc:`Security Considerations </security_considerations>`
documentation.
"""
main = query.algebra

initBindings = dict((Variable(k), v) for k, v in (initBindings or {}).items())

ctx = QueryContext(graph, initBindings=initBindings)
ctx = QueryContext(
graph, initBindings=initBindings, datasetClause=main.datasetClause
)

ctx.prologue = query.prologue
main = query.algebra

if main.datasetClause:
if ctx.dataset is None:
raise Exception(
"Non-conjunctive-graph doesn't know about "
+ "graphs! Try a query without FROM (NAMED)."
)

ctx = ctx.clone() # or push/pop?

firstDefault = False
for d in main.datasetClause:
if d.default:
if firstDefault:
# replace current default graph
dg = ctx.dataset.get_context(BNode())
ctx = ctx.pushGraph(dg)
firstDefault = True

ctx.load(d.default, default=True)

elif d.named:
g = d.named
ctx.load(g, default=False)

return evalPart(ctx, main)
33 changes: 26 additions & 7 deletions rdflib/plugins/sparql/sparql.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import isodate

import rdflib.plugins.sparql
from rdflib.graph import ConjunctiveGraph, Graph
from rdflib.graph import ConjunctiveGraph, Dataset, Graph
from rdflib.namespace import NamespaceManager
from rdflib.plugins.sparql.parserutils import CompValue
from rdflib.term import BNode, Identifier, Literal, Node, URIRef, Variable
Expand Down Expand Up @@ -255,20 +255,39 @@ def __init__(
graph: Optional[Graph] = None,
bindings: Optional[Union[Bindings, FrozenBindings, List[Any]]] = None,
initBindings: Optional[Mapping[str, Identifier]] = None,
datasetClause=None,
):
self.initBindings = initBindings
self.bindings = Bindings(d=bindings or [])
if initBindings:
self.bindings.update(initBindings)

self.graph: Optional[Graph]
self._dataset: Optional[ConjunctiveGraph]
if isinstance(graph, ConjunctiveGraph):
self._dataset = graph
if rdflib.plugins.sparql.SPARQL_DEFAULT_GRAPH_UNION:
self.graph = self.dataset
self._dataset: Optional[Union[Dataset, ConjunctiveGraph]]
if isinstance(graph, (Dataset, ConjunctiveGraph)):
if datasetClause:
self._dataset = Dataset()
self.graph = Graph()
for d in datasetClause:
if d.default:
from_graph = graph.get_context(d.default)
self.graph += from_graph
if not from_graph:
self.load(d.default, default=True)
elif d.named:
namedGraphs = Graph(
store=self.dataset.store, identifier=d.named
)
from_named_graphs = graph.get_context(d.named)
namedGraphs += from_named_graphs
if not from_named_graphs:
self.load(d.named, default=False)
else:
self.graph = self.dataset.default_context
self._dataset = graph
if rdflib.plugins.sparql.SPARQL_DEFAULT_GRAPH_UNION:
self.graph = self.dataset
else:
self.graph = self.dataset.default_context
else:
self._dataset = None
self.graph = graph
Expand Down
84 changes: 84 additions & 0 deletions test/test_sparql/test_dataset_exclusive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
from rdflib.graph import Dataset, Graph
from rdflib.term import URIRef

# Shared fixture: a Dataset created with default_union=False, holding one
# triple in the default graph and one triple in each of three named graphs.
dataset = Dataset(default_union=False)

# Default graph content.
_default_triple = (URIRef("urn:s0"), URIRef("urn:p0"), URIRef("urn:o0"))
dataset.add(_default_triple)

# Named graph content: one quad per graph (urn:g1, urn:g2, urn:g3).
_g1 = Graph(identifier=URIRef("urn:g1"))
dataset.add((URIRef("urn:s1"), URIRef("urn:p1"), URIRef("urn:o1"), _g1))

_g2 = Graph(identifier=URIRef("urn:g2"))
dataset.add((URIRef("urn:s2"), URIRef("urn:p2"), URIRef("urn:o2"), _g2))

_g3 = Graph(identifier=URIRef("urn:g3"))
dataset.add((URIRef("urn:s3"), URIRef("urn:p3"), URIRef("urn:o3"), _g3))


# Test implicit exclusive dataset
def test_exclusive():
    """A query without FROM / FROM NAMED must return only the urn:s0 triple."""
    expected = [(URIRef("urn:s0"), URIRef("urn:p0"), URIRef("urn:o0"))]
    rows = list(dataset.query("SELECT ?s ?p ?o WHERE {?s ?p ?o}"))
    assert rows == expected


# Test explicit default graph with exclusive dataset
def test_from():
    """FROM <urn:g1> must make the urn:g1 triple the only match."""
    sparql = """
SELECT ?s ?p ?o
FROM <urn:g1>
WHERE {?s ?p ?o}
"""
    expected = [(URIRef("urn:s1"), URIRef("urn:p1"), URIRef("urn:o1"))]
    rows = list(dataset.query(sparql))
    assert rows == expected


# Test explicit named graphs with exclusive dataset
def test_from_named():
    """FROM NAMED <urn:g1> must expose only urn:g1 to a GRAPH ?g pattern."""
    sparql = """
SELECT
?g ?s ?p ?o
FROM NAMED <urn:g1>
WHERE {
graph ?g {?s ?p ?o}
}
"""
    expected = [
        (URIRef("urn:g1"), URIRef("urn:s1"), URIRef("urn:p1"), URIRef("urn:o1")),
    ]
    rows = list(dataset.query(sparql))
    assert rows == expected


# Test that we can use from and from named in the same query
def test_from_and_from_named():
    """FROM and FROM NAMED may be combined in a single query.

    The plain pattern is expected to match the urn:g1 triple (with ?g
    unbound), and the GRAPH pattern the urn:g2 triple.
    """
    sparql = """
SELECT ?g ?s ?p ?o
FROM <urn:g1>
FROM NAMED <urn:g2>
WHERE {
{?s ?p ?o}
UNION {graph ?g {?s ?p ?o}}
} ORDER BY ?s
"""
    from_default = (None, URIRef("urn:s1"), URIRef("urn:p1"), URIRef("urn:o1"))
    from_named = (
        URIRef("urn:g2"),
        URIRef("urn:s2"),
        URIRef("urn:p2"),
        URIRef("urn:o2"),
    )
    rows = list(dataset.query(sparql))
    assert rows == [from_default, from_named]
150 changes: 150 additions & 0 deletions test/test_sparql/test_dataset_inclusive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
from rdflib.graph import Dataset, Graph
from rdflib.term import URIRef

# Shared fixture: a Dataset created with default_union=True, holding one
# triple in the default graph and one triple in each of three named graphs.
dataset = Dataset(default_union=True)

# Default graph content.
_default_triple = (URIRef("urn:s0"), URIRef("urn:p0"), URIRef("urn:o0"))
dataset.add(_default_triple)

# Named graph content: one quad per graph (urn:g1, urn:g2, urn:g3).
_g1 = Graph(identifier=URIRef("urn:g1"))
dataset.add((URIRef("urn:s1"), URIRef("urn:p1"), URIRef("urn:o1"), _g1))

_g2 = Graph(identifier=URIRef("urn:g2"))
dataset.add((URIRef("urn:s2"), URIRef("urn:p2"), URIRef("urn:o2"), _g2))

_g3 = Graph(identifier=URIRef("urn:g3"))
dataset.add((URIRef("urn:s3"), URIRef("urn:p3"), URIRef("urn:o3"), _g3))


# Test implicit inclusive dataset
# The query's default graph should contain a merge of all graphs:
# The service's default graph + all the service's named graphs
def test_inclusive():
    """A query without a dataset clause must see all four stored triples."""
    sparql = """
SELECT ?s ?p ?o
WHERE {?s ?p ?o}
ORDER BY ?s
"""
    expected = [
        (URIRef("urn:s0"), URIRef("urn:p0"), URIRef("urn:o0")),
        (URIRef("urn:s1"), URIRef("urn:p1"), URIRef("urn:o1")),
        (URIRef("urn:s2"), URIRef("urn:p2"), URIRef("urn:o2")),
        (URIRef("urn:s3"), URIRef("urn:p3"), URIRef("urn:o3")),
    ]
    rows = list(dataset.query(sparql))
    assert rows == expected


# Test explicit default graph with inclusive dataset
def test_default_from_1():
    """FROM <urn:g1> must restrict the matches to the urn:g1 triple."""
    sparql = """
SELECT ?s ?p ?o
FROM <urn:g1>
WHERE {?s ?p ?o}
"""
    expected = [(URIRef("urn:s1"), URIRef("urn:p1"), URIRef("urn:o1"))]
    rows = list(dataset.query(sparql))
    assert rows == expected


# test that we include more than one graph into the default graph
def test_default_from_2():
    """Two FROM clauses must contribute the triples of both graphs."""
    sparql = """
SELECT ?s ?p ?o
FROM <urn:g1>
FROM <urn:g2>
WHERE {?s ?p ?o}
ORDER BY ?s
"""
    expected = [
        (URIRef("urn:s1"), URIRef("urn:p1"), URIRef("urn:o1")),
        (URIRef("urn:s2"), URIRef("urn:p2"), URIRef("urn:o2")),
    ]
    rows = list(dataset.query(sparql))
    assert rows == expected


# Since there is a FROM clause, the RDF dataset is considered explicit.
# Thus, if FROM NAMED is not given, the set of named graphs is considered empty.
def test_named_from():
    """With FROM but no FROM NAMED, a GRAPH ?g pattern must match nothing."""
    sparql = """
SELECT ?s ?p ?o
FROM <urn:g1>
WHERE {
graph ?g {?s ?p ?o}
} ORDER BY ?s
"""
    rows = list(dataset.query(sparql))
    assert rows == [], "no result expected"


# Test explicit named graphs with inclusive dataset
def test_named_from_named_1():
    """FROM NAMED <urn:g1> must expose only urn:g1 to a GRAPH ?g pattern."""
    sparql = """
SELECT ?g ?s ?p ?o
FROM NAMED <urn:g1>
WHERE {
graph ?g {?s ?p ?o}
}
"""
    expected = [
        (URIRef("urn:g1"), URIRef("urn:s1"), URIRef("urn:p1"), URIRef("urn:o1")),
    ]
    rows = list(dataset.query(sparql))
    assert rows == expected


# test that we include more than one graph into the named graphs
def test_named_from_named_2():
    """Two FROM NAMED clauses must expose both graphs to a GRAPH ?g pattern."""
    sparql = """
SELECT ?g ?s ?p ?o
FROM NAMED <urn:g1>
FROM NAMED <urn:g2>
WHERE {
graph ?g {?s ?p ?o}
} ORDER BY ?g
"""
    in_g1 = (URIRef("urn:g1"), URIRef("urn:s1"), URIRef("urn:p1"), URIRef("urn:o1"))
    in_g2 = (URIRef("urn:g2"), URIRef("urn:s2"), URIRef("urn:p2"), URIRef("urn:o2"))
    rows = list(dataset.query(sparql))
    assert rows == [in_g1, in_g2]


# Since there is a FROM NAMED clause, the RDF dataset is considered explicit.
# Thus, if FROM is not given, the default graph is considered empty.
def test_default_from_named():
    """With FROM NAMED but no FROM, a plain triple pattern must match nothing."""
    sparql = "SELECT ?g ?s ?p ?o FROM NAMED <urn:g1> WHERE {?s ?p ?o}"
    rows = list(dataset.query(sparql))
    assert rows == [], "no result expected"


def test_from_and_from_named():
    """FROM and FROM NAMED may be combined in a single query.

    The plain pattern is expected to match the urn:g1 triple (with ?g
    unbound), and the GRAPH pattern the urn:g2 triple.
    """
    sparql = """
SELECT ?g ?s ?p ?o
FROM <urn:g1>
FROM NAMED <urn:g2>
WHERE {
{?s ?p ?o}
UNION {graph ?g {?s ?p ?o}}
} ORDER BY ?s
"""
    from_default = (None, URIRef("urn:s1"), URIRef("urn:p1"), URIRef("urn:o1"))
    from_named = (
        URIRef("urn:g2"),
        URIRef("urn:s2"),
        URIRef("urn:p2"),
        URIRef("urn:o2"),
    )
    rows = list(dataset.query(sparql))
    assert rows == [from_default, from_named]
Loading