Description
Hi @Tpt ,
First, thank you very much for the excellent oxrdflib library. We've had multiple requests to integrate this with our kglab project, and in some cases (e.g., with unions and axes) we see queries that have ~2 orders of magnitude better performance than with the default RDFlib.Store implementation.
One of our use cases at BASF has identified a couple of issues, and we wanted to provide a minimal code example to replicate these errors. The following script:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import itertools
import sys
import time
import traceback
import typing
from icecream import ic
import oxrdflib
import rdflib
# Turtle fixture parsed into the graph for every run. It declares two
# owl:Class resources: ex:Foo carries both an rdfs:label and an rdfs:comment
# (typed as xsd:string), while ex:Bar carries only a plain rdfs:label and
# no rdfs:comment.
TTL_DATA = """
@prefix ex: <https://example.com/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
ex:Foo a owl:Class ;
rdfs:label "Call me Foo"^^xsd:string ;
rdfs:comment "A foo-like substance, commonly found in dimethyloxsorbate"^^xsd:string
.
ex:Bar a owl:Class ;
rdfs:label "My name is Bar"
.
"""
# SPARQL query variants, keyed by the name passed to `run_query`. In each
# query `?item` is the variable that `run_query` either binds through
# `initBindings` or replaces textually with an IRI.
QUERIES = {
# BASE: ?label and ?comment are each fetched through their own OPTIONAL
# clause, with PREFIX declarations for owl, rdfs and xsd.
"BASE": """
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT ?label ?comment
WHERE {
OPTIONAL { ?item rdfs:comment ?comment } .
OPTIONAL { ?item rdfs:label ?label } .
FILTER(?item != owl:Nothing)
}
""",
# NO_OPTIONAL: only ?label is selected, through a required (non-OPTIONAL)
# triple pattern.
"NO_OPTIONAL" : """
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT ?label
WHERE {
?item rdfs:label ?label .
FILTER(?item != owl:Nothing)
}
""",
# NO_PREFIX: uses the rdfs: and owl: prefixed names without any PREFIX
# declarations in the query text.
"NO_PREFIX" : """
SELECT ?label
WHERE {
OPTIONAL { ?item rdfs:label ?label } .
FILTER(?item != owl:Nothing)
}
"""
}
def run_query(
    data: str,
    plugin: typing.Optional[str],
    query: str,
    item: str,
    bind: bool,
) -> None:
    """Measure the timing and behavior of one SPARQL query.

    Builds a fresh graph from `data`, runs the selected query against it,
    and prints the query text, the elapsed time of the `g.query()` call
    ("query init"), each result row, and the elapsed time of iterating the
    results ("query exec"). Prints ``MISSING RESULT`` when the query yields
    no rows.

    Args:
        data: RDF source text, parsed with ``format="ttl"``.
        plugin: name of the rdflib store plugin to use, or ``None`` to
            construct the graph with rdflib's default store.
        query: key into the module-level `QUERIES` dict.
        item: local name appended to ``https://example.com/`` to form the
            IRI used for the ``?item`` variable.
        bind: when true, pass the IRI through ``initBindings``; when false,
            substitute ``<IRI>`` for ``?item`` directly in the query text.

    Raises:
        KeyError: if `query` is not a key of `QUERIES`.
    """
    ic(plugin, query, item, bind)

    if plugin is not None:
        g = rdflib.Graph(store=plugin)
    else:
        g = rdflib.Graph()

    g.parse(data=data, format="ttl")

    sparql = QUERIES[query].strip()
    bindings = {
        "item": rdflib.term.URIRef("https://example.com/" + item),
    }

    if not bind:
        # emulate variable binding by textual substitution, so the two
        # mechanisms can be compared on the same query
        for var, val in bindings.items():
            bind_var = "?" + var
            bind_val = "<" + str(val) + ">"
            ic(bind_var, bind_val)
            sparql = sparql.replace(bind_var, bind_val)

    print(sparql)

    # query init
    # perf_counter() is monotonic and high-resolution, so the measured
    # interval cannot be skewed by system clock adjustments
    init_time = time.perf_counter()

    if bind:
        query_iter = g.query(sparql, initBindings=bindings)
    else:
        query_iter = g.query(sparql)

    duration = time.perf_counter() - init_time
    print(f"query init: {duration:10.3f} sec")

    # query exec
    count = 0
    init_time = time.perf_counter()

    for row in query_iter:
        ic(row)
        print(row.asdict())
        count += 1

    duration = time.perf_counter() - init_time
    print(f"query exec: {duration:10.3f} sec")

    if count < 1:
        print("MISSING RESULT")

    print()
if __name__ == "__main__":
    # one axis per experimental parameter; every combination is run once
    PLUGIN_LIST = [ None, "Oxigraph", ]
    QUERY_LIST = [ "BASE", "NO_OPTIONAL", "NO_PREFIX", ]
    ITEM_LIST = [ "Foo", "Bar", ]
    BIND_LIST = [ True, False, ]

    combinations = itertools.product(PLUGIN_LIST, QUERY_LIST, ITEM_LIST, BIND_LIST)

    for plugin, query, item, bind in combinations:
        try:
            run_query(data=TTL_DATA, plugin=plugin, query=query, item=item, bind=bind)
        except SyntaxError as ex:
            # report the failure and continue with the remaining combinations
            traceback.print_exc()
... was run with Python 3.8.10 on macOS with oxrdflib 0.3.0 installed from the repo (not PyPI) and produces these results:
ic| plugin: None, query: 'BASE', item: 'Foo', bind: True
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT ?label ?comment
WHERE {
OPTIONAL { ?item rdfs:comment ?comment } .
OPTIONAL { ?item rdfs:label ?label } .
FILTER(?item != owl:Nothing)
}
query init: 0.196 sec
ic| row: (rdflib.term.Literal('Call me Foo', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),
rdflib.term.Literal('A foo-like substance, commonly found in dimethyloxsorbate', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')))
{'label': rdflib.term.Literal('Call me Foo', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), 'comment': rdflib.term.Literal('A foo-like substance, commonly found in dimethyloxsorbate', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string'))}
query exec: 0.005 sec
ic| plugin: None, query: 'BASE', item: 'Foo', bind: False
ic| bind_var: '?item', bind_val: '<https://example.com/Foo>'
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT ?label ?comment
WHERE {
OPTIONAL { <https://example.com/Foo> rdfs:comment ?comment } .
OPTIONAL { <https://example.com/Foo> rdfs:label ?label } .
FILTER(<https://example.com/Foo> != owl:Nothing)
}
query init: 0.008 sec
ic| row: (rdflib.term.Literal('Call me Foo', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),
rdflib.term.Literal('A foo-like substance, commonly found in dimethyloxsorbate', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')))
{'label': rdflib.term.Literal('Call me Foo', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), 'comment': rdflib.term.Literal('A foo-like substance, commonly found in dimethyloxsorbate', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string'))}
query exec: 0.001 sec
ic| plugin: None, query: 'BASE', item: 'Bar', bind: True
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT ?label ?comment
WHERE {
OPTIONAL { ?item rdfs:comment ?comment } .
OPTIONAL { ?item rdfs:label ?label } .
FILTER(?item != owl:Nothing)
}
query init: 0.007 sec
ic| row: (rdflib.term.Literal('My name is Bar'), None)
{'label': rdflib.term.Literal('My name is Bar')}
query exec: 0.001 sec
ic| plugin: None, query: 'BASE', item: 'Bar', bind: False
ic| bind_var: '?item', bind_val: '<https://example.com/Bar>'
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT ?label ?comment
WHERE {
OPTIONAL { <https://example.com/Bar> rdfs:comment ?comment } .
OPTIONAL { <https://example.com/Bar> rdfs:label ?label } .
FILTER(<https://example.com/Bar> != owl:Nothing)
}
query init: 0.009 sec
ic| row: (rdflib.term.Literal('My name is Bar'), None)
{'label': rdflib.term.Literal('My name is Bar')}
query exec: 0.001 sec
ic| plugin: None, query: 'NO_OPTIONAL', item: 'Foo', bind: True
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT ?label
WHERE {
?item rdfs:label ?label .
FILTER(?item != owl:Nothing)
}
query init: 0.006 sec
ic| row: (rdflib.term.Literal('Call me Foo', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),)
{'label': rdflib.term.Literal('Call me Foo', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string'))}
query exec: 0.001 sec
ic| plugin: None, query: 'NO_OPTIONAL', item: 'Foo', bind: False
ic| bind_var: '?item', bind_val: '<https://example.com/Foo>'
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT ?label
WHERE {
<https://example.com/Foo> rdfs:label ?label .
FILTER(<https://example.com/Foo> != owl:Nothing)
}
query init: 0.005 sec
ic| row: (rdflib.term.Literal('Call me Foo', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),)
{'label': rdflib.term.Literal('Call me Foo', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string'))}
query exec: 0.001 sec
Based on the TTL input and SPARQL queries used, each query should have one result row. The issues appear to be:
- SPARQL queries must have a PREFIX defined for each of the namespaces referenced in the query, or oxrdflib will throw a SyntaxError exception
- the OPTIONAL clause does not appear to work correctly; see the case that prints MISSING RESULT where the ?comment variable was within an OPTIONAL clause
- the initBindings parameter does not appear to work correctly; compare the same MISSING RESULT case against the run where we do an explicit string replace in the query string (which produces correct results)
Please let us know if we can help troubleshoot any further.
cc: @paoespinozarias @neobernad @jelisf @Mec-iS @davidshumway