From 69fad45f0d4875e29c82f7fcbd19a701a3a82137 Mon Sep 17 00:00:00 2001 From: Nikola Vasiljevic <35523348+nikokaoja@users.noreply.github.com> Date: Sun, 28 Jul 2024 22:00:47 +0200 Subject: [PATCH] Exposes Oxigraph parsers (#45) --- .gitignore | 124 +++++++++++++++++- CHANGELOG.md | 9 ++ oxrdflib/__init__.py | 283 ++--------------------------------------- oxrdflib/_converter.py | 138 ++++++++++++++++++++ oxrdflib/_type.py | 8 ++ oxrdflib/parser.py | 131 +++++++++++++++++++ oxrdflib/store.py | 216 +++++++++++++++++++++++++++++++ pyproject.toml | 26 ++-- tests/data/test.nt | 6 + tests/data/test.rdf | 16 +++ tests/data/test.ttl | 10 ++ tests/test_parser.py | 132 +++++++++++++++++++ 12 files changed, 815 insertions(+), 284 deletions(-) create mode 100644 oxrdflib/_converter.py create mode 100644 oxrdflib/_type.py create mode 100644 oxrdflib/parser.py create mode 100644 oxrdflib/store.py create mode 100644 tests/data/test.nt create mode 100644 tests/data/test.rdf create mode 100644 tests/data/test.ttl create mode 100644 tests/test_parser.py diff --git a/.gitignore b/.gitignore index f650bce..47876aa 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,124 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +.playground/ +config.yaml +real_data/ +data/ +dev-data/ +dev-data-*/ +tests/data/ + +# C extensions +*.so + +# Distribution / packaging +.Python +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +!tests/data +# Translations +*.mo +*.pot + + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + + + +# Environments venv -*.egg-info +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + + +# mypy +.mypy_cache/ +data/.DS_Store +.vscode/settings.json +.DS_Store +.vscode/ +.DS_Store +default.profraw diff --git a/CHANGELOG.md b/CHANGELOG.md index 956da17..1e99cb3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +## unreleased + +### Added +- Oxigraph native parsers for N-Triples, Turtle and RDF/XML. + +### Improved +- Restructured the codebase to accommodate further additions. 
+ + ## [0.3.7] - 2024-03-30 ### Changed diff --git a/oxrdflib/__init__.py b/oxrdflib/__init__.py index 84e2fc8..0f1d987 100644 --- a/oxrdflib/__init__.py +++ b/oxrdflib/__init__.py @@ -1,274 +1,9 @@ -import shutil -from typing import Any, Dict, Generator, Iterable, Iterator, Mapping, Optional, Tuple, Union - -import pyoxigraph as ox -from rdflib import Graph -from rdflib.graph import DATASET_DEFAULT_GRAPH_ID -from rdflib.plugins.sparql.sparql import Query, Update -from rdflib.query import Result -from rdflib.store import VALID_STORE, Store -from rdflib.term import BNode, Identifier, Literal, Node, URIRef, Variable - -__all__ = ["OxigraphStore"] - -_Triple = Tuple[Node, Node, Node] -_Quad = Tuple[Node, Node, Node, Graph] -_TriplePattern = Tuple[Optional[Node], Optional[Node], Optional[Node]] - - -class OxigraphStore(Store): - context_aware: bool = True - formula_aware: bool = False - transaction_aware: bool = False - graph_aware: bool = True - - def __init__( - self, - configuration: Optional[str] = None, - identifier: Optional[Identifier] = None, - *, - store: Optional[ox.Store] = None, - ): - self._store = store - self._prefix_for_namespace: Dict[URIRef, str] = {} - self._namespace_for_prefix: Dict[str, URIRef] = {} - super().__init__(configuration, identifier) - - def open(self, configuration: str, create: bool = False) -> Optional[int]: # noqa: ARG002 - if self._store is not None: - raise ValueError("The open function should be called before any RDF operation") - self._store = ox.Store(configuration) - return VALID_STORE - - def close(self, commit_pending_transaction: bool = False) -> None: # noqa: ARG002 - del self._store - - def destroy(self, configuration: str) -> None: - shutil.rmtree(configuration) - - def gc(self) -> None: - pass - - @property - def _inner(self) -> ox.Store: - if self._store is None: - self._store = ox.Store() - return self._store - - def add( - self, - triple: _Triple, - context: Graph, - quoted: bool = False, - ) -> None: - if quoted: - 
raise ValueError("Oxigraph stores are not formula aware") - self._inner.add(_to_ox(triple, context)) - super().add(triple, context, quoted) - - def addN(self, quads: Iterable[_Quad]) -> None: # noqa: N802 - self._inner.extend([_to_ox(q) for q in quads]) - for quad in quads: - (s, p, o, g) = quad - super().add((s, p, o), g) - - def remove( - self, - triple: _TriplePattern, - context: Optional[Graph] = None, - ) -> None: - for q in self._inner.quads_for_pattern(*_to_ox_quad_pattern(triple, context)): - self._inner.remove(q) - super().remove(triple, context) - - def triples( - self, - triple_pattern: _TriplePattern, - context: Optional[Graph] = None, - ) -> Iterator[Tuple[_Triple, Iterator[Optional[Graph]]]]: - try: - return ( - ( - (_from_ox(q.subject), _from_ox(q.predicate), _from_ox(q.object)), - iter(((_from_ox_graph_name(q.graph_name, self) if q.graph_name != ox.DefaultGraph() else None),)), - ) - for q in self._inner.quads_for_pattern(*_to_ox_quad_pattern(triple_pattern, context)) - ) - except (TypeError, ValueError): - return iter(()) # We just don't return anything - - def __len__(self, context: Optional[Graph] = None) -> int: - if context is None: - # TODO: very bad - return len({q.triple for q in self._inner}) - return sum(1 for _ in self._inner.quads_for_pattern(None, None, None, _to_ox(context))) - - def contexts(self, triple: Optional[_Triple] = None) -> Generator[Graph, None, None]: - if triple is None: - return (_from_ox_graph_name(g, self) for g in self._inner.named_graphs()) - return ( - _from_ox_graph_name(q.graph_name, self) for q in self._inner.quads_for_pattern(*_to_ox_quad_pattern(triple)) - ) - - def query( - self, - query: Union[Query, str], - initNs: Mapping[str, Any], # noqa: N803 - initBindings: Mapping[str, Identifier], # noqa: N803 - queryGraph: str, # noqa: N803 - **kwargs: Any, - ) -> "Result": - if isinstance(query, Query) or kwargs: - raise NotImplementedError - init_ns = dict(self._namespace_for_prefix, **initNs) - query = 
"".join(f"PREFIX {prefix}: <{namespace}>\n" for prefix, namespace in init_ns.items()) + query - if initBindings: - query += "\nVALUES ( {} ) {{ ({}) }}".format( - " ".join(f"?{k}" for k in initBindings), " ".join(v.n3() for v in initBindings.values()) - ) - result = self._inner.query( - query, - use_default_graph_as_union=queryGraph == "__UNION__", - default_graph=_to_ox(queryGraph) if isinstance(queryGraph, Node) else None, - ) - if isinstance(result, bool): - out = Result("ASK") - out.askAnswer = result - elif isinstance(result, ox.QuerySolutions): - out = Result("SELECT") - out.vars = [Variable(v.value) for v in result.variables] - out.bindings = ({v: _from_ox(val) for v, val in zip(out.vars, solution)} for solution in result) - elif isinstance(result, ox.QueryTriples): - out = Result("CONSTRUCT") - out.graph = Graph() - out.graph += (_from_ox(t) for t in result) - else: - raise ValueError(f"Unexpected query result: {result}") - return out - - def update( - self, - update: Union[Update, str], - initNs: Mapping[str, Any], # noqa: N803 - initBindings: Mapping[str, Identifier], # noqa: N803 - queryGraph: str, # noqa: N803 - **kwargs: Any, - ) -> None: - raise NotImplementedError - - def commit(self) -> None: - # TODO: implement - pass - - def rollback(self) -> None: - # TODO: implement - pass - - def add_graph(self, graph: Graph) -> None: - self._inner.add_graph(_to_ox(graph)) - - def remove_graph(self, graph: Graph) -> None: - self._inner.remove_graph(_to_ox(graph)) - - def bind(self, prefix: str, namespace: URIRef, override: bool = True) -> None: - if not override and (prefix in self._namespace_for_prefix or namespace in self._prefix_for_namespace): - return # nothing to do - self._delete_from_prefix(prefix) - self._delete_from_namespace(namespace) - self._namespace_for_prefix[prefix] = namespace - self._prefix_for_namespace[namespace] = prefix - - def _delete_from_prefix(self, prefix): - if prefix not in self._namespace_for_prefix: - return - namespace = 
self._namespace_for_prefix[prefix] - del self._namespace_for_prefix[prefix] - self._delete_from_namespace(namespace) - - def _delete_from_namespace(self, namespace): - if namespace not in self._prefix_for_namespace: - return - prefix = self._prefix_for_namespace[namespace] - del self._prefix_for_namespace[namespace] - self._delete_from_prefix(prefix) - - def prefix(self, namespace: URIRef) -> Optional[str]: - return self._prefix_for_namespace.get(namespace) - - def namespace(self, prefix: str) -> Optional[URIRef]: - return self._namespace_for_prefix.get(prefix) - - def namespaces(self) -> Iterator[Tuple[str, URIRef]]: - yield from self._namespace_for_prefix.items() - - -def _to_ox( - term: Optional[Union[Node, _Triple, _Quad, Graph]], context: Optional[Graph] = None -) -> Optional[Union[ox.NamedNode, ox.BlankNode, ox.Literal, ox.DefaultGraph, ox.Quad]]: - if term is None: - return None - if term == DATASET_DEFAULT_GRAPH_ID: - return ox.DefaultGraph() - if isinstance(term, URIRef): - return ox.NamedNode(term) - if isinstance(term, BNode): - return ox.BlankNode(term) - if isinstance(term, Literal): - return ox.Literal(term, language=term.language, datatype=ox.NamedNode(term.datatype) if term.datatype else None) - if isinstance(term, Graph): - return _to_ox(term.identifier) - if isinstance(term, tuple): - if len(term) == 3: - return ox.Quad(_to_ox(term[0]), _to_ox(term[1]), _to_ox(term[2]), _to_ox(context)) - if len(term) == 4: - return ox.Quad(_to_ox(term[0]), _to_ox(term[1]), _to_ox(term[2]), _to_ox(term[3])) - raise ValueError(f"Unexpected rdflib term: {term!r}") - - -def _to_ox_quad_pattern(triple: _TriplePattern, context: Optional[Graph] = None): - (s, p, o) = triple - return _to_ox_term_pattern(s), _to_ox_term_pattern(p), _to_ox_term_pattern(o), _to_ox_term_pattern(context) - - -def _to_ox_term_pattern( - term: Optional[Union[URIRef, BNode, Literal, Graph]], -) -> Optional[Union[ox.NamedNode, ox.BlankNode, ox.Literal]]: - if term is None: - return None - if 
isinstance(term, URIRef): - return ox.NamedNode(term) - if isinstance(term, BNode): - return ox.BlankNode(term) - if isinstance(term, Literal): - return ox.Literal(term, language=term.language, datatype=ox.NamedNode(term.datatype) if term.datatype else None) - if isinstance(term, Graph): - return _to_ox(term.identifier) - raise ValueError(f"Unexpected rdflib term: {term!r}") - - -def _from_ox_graph_name(graph_name: Union[ox.NamedNode, ox.BlankNode, ox.DefaultGraph], store: OxigraphStore) -> Graph: - if isinstance(graph_name, ox.NamedNode): - return Graph(identifier=URIRef(graph_name.value), store=store) - if isinstance(graph_name, ox.BlankNode): - return Graph(identifier=BNode(graph_name.value), store=store) - if isinstance(graph_name, ox.DefaultGraph): - return Graph(identifier=DATASET_DEFAULT_GRAPH_ID, store=store) - raise ValueError(f"Unexpected Oxigraph graph name: {graph_name!r}") - - -def _from_ox( - term: Optional[Union[ox.NamedNode, ox.BlankNode, ox.Literal, ox.Triple]], -) -> Optional[Union[Node, Tuple[Node, Node, Node]]]: - if term is None: - return None - if isinstance(term, ox.NamedNode): - return URIRef(term.value) - if isinstance(term, ox.BlankNode): - return BNode(term.value) - if isinstance(term, ox.Literal): - if term.language: - return Literal(term.value, lang=term.language) - return Literal(term.value, datatype=URIRef(term.datatype.value)) - if isinstance(term, ox.Triple): - return _from_ox(term.subject), _from_ox(term.predicate), _from_ox(term.object) - raise ValueError(f"Unexpected Oxigraph term: {term!r}") +from .parser import OxigraphNTriplesParser, OxigraphRdfXmlParser, OxigraphTurtleParser +from .store import OxigraphStore + +__all__ = [ + "OxigraphStore", + "OxigraphTurtleParser", + "OxigraphNTriplesParser", + "OxigraphRdfXmlParser", +] diff --git a/oxrdflib/_converter.py b/oxrdflib/_converter.py new file mode 100644 index 0000000..bc957c2 --- /dev/null +++ b/oxrdflib/_converter.py @@ -0,0 +1,138 @@ +from typing import TYPE_CHECKING, 
Optional, Tuple, Union + +import pyoxigraph as ox +from rdflib import Graph +from rdflib.graph import DATASET_DEFAULT_GRAPH_ID +from rdflib.term import BNode, Literal, Node, URIRef + +from oxrdflib._type import _Quad, _Triple, _TriplePattern + +if TYPE_CHECKING: + from oxrdflib.store import OxigraphStore + + +def to_ox( + term: Optional[Union[Node, _Triple, _Quad, Graph]], context: Optional[Graph] = None +) -> Optional[Union[ox.NamedNode, ox.BlankNode, ox.Literal, ox.DefaultGraph, ox.Quad]]: + """Convert an rdflib term to an Oxigraph term.""" + if term is None: + return None + if term == DATASET_DEFAULT_GRAPH_ID: + return ox.DefaultGraph() + if isinstance(term, URIRef): + return ox.NamedNode(term) + if isinstance(term, BNode): + return ox.BlankNode(term) + if isinstance(term, Literal): + return ox.Literal( + term, + language=term.language, + datatype=ox.NamedNode(term.datatype) if term.datatype else None, + ) + if isinstance(term, Graph): + return to_ox(term.identifier) + if isinstance(term, tuple): + if len(term) == 3: + return ox.Quad( + to_ox(term[0]), + to_ox(term[1]), + to_ox(term[2]), + to_ox(context), + ) + if len(term) == 4: + return ox.Quad( + to_ox(term[0]), + to_ox(term[1]), + to_ox(term[2]), + to_ox(term[3]), + ) + raise ValueError(f"Unexpected rdflib term: {term!r}") + + +def to_ox_quad_pattern(triple: _TriplePattern, context: Optional[Graph] = None): + """Convert an rdflib quad pattern to an Oxigraph quad pattern.""" + + (s, p, o) = triple + return ( + to_ox_term_pattern(s), + to_ox_term_pattern(p), + to_ox_term_pattern(o), + to_ox_term_pattern(context), + ) + + +def to_ox_term_pattern( + term: Optional[Union[URIRef, BNode, Literal, Graph]], +) -> Optional[Union[ox.NamedNode, ox.BlankNode, ox.Literal]]: + if term is None: + return None + if isinstance(term, URIRef): + return ox.NamedNode(term) + if isinstance(term, BNode): + return ox.BlankNode(term) + if isinstance(term, Literal): + return ox.Literal( + term, + language=term.language, + 
datatype=ox.NamedNode(term.datatype) if term.datatype else None, + ) + if isinstance(term, Graph): + return to_ox(term.identifier) + raise ValueError(f"Unexpected rdflib term: {term!r}") + + +def from_ox_graph_name( + graph_name: Union[ox.NamedNode, ox.BlankNode, ox.DefaultGraph], + store: "OxigraphStore", +) -> Graph: + if isinstance(graph_name, ox.NamedNode): + return Graph(identifier=URIRef(graph_name.value), store=store) + if isinstance(graph_name, ox.BlankNode): + return Graph(identifier=BNode(graph_name.value), store=store) + if isinstance(graph_name, ox.DefaultGraph): + return Graph(identifier=DATASET_DEFAULT_GRAPH_ID, store=store) + raise ValueError(f"Unexpected Oxigraph graph name: {graph_name!r}") + + +def from_ox( + term: Optional[Union[ox.NamedNode, ox.BlankNode, ox.Literal, ox.Triple]], +) -> Optional[Union[Node, Tuple[Node, Node, Node]]]: + if term is None: + return None + if isinstance(term, ox.NamedNode): + return URIRef(term.value) + if isinstance(term, ox.BlankNode): + return BNode(term.value) + if isinstance(term, ox.Literal): + if term.language: + return Literal(term.value, lang=term.language) + return Literal(term.value, datatype=URIRef(term.datatype.value)) + if isinstance(term, ox.Triple): + return from_ox(term.subject), from_ox(term.predicate), from_ox(term.object) + raise ValueError(f"Unexpected Oxigraph term: {term!r}") + + +def rdflib_to_mime_type(rdflib_type: str) -> str: + """Convert an rdflib type to a MIME type.""" + if rdflib_type in ("ttl", "turtle"): + return "text/turtle" + if rdflib_type in ("nt", "ntriples"): + return "application/n-triples" + if rdflib_type == "xml": + return "application/rdf+xml" + if rdflib_type == "trig": + return "application/trig" + if rdflib_type == "trix": + return "application/trix" + raise ValueError(f"Unsupported rdflib type: {rdflib_type}") + + +def ox_to_rdflib_type(ox_format: str) -> str: + """Convert an Oxigraph format to a rdflib parser format.""" + if ox_format in ("ox-turtle", "ox-ttl"): + 
return "turtle" + if ox_format in ("ox-nt", "ox-ntriples"): + return "nt" + if ox_format == "ox-xml": + return "xml" + raise ValueError(f"Unsupported Oxigraph type: {ox_format}") diff --git a/oxrdflib/_type.py b/oxrdflib/_type.py new file mode 100644 index 0000000..0115794 --- /dev/null +++ b/oxrdflib/_type.py @@ -0,0 +1,8 @@ +from typing import Optional, Tuple + +from rdflib import Graph +from rdflib.term import Node + +_Triple = Tuple[Node, Node, Node] +_Quad = Tuple[Node, Node, Node, Graph] +_TriplePattern = Tuple[Optional[Node], Optional[Node], Optional[Node]] diff --git a/oxrdflib/parser.py b/oxrdflib/parser.py new file mode 100644 index 0000000..d839da9 --- /dev/null +++ b/oxrdflib/parser.py @@ -0,0 +1,131 @@ +import warnings +from typing import Any, Optional + +from rdflib import ConjunctiveGraph, Graph +from rdflib.exceptions import ParserError +from rdflib.parser import ( + FileInputSource, + InputSource, + Parser, + URLInputSource, + create_input_source, +) + +from oxrdflib._converter import ox_to_rdflib_type, rdflib_to_mime_type, to_ox +from oxrdflib.store import OxigraphStore + +__all__ = [ + "OxigraphTurtleParser", + "OxigraphNTriplesParser", + "OxigraphRdfXmlParser", +] + + +class OxigraphParser(Parser): + def __init__(self): + pass + + def parse( + self, + source: InputSource, + sink: Graph, + format: str, + encoding: Optional[str] = "utf-8", + **kwargs: Any, + ) -> None: + if encoding not in (None, "utf-8"): + raise ParserError("N3/Turtle files are always utf-8 encoded, I was passed: %s" % encoding) + + if not isinstance(sink.store, OxigraphStore): + warnings.warn( + "Graph store should be an instance of OxigraphStore, " + f"got {type(sink.store).__name__} store instead." 
+ " Attempting to parse using rdflib native parser.", + stacklevel=2, + ) + sink.parse(source, format=ox_to_rdflib_type(format)) + + else: + base_iri = sink.absolutize(source.getPublicId() or source.getSystemId() or "") + + if isinstance(source, FileInputSource): + input = source.file + elif isinstance(source, URLInputSource): + input = create_input_source(source.url, format=ox_to_rdflib_type(format)).getByteStream() + else: + input = source.getByteStream() + + if kwargs.get("transactional", True): + sink.store._inner.load( + input, + rdflib_to_mime_type(ox_to_rdflib_type(format)), + base_iri=base_iri, + to_graph=to_ox(sink.identifier), + ) + else: + sink.store._inner.bulk_load( + input, + rdflib_to_mime_type(ox_to_rdflib_type(format)), + base_iri=base_iri, + to_graph=to_ox(sink.identifier), + ) + + +class OxigraphTurtleParser(OxigraphParser): + def parse( + self, + source: InputSource, + sink: Graph, + format: str = "ox-turtle", + encoding: Optional[str] = "utf-8", + **kwargs: Any, + ) -> None: + super().parse(source, sink, format, encoding, **kwargs) + + +class OxigraphNTriplesParser(OxigraphParser): + def parse( + self, + source: InputSource, + sink: Graph, + format: str = "ox-nt", + encoding: Optional[str] = None, + **kwargs: Any, + ) -> None: + super().parse(source, sink, format, encoding, **kwargs) + + +class OxigraphRdfXmlParser(OxigraphParser): + def parse( + self, + source: FileInputSource, + sink: Graph, + format: str = "ox-xml", + encoding: Optional[str] = None, + **kwargs: Any, + ) -> None: + super().parse(source, sink, format, encoding, **kwargs) + + +class OxigraphNQuadsParser(OxigraphParser): + def parse( + self, + source: InputSource, + sink: ConjunctiveGraph, + format: str, + encoding: Optional[str] = None, + **kwargs: Any, + ) -> None: + raise NotImplementedError("N-Quads is not supported yet") + + +class OxigraphTriGParser(OxigraphParser): + def parse( + self, + source: InputSource, + sink: Graph, + format: str, + encoding: Optional[str] = 
"utf-8", + **kwargs: Any, + ) -> None: + raise NotImplementedError("TriG parser is not supported yet") diff --git a/oxrdflib/store.py b/oxrdflib/store.py new file mode 100644 index 0000000..8edbe56 --- /dev/null +++ b/oxrdflib/store.py @@ -0,0 +1,216 @@ +import shutil +from typing import ( + Any, + Dict, + Generator, + Iterable, + Iterator, + Mapping, + Optional, + Tuple, + Union, +) + +import pyoxigraph as ox +from rdflib import Graph +from rdflib.plugins.sparql.sparql import Query, Update +from rdflib.query import Result +from rdflib.store import VALID_STORE, Store +from rdflib.term import Identifier, Node, URIRef, Variable + +from ._converter import ( + from_ox, + from_ox_graph_name, + to_ox, + to_ox_quad_pattern, +) +from ._type import _Quad, _Triple, _TriplePattern + +__all__ = ["OxigraphStore"] + + +class OxigraphStore(Store): + context_aware: bool = True + formula_aware: bool = False + transaction_aware: bool = False + graph_aware: bool = True + + def __init__( + self, + configuration: Optional[str] = None, + identifier: Optional[Identifier] = None, + *, + store: Optional[ox.Store] = None, + ): + self._store = store + self._prefix_for_namespace: Dict[URIRef, str] = {} + self._namespace_for_prefix: Dict[str, URIRef] = {} + super().__init__(configuration, identifier) + + def open(self, configuration: str, create: bool = False) -> Optional[int]: # noqa: ARG002 + if self._store is not None: + raise ValueError("The open function should be called before any RDF operation") + self._store = ox.Store(configuration) + return VALID_STORE + + def close(self, commit_pending_transaction: bool = False) -> None: # noqa: ARG002 + del self._store + + def destroy(self, configuration: str) -> None: + shutil.rmtree(configuration) + + def gc(self) -> None: + pass + + @property + def _inner(self) -> ox.Store: + if self._store is None: + self._store = ox.Store() + return self._store + + def add( + self, + triple: _Triple, + context: Graph, + quoted: bool = False, + ) -> None: + if 
quoted: + raise ValueError("Oxigraph stores are not formula aware") + self._inner.add(to_ox(triple, context)) + super().add(triple, context, quoted) + + def addN(self, quads: Iterable[_Quad]) -> None: # noqa: N802 + self._inner.extend([to_ox(q) for q in quads]) + for quad in quads: + (s, p, o, g) = quad + super().add((s, p, o), g) + + def remove( + self, + triple: _TriplePattern, + context: Optional[Graph] = None, + ) -> None: + for q in self._inner.quads_for_pattern(*to_ox_quad_pattern(triple, context)): + self._inner.remove(q) + super().remove(triple, context) + + def triples( + self, + triple_pattern: _TriplePattern, + context: Optional[Graph] = None, + ) -> Iterator[Tuple[_Triple, Iterator[Optional[Graph]]]]: + try: + return ( + ( + (from_ox(q.subject), from_ox(q.predicate), from_ox(q.object)), + iter(((from_ox_graph_name(q.graph_name, self) if q.graph_name != ox.DefaultGraph() else None),)), + ) + for q in self._inner.quads_for_pattern(*to_ox_quad_pattern(triple_pattern, context)) + ) + except (TypeError, ValueError): + return iter(()) # We just don't return anything + + def __len__(self, context: Optional[Graph] = None) -> int: + if context is None: + # TODO: very bad + return len({q.triple for q in self._inner}) + return sum(1 for _ in self._inner.quads_for_pattern(None, None, None, to_ox(context))) + + def contexts(self, triple: Optional[_Triple] = None) -> Generator[Graph, None, None]: + if triple is None: + return (from_ox_graph_name(g, self) for g in self._inner.named_graphs()) + return ( + from_ox_graph_name(q.graph_name, self) for q in self._inner.quads_for_pattern(*to_ox_quad_pattern(triple)) + ) + + def query( + self, + query: Union[Query, str], + initNs: Mapping[str, Any], # noqa: N803 + initBindings: Mapping[str, Identifier], # noqa: N803 + queryGraph: str, # noqa: N803 + **kwargs: Any, + ) -> "Result": + if isinstance(query, Query) or kwargs: + raise NotImplementedError + init_ns = dict(self._namespace_for_prefix, **initNs) + query = 
"".join(f"PREFIX {prefix}: <{namespace}>\n" for prefix, namespace in init_ns.items()) + query + if initBindings: + query += "\nVALUES ( {} ) {{ ({}) }}".format( + " ".join(f"?{k}" for k in initBindings), + " ".join(v.n3() for v in initBindings.values()), + ) + result = self._inner.query( + query, + use_default_graph_as_union=queryGraph == "__UNION__", + default_graph=(to_ox(queryGraph) if isinstance(queryGraph, Node) else None), + ) + if isinstance(result, bool): + out = Result("ASK") + out.askAnswer = result + elif isinstance(result, ox.QuerySolutions): + out = Result("SELECT") + out.vars = [Variable(v.value) for v in result.variables] + out.bindings = ({v: from_ox(val) for v, val in zip(out.vars, solution)} for solution in result) + elif isinstance(result, ox.QueryTriples): + out = Result("CONSTRUCT") + out.graph = Graph() + out.graph += (from_ox(t) for t in result) + else: + raise ValueError(f"Unexpected query result: {result}") + return out + + def update( + self, + update: Union[Update, str], + initNs: Mapping[str, Any], # noqa: N803 + initBindings: Mapping[str, Identifier], # noqa: N803 + queryGraph: str, # noqa: N803 + **kwargs: Any, + ) -> None: + raise NotImplementedError + + def commit(self) -> None: + # TODO: implement + pass + + def rollback(self) -> None: + # TODO: implement + pass + + def add_graph(self, graph: Graph) -> None: + self._inner.add_graph(to_ox(graph)) + + def remove_graph(self, graph: Graph) -> None: + self._inner.remove_graph(to_ox(graph)) + + def bind(self, prefix: str, namespace: URIRef, override: bool = True) -> None: + if not override and (prefix in self._namespace_for_prefix or namespace in self._prefix_for_namespace): + return # nothing to do + self._delete_from_prefix(prefix) + self._delete_from_namespace(namespace) + self._namespace_for_prefix[prefix] = namespace + self._prefix_for_namespace[namespace] = prefix + + def _delete_from_prefix(self, prefix): + if prefix not in self._namespace_for_prefix: + return + namespace = 
self._namespace_for_prefix[prefix] + del self._namespace_for_prefix[prefix] + self._delete_from_namespace(namespace) + + def _delete_from_namespace(self, namespace): + if namespace not in self._prefix_for_namespace: + return + prefix = self._prefix_for_namespace[namespace] + del self._prefix_for_namespace[namespace] + self._delete_from_prefix(prefix) + + def prefix(self, namespace: URIRef) -> Optional[str]: + return self._prefix_for_namespace.get(namespace) + + def namespace(self, prefix: str) -> Optional[URIRef]: + return self._namespace_for_prefix.get(prefix) + + def namespaces(self) -> Iterator[Tuple[str, URIRef]]: + yield from self._namespace_for_prefix.items() diff --git a/pyproject.toml b/pyproject.toml index dbc6f96..96eb452 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,6 +6,7 @@ build-backend = "setuptools.build_meta" [project] authors = [ { name = "Tpt", email = "thomas@pellissier-tanon.fr" }, + { name = "Nikola", email = "me@thisisnikola.com" }, ] description = "rdflib stores based on pyoxigraph" classifiers = [ @@ -18,22 +19,29 @@ classifiers = [ "Programming Language :: Python :: 3.11", "License :: OSI Approved :: BSD License", "Topic :: Software Development :: Libraries :: Python Modules", - "Topic :: Database :: Database Engines/Servers" -] -dependencies = [ - "pyoxigraph~=0.3.14", - "rdflib>=6.3,<8.0" + "Topic :: Database :: Database Engines/Servers", ] +dependencies = ["pyoxigraph~=0.3.14", "rdflib>=6.3,<8.0"] dynamic = ["version"] license = { text = "BSD-3-Clause" } name = "oxrdflib" readme = "README.md" requires-python = ">=3.7" + [project.entry-points."rdf.plugins.store"] -Oxigraph = "oxrdflib:OxigraphStore" -OxMemory = "oxrdflib:OxigraphStore" -OxSled = "oxrdflib:OxigraphStore" +Oxigraph = "oxrdflib.store:OxigraphStore" +OxMemory = "oxrdflib.store:OxigraphStore" +OxSled = "oxrdflib.store:OxigraphStore" + + +[project.entry-points."rdf.plugins.parser"] +ox-turtle = "oxrdflib.parser:OxigraphTurtleParser" +ox-ttl = 
"oxrdflib.parser:OxigraphTurtleParser" +ox-ntriples = "oxrdflib.parser:OxigraphNTriplesParser" +ox-nt = "oxrdflib.parser:OxigraphNTriplesParser" +ox-xml = "oxrdflib.parser:OxigraphRdfXmlParser" + [project.urls] Changelog = "https://github.com/oxigraph/oxrdflib/blob/main/CHANGELOG.md" @@ -68,7 +76,7 @@ select = [ "TID", "UP", "W", - "YTT" + "YTT", ] [tool.setuptools_scm] diff --git a/tests/data/test.nt b/tests/data/test.nt new file mode 100644 index 0000000..0f097e6 --- /dev/null +++ b/tests/data/test.nt @@ -0,0 +1,6 @@ + . + "Example Document" . + . + . + "John Doe" . + . diff --git a/tests/data/test.rdf b/tests/data/test.rdf new file mode 100644 index 0000000..5f78ef6 --- /dev/null +++ b/tests/data/test.rdf @@ -0,0 +1,16 @@ + + + + + Example Document + + + John Doe + + + + + + + diff --git a/tests/data/test.ttl b/tests/data/test.ttl new file mode 100644 index 0000000..edc53ab --- /dev/null +++ b/tests/data/test.ttl @@ -0,0 +1,10 @@ +@prefix ex: . +@prefix foaf: . + +ex:document a foaf:Document ; + foaf:title "Example Document" ; + foaf:creator ex:creator . + +ex:creator a foaf:Person ; + foaf:name "John Doe" ; + foaf:mbox . 
diff --git a/tests/test_parser.py b/tests/test_parser.py new file mode 100644 index 0000000..e7c18c6 --- /dev/null +++ b/tests/test_parser.py @@ -0,0 +1,132 @@ +import unittest +import warnings +from pathlib import Path + +import rdflib + +_TEST_DIR = Path(__file__).resolve().parent + + +class TestGraphParsing(unittest.TestCase): + def test_parsing_ox_turtle_bulk_load(self): + graph = rdflib.Graph(store="Oxigraph") + graph.parse(_TEST_DIR / "data/test.ttl", format="ox-turtle", transactional=False) + self.assertEqual(len(graph), 6) + + def test_parsing_ox_turtle_load(self): + graph = rdflib.Graph(store="Oxigraph") + graph.parse(_TEST_DIR / "data/test.ttl", format="ox-turtle", transactional=True) + + self.assertEqual(len(graph), 6) + + def test_parsing_ox_turtle_fallback(self): + graph = rdflib.Graph() + with warnings.catch_warnings(record=True) as warning: + graph.parse(_TEST_DIR / "data/test.ttl", format="ox-turtle", transactional=False) + + self.assertEqual( + warning[0].message.args[0], + ( + "Graph store should be an instance of OxigraphStore, got Memory" + " store instead. Attempting to parse using rdflib native parser." 
+ ), + ) + self.assertEqual(len(graph), 6) + + def test_parsing_ox_url_turtle(self): + graph = rdflib.Graph(store="Oxigraph") + graph.parse( + "https://i-adopt.github.io/ontology/ontology.ttl", + format="ox-turtle", + transactional=True, + ) + self.assertIsNotNone(graph) + + def test_parsing_ox_ntriples_bulk_load(self): + graph = rdflib.Graph(store="Oxigraph") + graph.parse(_TEST_DIR / "data/test.nt", format="ox-ntriples", transactional=False) + self.assertEqual(len(graph), 6) + + def test_parsing_ox_ntriples_load(self): + graph = rdflib.Graph(store="Oxigraph") + graph.parse(_TEST_DIR / "data/test.nt", format="ox-ntriples", transactional=True) + + self.assertEqual(len(graph), 6) + + def test_parsing_ox_ntriples_fallback(self): + graph = rdflib.Graph() + with warnings.catch_warnings(record=True) as warning: + graph.parse(_TEST_DIR / "data/test.nt", format="ox-ntriples", transactional=False) + + self.assertEqual( + warning[0].message.args[0], + ( + "Graph store should be an instance of OxigraphStore, got Memory" + " store instead. Attempting to parse using rdflib native parser." 
+ ), + ) + self.assertEqual(len(graph), 6) + + def test_parsing_ox_url_ntriples(self): + graph = rdflib.Graph(store="Oxigraph") + graph.parse( + "https://i-adopt.github.io/ontology/ontology.nt", + format="ox-ntriples", + transactional=True, + ) + self.assertIsNotNone(graph) + + def test_parsing_ox_rdfxml_bulk_load(self): + graph = rdflib.Graph(store="Oxigraph") + graph.parse( + _TEST_DIR / "data/test.rdf", + publicID="http://example.com/", + format="ox-xml", + transactional=False, + ) + + self.assertEqual(len(graph), 6) + self.assertTrue(next(iter(graph))[0].startswith("http://example.com/")) + + def test_parsing_ox_rdfxml_load(self): + graph = rdflib.Graph(store="Oxigraph") + graph.parse( + _TEST_DIR / "data/test.rdf", + publicID="http://example.com/", + format="ox-xml", + transactional=True, + ) + self.assertEqual(len(graph), 6) + self.assertTrue(next(iter(graph))[0].startswith("http://example.com/")) + + def test_parsing_ox_url_rdfxml_load(self): + graph = rdflib.Graph(store="Oxigraph") + graph.parse( + "https://i-adopt.github.io/ontology/ontology.xml", + format="ox-xml", + transactional=True, + ) + self.assertIsNotNone(graph) + + def test_parsing_ox_rdfxml_fallback(self): + graph = rdflib.Graph() + with warnings.catch_warnings(record=True) as warning: + graph.parse( + _TEST_DIR / "data/test.rdf", + publicID="http://example.com/", + format="ox-xml", + transactional=False, + ) + + self.assertEqual( + warning[0].message.args[0], + ( + "Graph store should be an instance of OxigraphStore, got Memory" + " store instead. Attempting to parse using rdflib native parser." + ), + ) + self.assertEqual(len(graph), 6) + + +if __name__ == "__main__": + unittest.main()