Skip to content

Commit

Permalink
Parsers: support quad formats and other stores
Browse files Browse the repository at this point in the history
  • Loading branch information
Tpt committed Oct 27, 2024
1 parent 82f87fb commit 9baa63d
Show file tree
Hide file tree
Showing 8 changed files with 120 additions and 247 deletions.
18 changes: 0 additions & 18 deletions oxrdflib/_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,21 +108,3 @@ def from_ox(
if isinstance(term, ox.Triple):
return from_ox(term.subject), from_ox(term.predicate), from_ox(term.object)
raise ValueError(f"Unexpected Oxigraph term: {term!r}")


def guess_rdf_format(rdflib_type: str) -> ox.RdfFormat:
    """Resolve an rdflib format name to an Oxigraph ``RdfFormat``.

    The name is first normalised with ``ox_to_rdflib_type`` and then looked
    up, in order, as:

    1. a media type,
    2. a file extension,
    3. a media type with an ``application/`` prefix added.

    The first lookup that yields a truthy result wins.

    Raises:
        ValueError: if the final lookup result is ``None``, i.e. no lookup
            recognised the (normalised) name.
    """
    rdflib_type = ox_to_rdflib_type(rdflib_type)

    # Each fallback runs only when the previous lookup produced nothing.
    resolved = ox.RdfFormat.from_media_type(rdflib_type)
    if not resolved:
        resolved = ox.RdfFormat.from_extension(rdflib_type)
    if not resolved:
        resolved = ox.RdfFormat.from_media_type(f"application/{rdflib_type}")

    if resolved is None:
        raise ValueError(f"Unsupported rdflib type: {rdflib_type}")
    return resolved


def ox_to_rdflib_type(ox_format: str) -> str:
    """Return *ox_format* without a leading ``ox-`` prefix.

    Only one leading occurrence is removed; a name that does not start with
    ``ox-`` is returned unchanged.
    """
    prefix = "ox-"
    if not ox_format.startswith(prefix):
        return ox_format
    return ox_format[len(prefix):]
124 changes: 48 additions & 76 deletions oxrdflib/parser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import warnings
from typing import Any, Optional
from abc import ABC, abstractmethod
from typing import Optional

from rdflib import ConjunctiveGraph, Graph
from pyoxigraph import DefaultGraph, RdfFormat, parse
from rdflib import Graph
from rdflib.exceptions import ParserError
from rdflib.parser import (
FileInputSource,
Expand All @@ -11,113 +12,84 @@
create_input_source,
)

from oxrdflib._converter import guess_rdf_format, ox_to_rdflib_type, to_ox
from oxrdflib._converter import from_ox, to_ox
from oxrdflib.store import OxigraphStore

__all__ = [
"OxigraphN3Parser",
"OxigraphTurtleParser",
"OxigraphNTriplesParser",
"OxigraphRdfXmlParser",
"OxigraphTriGParser",
"OxigraphNQuadsParser",
]


class OxigraphParser(Parser):
class _OxigraphParser(Parser, ABC):
def parse(
self,
source: InputSource,
sink: Graph,
format: str,
*,
encoding: Optional[str] = "utf-8",
**kwargs: Any,
transactional: bool = True,
) -> None:
if encoding not in (None, "utf-8"):
raise ParserError("N3/Turtle files are always utf-8 encoded, I was passed: {encoding}")

if not isinstance(sink.store, OxigraphStore):
warnings.warn(
"Graph store should be an instance of OxigraphStore, "
f"got {type(sink.store).__name__} store instead."
" Attempting to parse using rdflib native parser.",
stacklevel=2,
)
sink.parse(source, format=ox_to_rdflib_type(format))
return

raise ParserError(f"Only the 'utf-8' encoding is supported, '{encoding}' given")
base_iri = sink.absolutize(source.getPublicId() or source.getSystemId() or "")

args = {
"format": guess_rdf_format(format),
"format": self._format,
"base_iri": base_iri,
"to_graph": to_ox(sink.identifier),
}

if isinstance(source, URLInputSource):
source = create_input_source(source.url, format=ox_to_rdflib_type(format))
source = create_input_source(source.url, format=self._format.file_extension)
if isinstance(source, FileInputSource):
args["path"] = source.file.name
else:
args["input"] = source.getByteStream()

if kwargs.get("transactional", True):
sink.store._inner.load(**args)
if isinstance(sink.store, OxigraphStore):
if transactional:
sink.store._inner.load(**args, to_graph=to_ox(sink.identifier))
else:
sink.store._inner.bulk_load(**args, to_graph=to_ox(sink.identifier))
else:
sink.store._inner.bulk_load(**args)
sink.store.addN(
(
from_ox(quad.subject),
from_ox(quad.predicate),
from_ox(quad.object),
sink.identifier if isinstance(quad.graph_name, DefaultGraph) else from_ox(quad.graph_name),
)
for quad in parse(**args)
)

@property
@abstractmethod
def _format(self) -> RdfFormat:
pass

class OxigraphTurtleParser(OxigraphParser):
def parse(
self,
source: InputSource,
sink: Graph,
format: str = "ox-turtle",
encoding: Optional[str] = "utf-8",
**kwargs: Any,
) -> None:
super().parse(source, sink, format, encoding, **kwargs)

class OxigraphTurtleParser(_OxigraphParser):
_format = RdfFormat.TURTLE

class OxigraphNTriplesParser(OxigraphParser):
def parse(
self,
source: InputSource,
sink: Graph,
format: str = "ox-nt",
encoding: Optional[str] = None,
**kwargs: Any,
) -> None:
super().parse(source, sink, format, encoding, **kwargs)

class OxigraphNTriplesParser(_OxigraphParser):
_format = RdfFormat.N_TRIPLES

class OxigraphRdfXmlParser(OxigraphParser):
def parse(
self,
source: FileInputSource,
sink: Graph,
format: str = "ox-xml",
encoding: Optional[str] = None,
**kwargs: Any,
) -> None:
super().parse(source, sink, format, encoding, **kwargs)

class OxigraphRdfXmlParser(_OxigraphParser):
_format = RdfFormat.RDF_XML

class OxigraphNQuadsParser(OxigraphParser):
def parse(
self,
source: InputSource,
sink: ConjunctiveGraph,
format: str,
encoding: Optional[str] = None,
**kwargs: Any,
) -> None:
raise NotImplementedError("N-Quads is not supported yet")

class OxigraphN3Parser(_OxigraphParser):
_format = RdfFormat.N3

class OxigraphTriGParser(OxigraphParser):
def parse(
self,
source: InputSource,
sink: Graph,
format: str,
encoding: Optional[str] = "utf-8",
**kwargs: Any,
) -> None:
raise NotImplementedError("TriG parser is not supported yet")

class OxigraphNQuadsParser(_OxigraphParser):
_format = RdfFormat.N_QUADS


class OxigraphTriGParser(_OxigraphParser):
_format = RdfFormat.TRIG
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,11 @@ oxigraph = "oxrdflib.store:OxigraphStore"
ox-turtle = "oxrdflib.parser:OxigraphTurtleParser"
ox-ttl = "oxrdflib.parser:OxigraphTurtleParser"
ox-ntriples = "oxrdflib.parser:OxigraphNTriplesParser"
ox-n3 = "oxrdflib.parser:OxigraphN3Parser"
ox-nquads = "oxrdflib.parser:OxigraphNQuadsParser"
ox-nt = "oxrdflib.parser:OxigraphNTriplesParser"
ox-nt11 = "oxrdflib.parser:OxigraphNTriplesParser"
ox-trig = "oxrdflib.parser:OxigraphTriGParser"
ox-xml = "oxrdflib.parser:OxigraphRdfXmlParser"

[project.entry-points."rdf.plugins.serializer"]
Expand Down
6 changes: 0 additions & 6 deletions tests/data/test.nt

This file was deleted.

16 changes: 0 additions & 16 deletions tests/data/test.rdf

This file was deleted.

10 changes: 0 additions & 10 deletions tests/data/test.ttl

This file was deleted.

Loading

0 comments on commit 9baa63d

Please sign in to comment.