From 5bf7acba75aa307067665673a1ab0cdba64fe9c4 Mon Sep 17 00:00:00 2001
From: Thomas Tanon
Date: Thu, 17 Oct 2024 20:05:39 +0200
Subject: [PATCH] Allows using Oxigraph native serializers

---
# Review notes (free-form area after the `---` separator; not part of the
# commit message and not part of any hunk).
#
# What this patch does
#   * Adds oxrdflib/serializer.py: an abstract `_OxigraphSerializer` plus six
#     subclasses that differ only in the `RdfFormat` bound to `_format`
#     (N3, Turtle, N-Triples, RDF/XML, N-Quads, TriG).
#   * pyproject.toml: adds a lowercase `oxigraph` store alias, registers nine
#     `ox-*` names under "rdf.plugins.serializer" (`ox-nt`, `ox-nt11` and
#     `ox-ntriples` all map to OxigraphNTriplesSerializer; `ox-turtle` and
#     `ox-ttl` both map to OxigraphTurtleSerializer), and drops two blank lines.
#   * Adds tests/test_serializer.py, run against the "default" and "oxigraph"
#     stores for both Graph and Dataset.
#
# `_OxigraphSerializer.serialize`
#   * Raises ValueError unless `encoding` is None or exactly "utf-8".
#   * If `self.store` is an OxigraphStore it calls `self.store._inner.dump`,
#     otherwise it converts every item yielded by `self.store` with `to_ox`
#     and hands the generator to pyoxigraph's `serialize`.
#   * `_base` and `**kwargs` are accepted but never read (see the in-code
#     TODO about base and prefixes).
#   * `_format` is declared as an abstract property on the base class and is
#     overridden by a plain class attribute in every subclass.
#
# NOTE(review): the encoding check is an exact string comparison, so spellings
#   such as "UTF-8" or "utf8" would raise. Confirm this is intended.
# NOTE(review): the second parameter is named `_base`; a caller that passes
#   `base=...` by keyword would land in `**kwargs` instead. Harmless while the
#   value is unused, but confirm against rdflib's `Serializer.serialize`.
# NOTE(review): the tests reach this code through `Graph.serialize` /
#   `Dataset.serialize`. Presumably rdflib hands the serializer the graph
#   object rather than its backing store; if so `isinstance(self.store,
#   OxigraphStore)` never matches and the `_inner.dump` fast path is dead.
#   That branch also passes no graph restriction to `dump`, which looks at
#   odds with the "Should not be serialized" expectation in
#   test_serialize_graph. Verify before relying on it.
# NOTE(review): in this copy of the patch, text that would sit between angle
#   brackets appears to be missing: the author address on the From: header,
#   the IRIs in the expected serializations (they read `" .\n"`), and the
#   RDF/XML fixture (only empty lines remain). Runs of whitespace also look
#   collapsed. The line layout below was reconstructed from syntax and from
#   the hunk line counts (63 and 60); restore the literals from the original
#   commit before applying or running the tests.
#
 oxrdflib/serializer.py   | 63 ++++++++++++++++++++++++++++++++++++++++
 pyproject.toml           | 13 +++++++--
 tests/test_serializer.py | 60 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 134 insertions(+), 2 deletions(-)
 create mode 100644 oxrdflib/serializer.py
 create mode 100644 tests/test_serializer.py

diff --git a/oxrdflib/serializer.py b/oxrdflib/serializer.py
new file mode 100644
index 0000000..a151746
--- /dev/null
+++ b/oxrdflib/serializer.py
@@ -0,0 +1,63 @@
+from abc import ABC, abstractmethod
+from typing import IO, Any, Optional
+
+from pyoxigraph import RdfFormat, serialize
+from rdflib.serializer import Serializer
+
+from oxrdflib._converter import to_ox
+from oxrdflib.store import OxigraphStore
+
+__all__ = [
+    "OxigraphN3Serializer",
+    "OxigraphTurtleSerializer",
+    "OxigraphNTriplesSerializer",
+    "OxigraphRdfXmlSerializer",
+    "OxigraphTriGSerializer",
+    "OxigraphNQuadsSerializer",
+]
+
+
+class _OxigraphSerializer(Serializer, ABC):
+    def serialize(
+        self,
+        stream: IO[bytes],
+        _base: Optional[str] = None,
+        encoding: Optional[str] = None,
+        **kwargs: Any,  # noqa: ARG002
+    ) -> None:
+        if encoding not in (None, "utf-8"):
+            raise ValueError(f"RDF files are always utf-8 encoded, I was passed: {encoding}")
+        # TODO: base and prefixes
+        if isinstance(self.store, OxigraphStore):
+            self.store._inner.dump(stream, format=self._format)
+        else:
+            serialize((to_ox(q) for q in self.store), stream, format=self._format)
+
+    @property
+    @abstractmethod
+    def _format(self) -> RdfFormat:
+        pass
+
+
+class OxigraphN3Serializer(_OxigraphSerializer):
+    _format = RdfFormat.N3
+
+
+class OxigraphTurtleSerializer(_OxigraphSerializer):
+    _format = RdfFormat.TURTLE
+
+
+class OxigraphNTriplesSerializer(_OxigraphSerializer):
+    _format = RdfFormat.N_TRIPLES
+
+
+class OxigraphRdfXmlSerializer(_OxigraphSerializer):
+    _format = RdfFormat.RDF_XML
+
+
+class OxigraphNQuadsSerializer(_OxigraphSerializer):
+    _format = RdfFormat.N_QUADS
+
+
+class OxigraphTriGSerializer(_OxigraphSerializer):
+    _format = RdfFormat.TRIG
diff --git a/pyproject.toml b/pyproject.toml
index 4929813..74daa38 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,13 +29,12 @@ name = "oxrdflib"
 readme = "README.md"
 requires-python = ">=3.8"
 
-
 [project.entry-points."rdf.plugins.store"]
 Oxigraph = "oxrdflib.store:OxigraphStore"
+oxigraph = "oxrdflib.store:OxigraphStore"
 OxMemory = "oxrdflib.store:OxigraphStore"
 OxSled = "oxrdflib.store:OxigraphStore"
 
-
 [project.entry-points."rdf.plugins.parser"]
 ox-turtle = "oxrdflib.parser:OxigraphTurtleParser"
 ox-ttl = "oxrdflib.parser:OxigraphTurtleParser"
@@ -43,6 +42,16 @@ ox-ntriples = "oxrdflib.parser:OxigraphNTriplesParser"
 ox-nt = "oxrdflib.parser:OxigraphNTriplesParser"
 ox-xml = "oxrdflib.parser:OxigraphRdfXmlParser"
 
+[project.entry-points."rdf.plugins.serializer"]
+ox-turtle = "oxrdflib.serializer:OxigraphTurtleSerializer"
+ox-ttl = "oxrdflib.serializer:OxigraphTurtleSerializer"
+ox-ntriples = "oxrdflib.serializer:OxigraphNTriplesSerializer"
+ox-n3 = "oxrdflib.serializer:OxigraphN3Serializer"
+ox-nquads = "oxrdflib.serializer:OxigraphNQuadsSerializer"
+ox-nt = "oxrdflib.serializer:OxigraphNTriplesSerializer"
+ox-nt11 = "oxrdflib.serializer:OxigraphNTriplesSerializer"
+ox-trig = "oxrdflib.serializer:OxigraphTriGSerializer"
+ox-xml = "oxrdflib.serializer:OxigraphRdfXmlSerializer"
 
 [project.urls]
 Changelog = "https://github.com/oxigraph/oxrdflib/blob/main/CHANGELOG.md"
diff --git a/tests/test_serializer.py b/tests/test_serializer.py
new file mode 100644
index 0000000..6a84807
--- /dev/null
+++ b/tests/test_serializer.py
@@ -0,0 +1,60 @@
+import unittest
+
+from rdflib import Dataset, Graph, URIRef
+
+s = URIRef("http://example.com/s")
+p = URIRef("http://example.com/vocab#p")
+o = URIRef("http://example.com/o")
+g = URIRef("http://example.com/g")
+
+
+class TestSerializer(unittest.TestCase):
+    def test_serialize_graph(self):
+        for store in ("default", "oxigraph"):
+            for fmt, serialization in (
+                ("ox-turtle", " .\n"),
+                ("ox-ttl", " .\n"),
+                ("ox-ntriples", " .\n"),
+                ("ox-n3", " .\n"),
+                ("ox-nquads", " .\n"),
+                ("ox-nt", " .\n"),
+                ("ox-nt11", " .\n"),
+                ("ox-trig", " .\n"),
+                (
+                    "ox-xml",
+                    """
+
+
+
+
+""",
+                ),
+            ):
+                with self.subTest(store=store, format=fmt):
+                    graph = Graph(store=store)
+                    graph.add((s, p, o))
+                    graph.store.add((o, p, s), context=Graph(identifier=g))  # Should not be serialized
+                    self.assertEqual(graph.serialize(format=fmt), serialization)
+
+    def test_serialize_dataset(self):
+        for store in ("default", "oxigraph"):
+            for fmt, serialization in (
+                (
+                    "ox-nquads",
+                    " "
+                    " .\n",
+                ),
+                (
+                    "ox-trig",
+                    " {\n\t "
+                    " .\n}\n",
+                ),
+            ):
+                with self.subTest(store=store, format=fmt):
+                    dataset = Dataset(store=store)
+                    dataset.add((s, p, o, Graph(identifier=g)))
+                    self.assertEqual(dataset.serialize(format=fmt), serialization)
+
+
+if __name__ == "__main__":
+    unittest.main()