Skip to content

Commit

Permalink
Implement RDF Patch serializer (#2877)
Browse files Browse the repository at this point in the history
* Implement RDF Patch serializer. Supports serialization from Dataset instances only; triples and quads within a Dataset are supported.

* Add examples for Patch serialization.

* Remove unnecessary use of addN -> add

* Handle RDFLib graph/dataset addition quirks. Should fix static analysis issues.

* Fix mypy errors

* Attempt to fix failing test.

* Add Patch documentation + docstring

---------

Co-authored-by: Ashley Sommer <[email protected]>
  • Loading branch information
recalcitrantsupplant and ashleysommer authored Aug 26, 2024
1 parent aa9d103 commit 404be3b
Show file tree
Hide file tree
Showing 5 changed files with 359 additions and 0 deletions.
6 changes: 6 additions & 0 deletions docs/plugin_serializers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ n3 :class:`~rdflib.plugins.serializers.n3.N3Serializer`
nquads :class:`~rdflib.plugins.serializers.nquads.NQuadsSerializer`
nt :class:`~rdflib.plugins.serializers.nt.NTSerializer`
hext :class:`~rdflib.plugins.serializers.hext.HextuplesSerializer`
patch :class:`~rdflib.plugins.serializers.patch.PatchSerializer`
pretty-xml :class:`~rdflib.plugins.serializers.rdfxml.PrettyXMLSerializer`
trig :class:`~rdflib.plugins.serializers.trig.TrigSerializer`
trix :class:`~rdflib.plugins.serializers.trix.TriXSerializer`
Expand All @@ -34,6 +35,11 @@ JSON-LD
-------
JSON-LD - 'json-ld' - has been incorporated into RDFLib since v6.0.0.

RDF Patch
---------

The RDF Patch Serializer - 'patch' - uses the RDF Patch format defined at https://afs.github.io/rdf-patch/. It supports serializing context-aware stores as either addition or deletion patches, and also supports serializing the difference between two context-aware stores as a patch of additions and deletions.

HexTuples
---------
The HexTuples Serializer - 'hext' - uses the HexTuples format defined at https://github.com/ontola/hextuples.
Expand Down
64 changes: 64 additions & 0 deletions examples/patch_serializer_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from rdflib import Dataset, Graph, Literal, URIRef


def main():
    """Print example RDF Patch documents: add-quad (two ways), add-triple, and a diff."""

    def show(title, dataset, **serializer_args):
        # Serialize ``dataset`` with the 'patch' format, then print it under ``title``.
        patch = dataset.serialize(format="patch", **serializer_args)
        print(title)
        print(patch)

    triple = (URIRef("http://subj-a"), URIRef("http://pred-a"), Literal("obj-a"))

    # Quad, variant 1: register a named graph, then add the triple to that graph.
    graph_a = Graph(identifier=URIRef("http://graph-a"))
    via_graph = Dataset()
    via_graph.add_graph(graph_a)
    via_graph.get_context(graph_a.identifier).add(triple)
    show("Add Quad Patch:", via_graph, operation="add")

    # Quad, variant 2: add a (s, p, o, graph) 4-tuple directly to the dataset.
    via_tuple = Dataset()
    via_tuple.add(
        (
            URIRef("http://subj-a"),
            URIRef("http://pred-a"),
            Literal("obj-a"),
            Graph(identifier=URIRef("http://graph-a")),
        )
    )
    show("Add Quad Patch:", via_tuple, operation="add")

    # Plain triple: added to the dataset without naming a graph.
    triple_only = Dataset()
    triple_only.add(triple)
    show("\nAdd Triple Patch:", triple_only, operation="add")

    # Diff: the two datasets share quad_b; quad_a is only in the source and
    # quad_c only in the target.
    quad_a = (
        URIRef("http://subj-a"),
        URIRef("http://pred-a"),
        Literal("obj-a"),
        Graph(identifier=URIRef("http://graph-a")),
    )
    quad_b = (
        URIRef("http://subj-b"),
        URIRef("http://pred-b"),
        Literal("obj-b"),
        Graph(identifier=URIRef("http://graph-b")),
    )
    quad_c = (
        URIRef("http://subj-c"),
        URIRef("http://pred-c"),
        Literal("obj-c"),
        Graph(identifier=URIRef("http://graph-c")),
    )
    source, target = Dataset(), Dataset()
    for dataset, quads in ((source, [quad_a, quad_b]), (target, [quad_b, quad_c])):
        dataset.addN(quads)
    show("Diff Quad Patch:", source, target=target)


if __name__ == "__main__":
    main()
6 changes: 6 additions & 0 deletions rdflib/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,12 @@ def plugins(
"rdflib.plugins.serializers.hext",
"HextuplesSerializer",
)
# Make PatchSerializer (rdflib.plugins.serializers.patch) available as the
# "patch" serializer format.
register(
    "patch",
    Serializer,
    "rdflib.plugins.serializers.patch",
    "PatchSerializer",
)

# Register Triple Parsers
register(
Expand Down
105 changes: 105 additions & 0 deletions rdflib/plugins/serializers/patch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
from __future__ import annotations

import warnings
from typing import IO, Optional
from uuid import uuid4

from rdflib import Dataset
from rdflib.plugins.serializers.nquads import _nq_row
from rdflib.plugins.serializers.nt import _nt_row
from rdflib.serializer import Serializer

# Maps the accepted values of the serializer's ``operation`` keyword to the
# one-letter code written at the start of each patch row ("A" for rows to add,
# "D" for rows to delete).
add_remove_methods = {"add": "A", "remove": "D"}


class PatchSerializer(Serializer):
    """
    Creates an RDF patch file to add and remove triples/quads.
    Can either:
    - Create an add or delete patch for a single Dataset.
    - Create a patch to represent the difference between two Datasets.
    """

    def __init__(
        self,
        store: Dataset,
    ):
        """
        :param store: The Dataset whose contents are serialized as a patch.
        """
        self.store: Dataset = store
        super().__init__(store)

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        **kwargs,
    ) -> None:
        """
        Serialize the store to the given stream.

        The output is a header (``H id``, optionally ``H prev``), a ``TX``
        row, zero or more ``A``/``D`` rows and a closing ``TC`` row. If
        neither 'operation' nor 'target' is given, no ``A``/``D`` rows are
        written.

        :param stream: The stream to serialize to.
        :param base: Not supported; a warning is issued if it is given.
        :param encoding: Not configurable; output is always written using
            ``self.encoding``. A warning is issued if a different encoding
            is requested.
        :param kwargs: Additional keyword arguments.
        Supported keyword arguments:
        - operation: The operation to perform. Either 'add' or 'remove'.
          Every triple/quad of the store is written with that operation.
        - target: The target Dataset to compare against. Rows only in the
          target are written as additions, rows only in the store as
          deletions.
        NB: Only one of 'operation' or 'target' should be provided. If both
        are given, 'operation' takes precedence and 'target' is ignored.
        - header_id: The header ID to use. Defaults to a random
          ``uuid:<uuid4>`` value.
        - header_prev: The previous header ID to use. Omitted when not given.
        :raises AssertionError: If 'operation' is given but is not one of
            the supported operations. Nothing is written in that case.
        """
        operation = kwargs.get("operation")
        target = kwargs.get("target")
        header_id = kwargs.get("header_id")
        header_prev = kwargs.get("header_prev")
        if not header_id:
            header_id = f"uuid:{uuid4()}"
        if base is not None:
            warnings.warn("PatchSerializer does not support base.")
        # Compare the *requested* encoding first; only afterwards fall back to
        # the serializer's own encoding. Doing it the other way round makes
        # the comparison always equal, so the warning could never be issued.
        if encoding is not None and encoding.lower() != self.encoding.lower():
            warnings.warn(
                "PatchSerializer does not use custom encoding. "
                f"Given encoding was: {encoding}"
            )
        encoding = self.encoding

        # Validate before writing anything. Raised explicitly (instead of via
        # an ``assert`` statement) so the check also runs under ``python -O``;
        # AssertionError is kept so callers see the same exception type.
        if operation and operation not in add_remove_methods:
            raise AssertionError(f"Invalid operation: {operation}")

        def write_header():
            stream.write(f"H id <{header_id}> .\n".encode(encoding, "replace"))
            if header_prev:
                # Like every other row, the "prev" header row must be
                # terminated with " .".
                stream.write(
                    f"H prev <{header_prev}> .\n".encode(encoding, "replace")
                )
            stream.write("TX .\n".encode(encoding, "replace"))

        def write_triples(contexts, op_code, use_passed_contexts=False):
            for context in contexts:
                if not use_passed_contexts:
                    # Re-fetch the context from this serializer's own store
                    # by identifier before iterating it.
                    context = self.store.get_context(context.identifier)
                for triple in context:
                    stream.write(
                        self._patch_row(triple, context.identifier, op_code).encode(
                            encoding, "replace"
                        )
                    )

        write_header()
        if operation:
            write_triples(self.store.contexts(), add_remove_methods[operation])
        elif target is not None:
            # Identity check rather than truthiness: an empty target Dataset
            # may evaluate as falsy, yet diffing against it is still
            # meaningful (everything in the store has to be deleted).
            to_add, to_remove = self._diff(target)
            write_triples(to_add.contexts(), "A", use_passed_contexts=True)
            write_triples(to_remove.contexts(), "D", use_passed_contexts=True)

        stream.write("TC .\n".encode(encoding, "replace"))

    def _diff(self, target):
        """
        Compute the difference between ``target`` and this serializer's store.

        :param target: The Dataset to compare the store against.
        :return: A ``(rows_to_add, rows_to_remove)`` pair:
            ``target - store`` and ``store - target`` respectively.
        """
        rows_to_add = target - self.store
        rows_to_remove = self.store - target
        return rows_to_add, rows_to_remove

    def _patch_row(self, triple, context_id, operation):
        """
        Format a single patch row.

        :param triple: The triple to write.
        :param context_id: Identifier of the context the triple belongs to.
        :param operation: The row's operation code (e.g. ``A`` or ``D``).
        :return: The operation code followed by the triple formatted with
            ``_nt_row`` when the context is the store's default context,
            otherwise by the quad formatted with ``_nq_row``.
        """
        if context_id == self.store.default_context.identifier:
            return f"{operation} {_nt_row(triple)}"
        return f"{operation} {_nq_row(triple, context_id)}"
178 changes: 178 additions & 0 deletions test/test_serializers/test_serializer_patch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
from rdflib import Dataset, Graph, Literal, URIRef


def test_add_quad():
    """An 'add' patch of a named-graph statement contains the matching ``A`` quad row."""
    quad = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
        Graph(identifier=URIRef("http://example.org/graph1")),
    )
    dataset = Dataset()
    dataset.add(quad)

    patch = dataset.serialize(format="patch", operation="add")

    expected_row = (
        'A <http://example.org/subject1> <http://example.org/predicate2> "object2" '
        "<http://example.org/graph1> .\n"
    )
    assert expected_row in patch


def test_delete_quad():
    """A 'remove' patch of a named-graph statement contains the matching ``D`` quad row."""
    quad = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
        Graph(identifier=URIRef("http://example.org/graph1")),
    )
    dataset = Dataset()
    dataset.add(quad)

    patch = dataset.serialize(format="patch", operation="remove")

    expected_row = (
        'D <http://example.org/subject1> <http://example.org/predicate2> "object2" '
        "<http://example.org/graph1> .\n"
    )
    assert expected_row in patch


def test_diff_quad():
    """Diffing against a target holding one extra quad yields an ``A`` row for that quad."""
    shared_quad = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
        Graph(identifier=URIRef("http://example.org/graph1")),
    )
    extra_quad = (
        URIRef("http://example.org/subject2"),
        URIRef("http://example.org/predicate3"),
        Literal("object3"),
        Graph(identifier=URIRef("http://example.org/graph2")),
    )
    source, target = Dataset(), Dataset()
    source.add(shared_quad)
    target.addN([shared_quad, extra_quad])

    patch = source.serialize(format="patch", target=target)

    expected_row = (
        'A <http://example.org/subject2> <http://example.org/predicate3> "object3" '
        "<http://example.org/graph2> ."
    )
    assert expected_row in patch


def test_add_triple():
    """An 'add' patch of a triple added without a graph contains the ``A`` triple row."""
    statement = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
    )
    dataset = Dataset()
    dataset.add(statement)

    patch = dataset.serialize(format="patch", operation="add")

    expected_row = (
        'A <http://example.org/subject1> <http://example.org/predicate2> "object2" .'
    )
    assert expected_row in patch


def test_delete_triple():
    """A 'remove' patch of a triple added without a graph contains the ``D`` triple row."""
    statement = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
    )
    dataset = Dataset()
    dataset.add(statement)

    patch = dataset.serialize(format="patch", operation="remove")

    expected_row = (
        'D <http://example.org/subject1> <http://example.org/predicate2> "object2" .'
    )
    assert expected_row in patch


def test_diff_triple():
    """Diffing against a target holding one extra triple yields an ``A`` row for it."""
    shared_triple = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
    )
    extra_triple = (
        URIRef("http://example.org/subject2"),
        URIRef("http://example.org/predicate3"),
        Literal("object3"),
    )
    source, target = Dataset(), Dataset()
    source.add(shared_triple)
    for statement in (shared_triple, extra_triple):
        target.add(statement)

    patch = source.serialize(format="patch", target=target)

    expected_row = (
        'A <http://example.org/subject2> <http://example.org/predicate3> "object3" .'
    )
    assert expected_row in patch


def test_diff_quad_overlap():
    """With one shared quad, the diff deletes the source-only quad and adds the target-only one."""
    source_only = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate1"),
        Literal("object1"),
        Graph(identifier=URIRef("http://example.org/graph1")),
    )
    shared = (
        URIRef("http://example.org/subject2"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
        Graph(identifier=URIRef("http://example.org/graph2")),
    )
    target_only = (
        URIRef("http://example.org/subject3"),
        URIRef("http://example.org/predicate3"),
        Literal("object3"),
        Graph(identifier=URIRef("http://example.org/graph3")),
    )
    source, target = Dataset(), Dataset()
    source.addN([source_only, shared])
    target.addN([shared, target_only])

    patch = source.serialize(format="patch", target=target)

    # The quad present only in the source has to be removed.
    expected_delete = (
        'D <http://example.org/subject1> <http://example.org/predicate1> "object1" '
        "<http://example.org/graph1> ."
    )
    assert expected_delete in patch
    # The quad present only in the target has to be added.
    expected_add = (
        'A <http://example.org/subject3> <http://example.org/predicate3> "object3" '
        "<http://example.org/graph3> ."
    )
    assert expected_add in patch


def test_header_id():
    """A caller-supplied ``header_id`` is written into the ``H id`` header row."""
    statement = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
    )
    dataset = Dataset()
    dataset.add(statement)

    patch = dataset.serialize(format="patch", operation="add", header_id="uuid:123")

    expected_header = "H id <uuid:123>"
    assert expected_header in patch


def test_prev_header():
    """A caller-supplied ``header_prev`` is written into an ``H prev`` header row."""
    statement = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
    )
    dataset = Dataset()
    dataset.add(statement)

    patch = dataset.serialize(format="patch", operation="add", header_prev="uuid:123")

    expected_header = "H prev <uuid:123>"
    assert expected_header in patch

0 comments on commit 404be3b

Please sign in to comment.