-
Notifications
You must be signed in to change notification settings - Fork 555
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement RDF Patch serializer (#2877)
* Implement RDF Patch serializer. Supports serialization from Dataset instances only; triples and quads within a Dataset are supported.
* Add examples for Patch serialization.
* Remove unnecessary use of addN -> add
* Handle RDFLib graph/dataset addition quirks. Should fix static analysis issues.
* Fix mypy errors
* Attempt to fix failing test.
* Add Patch documentation + docstring

---------

Co-authored-by: Ashley Sommer <[email protected]>
- Loading branch information
1 parent
aa9d103
commit 404be3b
Showing
5 changed files
with
359 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
from rdflib import Dataset, Graph, Literal, URIRef | ||
|
||
|
||
def main():
    """Print example RDF Patch documents produced by the "patch" serializer.

    Four scenarios are shown in order: adding a quad through a named graph,
    adding a quad directly, adding a plain triple, and diffing two Datasets.
    """

    def make_quad(suffix):
        """Build the sample quad whose terms and graph name end in ``suffix``."""
        return (
            URIRef(f"http://subj-{suffix}"),
            URIRef(f"http://pred-{suffix}"),
            Literal(f"obj-{suffix}"),
            Graph(identifier=URIRef(f"http://graph-{suffix}")),
        )

    def show(title, patch):
        """Print a heading followed by the serialized patch."""
        print(title)
        print(patch)

    # example for adding a quad
    named_graph = Graph(identifier=URIRef("http://graph-a"))
    dataset = Dataset()
    dataset.add_graph(named_graph)
    triple = (URIRef("http://subj-a"), URIRef("http://pred-a"), Literal("obj-a"))
    dataset.get_context(named_graph.identifier).add(triple)
    show("Add Quad Patch:", dataset.serialize(format="patch", operation="add"))

    # alternate example for adding a quad
    dataset = Dataset()
    dataset.add(make_quad("a"))
    show("Add Quad Patch:", dataset.serialize(format="patch", operation="add"))

    # example for adding a triple
    dataset = Dataset()
    dataset.add(triple)
    show("\nAdd Triple Patch:", dataset.serialize(format="patch", operation="add"))

    # Example for diff quads: the two datasets share only the "b" quad.
    quad_a = make_quad("a")
    quad_b = make_quad("b")
    quad_c = make_quad("c")
    source = Dataset()
    destination = Dataset()
    source.addN([quad_a, quad_b])
    destination.addN([quad_b, quad_c])
    show("Diff Quad Patch:", source.serialize(format="patch", target=destination))


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
from __future__ import annotations | ||
|
||
import warnings | ||
from typing import IO, Optional | ||
from uuid import uuid4 | ||
|
||
from rdflib import Dataset | ||
from rdflib.plugins.serializers.nquads import _nq_row | ||
from rdflib.plugins.serializers.nt import _nt_row | ||
from rdflib.serializer import Serializer | ||
|
||
# Maps the user-facing ``operation`` keyword to the row prefix written in the
# patch ("A" rows for additions, "D" rows for deletions).
add_remove_methods = {"add": "A", "remove": "D"}


class PatchSerializer(Serializer):
    """
    Creates an RDF patch file to add and remove triples/quads.
    Can either:
    - Create an add or delete patch for a single Dataset.
    - Create a patch to represent the difference between two Datasets.
    """

    def __init__(
        self,
        store: Dataset,
    ):
        """
        :param store: The Dataset whose contents are serialized as a patch.
        """
        self.store: Dataset = store
        super().__init__(store)

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        **kwargs,
    ):
        """
        Serialize the store to the given stream as an RDF Patch document.

        The output is a header (``H id``, optionally ``H prev``), a ``TX``
        row, the add/delete rows and a closing ``TC`` row.

        :param stream: The binary stream to serialize to.
        :param base: Not supported; a warning is issued if a value is given.
        :param encoding: Not configurable; ``self.encoding`` is always used and
            a warning is issued if a different encoding is requested.
        :param kwargs: Additional keyword arguments.
        Supported keyword arguments:
        - operation: The operation to perform. Either 'add' or 'remove'.
          Every statement in the store is written with that operation.
        - target: The target Dataset to compare against. Statements only in
          the target are written as additions, statements only in the store
          as deletions.
          NB: Only one of 'operation' or 'target' should be provided. If both
          are given, 'operation' is used and 'target' is ignored.
        - header_id: The header ID to use. A random ``uuid:`` identifier is
          generated when omitted or empty.
        - header_prev: The previous header ID to use.
        :raises AssertionError: If 'operation' is given but is neither 'add'
            nor 'remove'.
        """
        operation = kwargs.get("operation")
        target = kwargs.get("target")
        header_id = kwargs.get("header_id")
        header_prev = kwargs.get("header_prev")
        if not header_id:
            header_id = f"uuid:{uuid4()}"
        if base is not None:
            warnings.warn("PatchSerializer does not support base.")
        # Compare the *requested* encoding before it is replaced below;
        # otherwise the comparison is always equal and the warning is dead.
        if encoding is not None and encoding.lower() != self.encoding.lower():
            warnings.warn(
                "PatchSerializer does not use custom encoding. "
                f"Given encoding was: {encoding}"
            )
        encoding = self.encoding

        def emit(text):
            # Single place where rows are encoded and written to the stream.
            stream.write(text.encode(encoding, "replace"))

        def write_header():
            emit(f"H id <{header_id}> .\n")
            if header_prev:
                # Header rows are terminated with " ." like every other row.
                emit(f"H prev <{header_prev}> .\n")
            emit("TX .\n")

        def write_triples(contexts, op_code, use_passed_contexts=False):
            for context in contexts:
                if not use_passed_contexts:
                    # Re-resolve the context through the store being serialized.
                    context = self.store.get_context(context.identifier)
                for triple in context:
                    emit(self._patch_row(triple, context.identifier, op_code))

        # Validate before anything is written so a bad call leaves the stream
        # untouched. An explicit raise (rather than ``assert``) keeps the check
        # active under ``python -O``; AssertionError is retained so existing
        # callers observe the same exception type.
        if operation and operation not in add_remove_methods:
            raise AssertionError(f"Invalid operation: {operation}")

        write_header()
        if operation:
            write_triples(self.store.contexts(), add_remove_methods[operation])
        elif target is not None:
            # ``is not None`` rather than truthiness: an empty target Dataset
            # must still produce delete rows for everything in the store.
            to_add, to_remove = self._diff(target)
            write_triples(to_add.contexts(), "A", use_passed_contexts=True)
            write_triples(to_remove.contexts(), "D", use_passed_contexts=True)

        emit("TC .\n")

    def _diff(self, target):
        """
        Compute the difference between the store and ``target``.

        :param target: The Dataset to compare the store against.
        :return: A ``(rows_to_add, rows_to_remove)`` pair: what ``target`` has
            that the store lacks, and what the store has that ``target`` lacks.
        """
        rows_to_add = target - self.store
        rows_to_remove = self.store - target
        return rows_to_add, rows_to_remove

    def _patch_row(self, triple, context_id, operation):
        """
        Format one patch row for ``triple``.

        :param triple: The (subject, predicate, object) statement to write.
        :param context_id: Identifier of the graph the statement belongs to.
        :param operation: The row prefix, e.g. "A" or "D".
        :return: The row text; statements in the store's default context are
            written as triples, all others as quads carrying ``context_id``.
        """
        if context_id == self.store.default_context.identifier:
            return f"{operation} {_nt_row(triple)}"
        return f"{operation} {_nq_row(triple, context_id)}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,178 @@ | ||
from rdflib import Dataset, Graph, Literal, URIRef | ||
|
||
|
||
def test_add_quad():
    """An "add" patch for a named-graph statement contains an ``A`` quad row."""
    quad = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
        Graph(identifier=URIRef("http://example.org/graph1")),
    )
    dataset = Dataset()
    dataset.add(quad)

    patch = dataset.serialize(format="patch", operation="add")

    # The trailing newline is part of the expectation: the row must be complete.
    expected_row = (
        "A <http://example.org/subject1> <http://example.org/predicate2> "
        '"object2" <http://example.org/graph1> .\n'
    )
    assert expected_row in patch
|
||
|
||
def test_delete_quad():
    """A "remove" patch for a named-graph statement contains a ``D`` quad row."""
    quad = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
        Graph(identifier=URIRef("http://example.org/graph1")),
    )
    dataset = Dataset()
    dataset.add(quad)

    patch = dataset.serialize(format="patch", operation="remove")

    # The trailing newline is part of the expectation: the row must be complete.
    expected_row = (
        "D <http://example.org/subject1> <http://example.org/predicate2> "
        '"object2" <http://example.org/graph1> .\n'
    )
    assert expected_row in patch
|
||
|
||
def test_diff_quad():
    """Diffing against a target with one extra quad yields an ``A`` row for it."""
    shared_quad = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
        Graph(identifier=URIRef("http://example.org/graph1")),
    )
    extra_quad = (
        URIRef("http://example.org/subject2"),
        URIRef("http://example.org/predicate3"),
        Literal("object3"),
        Graph(identifier=URIRef("http://example.org/graph2")),
    )
    source = Dataset()
    target = Dataset()
    source.add(shared_quad)
    target.addN([shared_quad, extra_quad])

    patch = source.serialize(format="patch", target=target)

    expected_row = (
        "A <http://example.org/subject2> <http://example.org/predicate3> "
        '"object3" <http://example.org/graph2> .'
    )
    assert expected_row in patch
|
||
|
||
def test_add_triple():
    """An "add" patch for a default-graph statement contains an ``A`` triple row."""
    statement = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
    )
    dataset = Dataset()
    dataset.add(statement)

    patch = dataset.serialize(format="patch", operation="add")

    expected_row = (
        "A <http://example.org/subject1> <http://example.org/predicate2> "
        '"object2" .'
    )
    assert expected_row in patch
|
||
|
||
def test_delete_triple():
    """A "remove" patch for a default-graph statement contains a ``D`` triple row."""
    statement = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
    )
    dataset = Dataset()
    dataset.add(statement)

    patch = dataset.serialize(format="patch", operation="remove")

    expected_row = (
        "D <http://example.org/subject1> <http://example.org/predicate2> "
        '"object2" .'
    )
    assert expected_row in patch
|
||
|
||
def test_diff_triple():
    """Diffing against a target with one extra triple yields an ``A`` row for it."""
    shared_triple = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
    )
    extra_triple = (
        URIRef("http://example.org/subject2"),
        URIRef("http://example.org/predicate3"),
        Literal("object3"),
    )
    source = Dataset()
    target = Dataset()
    source.add(shared_triple)
    for statement in (shared_triple, extra_triple):
        target.add(statement)

    patch = source.serialize(format="patch", target=target)

    expected_row = (
        "A <http://example.org/subject2> <http://example.org/predicate3> "
        '"object3" .'
    )
    assert expected_row in patch
|
||
|
||
def test_diff_quad_overlap():
    """Diffing overlapping Datasets deletes source-only and adds target-only quads."""
    source_only = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate1"),
        Literal("object1"),
        Graph(identifier=URIRef("http://example.org/graph1")),
    )
    shared = (
        URIRef("http://example.org/subject2"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
        Graph(identifier=URIRef("http://example.org/graph2")),
    )
    target_only = (
        URIRef("http://example.org/subject3"),
        URIRef("http://example.org/predicate3"),
        Literal("object3"),
        Graph(identifier=URIRef("http://example.org/graph3")),
    )
    source = Dataset()
    target = Dataset()
    source.addN([source_only, shared])
    target.addN([shared, target_only])

    patch = source.serialize(format="patch", target=target)

    # The quad present only in the source must be removed.
    expected_delete = (
        "D <http://example.org/subject1> <http://example.org/predicate1> "
        '"object1" <http://example.org/graph1> .'
    )
    assert expected_delete in patch

    # The quad present only in the target must be added.
    expected_add = (
        "A <http://example.org/subject3> <http://example.org/predicate3> "
        '"object3" <http://example.org/graph3> .'
    )
    assert expected_add in patch
|
||
|
||
def test_header_id():
    """A caller-supplied ``header_id`` is written to the ``H id`` header row."""
    statement = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
    )
    dataset = Dataset()
    dataset.add(statement)

    patch = dataset.serialize(format="patch", operation="add", header_id="uuid:123")

    expected_header = "H id <uuid:123>"
    assert expected_header in patch
|
||
|
||
def test_prev_header():
    """A caller-supplied ``header_prev`` is written to the ``H prev`` header row."""
    statement = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
    )
    dataset = Dataset()
    dataset.add(statement)

    patch = dataset.serialize(format="patch", operation="add", header_prev="uuid:123")

    expected_header = "H prev <uuid:123>"
    assert expected_header in patch