Skip to content

Commit

Permalink
Add initial implementation of RDF Patch parser. (#2863)
Browse files Browse the repository at this point in the history
* Add initial implementation of RDF Patch parser.

* Add example, run Black & Ruff

* Ruff again

* Add docstring for Patch Operations + add class to All declaration to fix sphinx build.

* Black again

* Add parsing of <_:identifier> style bnodes; switch to line by line parsing.

* Remove unused mypy comment.

* Add mypy exclusions (comments); the code will not enter these methods if self.line is None in the first place.

* Remove mypy comment.

* Add ignore to 181.

* Add RDF Patch to Parser documentation.

* Correct Patch Parser Name. Remove unnecessary return statements.

* Remove unnecessary deskolemize from examples.

* Attempt to fix failing test.

---------

Co-authored-by: Nicholas Car <[email protected]>
Co-authored-by: Ashley Sommer <[email protected]>
  • Loading branch information
3 people authored Aug 26, 2024
1 parent 324f20a commit aa9d103
Show file tree
Hide file tree
Showing 20 changed files with 432 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/plugin_parsers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ json-ld :class:`~rdflib.plugins.parsers.jsonld.JsonLDParser`
hext :class:`~rdflib.plugins.parsers.hext.HextuplesParser`
n3 :class:`~rdflib.plugins.parsers.notation3.N3Parser`
nquads :class:`~rdflib.plugins.parsers.nquads.NQuadsParser`
patch :class:`~rdflib.plugins.parsers.patch.RDFPatchParser`
nt :class:`~rdflib.plugins.parsers.ntriples.NTParser`
trix :class:`~rdflib.plugins.parsers.trix.TriXParser`
turtle :class:`~rdflib.plugins.parsers.notation3.TurtleParser`
Expand Down
35 changes: 35 additions & 0 deletions examples/parse_patch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from rdflib import Dataset


def main():
    """Apply an "add" patch and then a "delete" patch to one Dataset.

    After each patch is parsed into the dataset, a heading is printed
    followed by every entry the dataset yields on iteration.
    """
    # RDF patch data
    add_patch = """
TX .
A _:bn1 <http://example.org/predicate1> "object1" .
A _:bn1 <http://example.org/predicate2> "object2" .
TC .
"""

    delete_patch = """
TX .
D _:bn1 <http://example.org/predicate1> "object1" .
TC .
"""

    ds = Dataset()

    # Both patches go through the same parse-then-dump sequence, in order.
    steps = (
        ("After add patch:", add_patch),
        ("After delete patch:", delete_patch),
    )
    for heading, patch in steps:
        ds.parse(data=patch, format="patch")
        print(heading)
        for entry in ds:
            print(entry)


if __name__ == "__main__":
    main()
8 changes: 8 additions & 0 deletions rdflib/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,14 @@ def plugins(
"HextuplesParser",
)

# Register the RDF Patch parser: the plugin name "patch" maps to the
# RDFPatchParser class in rdflib.plugins.parsers.patch, registered as a Parser.
register(
"patch",
Parser,
"rdflib.plugins.parsers.patch",
"RDFPatchParser",
)

# Register SPARQL Processors
register(
"sparql",
Expand Down
183 changes: 183 additions & 0 deletions rdflib/plugins/parsers/patch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
from __future__ import annotations

from codecs import getreader
from enum import Enum
from typing import TYPE_CHECKING, Any, MutableMapping, Optional, Union

from rdflib.exceptions import ParserError as ParseError
from rdflib.graph import Dataset
from rdflib.parser import InputSource
from rdflib.plugins.parsers.nquads import NQuadsParser

# Build up from the NTriples parser:
from rdflib.plugins.parsers.ntriples import r_nodeid, r_tail, r_uriref, r_wspace
from rdflib.term import BNode, URIRef

if TYPE_CHECKING:
import typing_extensions as te

__all__ = ["RDFPatchParser", "Operation"]

_BNodeContextType = MutableMapping[str, BNode]


class Operation(Enum):
    """
    Operation codes that can start a line of an RDF Patch document.

    Each member's value is the literal code as it is written in the patch:

    - ``A``  -> `AddTripleOrQuad`: add a triple or quad.
    - ``D``  -> `DeleteTripleOrQuad`: delete a triple or quad.
    - ``PA`` -> `AddPrefix`: add a prefix.
    - ``PD`` -> `DeletePrefix`: delete a prefix.
    - ``TX`` -> `TransactionStart`: start a transaction.
    - ``TC`` -> `TransactionCommit`: commit a transaction.
    - ``TA`` -> `TransactionAbort`: abort a transaction.
    - ``H``  -> `Header`: specify a header.
    """

    # Data operations
    AddTripleOrQuad = "A"
    DeleteTripleOrQuad = "D"

    # Prefix operations
    AddPrefix = "PA"
    DeletePrefix = "PD"

    # Transaction control
    TransactionStart = "TX"
    TransactionCommit = "TC"
    TransactionAbort = "TA"

    # Patch header
    Header = "H"


class RDFPatchParser(NQuadsParser):
    """
    Line-oriented parser that applies an RDF Patch document to a Dataset.

    Each input line starts with an `Operation` code. Add/delete lines carry a
    triple or quad that is added to / removed from the sink; prefix lines bind
    or unbind a namespace prefix; header and transaction lines are recognised
    but otherwise ignored.
    """

    def parse(  # type: ignore[override]
        self,
        inputsource: InputSource,
        sink: Dataset,
        bnode_context: Optional[_BNodeContextType] = None,
        skolemize: bool = False,
        **kwargs: Any,
    ) -> Dataset:
        """
        Parse inputsource as an RDF Patch file.

        :type inputsource: `rdflib.parser.InputSource`
        :param inputsource: the source of RDF Patch formatted data
        :type sink: `rdflib.graph.Dataset`
        :param sink: where to send parsed data
        :type bnode_context: `dict`, optional
        :param bnode_context: a dict mapping blank node identifiers to `~rdflib.term.BNode` instances.
                              See `.W3CNTriplesParser.parse`
        :type skolemize: `bool`
        :param skolemize: stored on the parser as ``self.skolemize``
        :return: a `Dataset` created over ``sink.store`` (not ``sink`` itself)
        :raises AssertionError: if ``sink.store`` is not context aware
        :raises ParseError: if the source is not file-like, or a line fails to
            parse; the latter is re-raised with the offending line included
        :raises ValueError: if a line does not begin with a known operation
            code (raised by `operation`, not wrapped here)
        """
        assert sink.store.context_aware, (
            "RDFPatchParser must be given" " a context aware store."
        )
        # type error: Incompatible types in assignment (expression has type "ConjunctiveGraph", base class "W3CNTriplesParser" defined the type as "Union[DummySink, NTGraphSink]")
        self.sink: Dataset = Dataset(store=sink.store)
        self.skolemize = skolemize

        # Prefer a character stream; otherwise decode the byte stream as UTF-8.
        source = inputsource.getCharacterStream()
        if not source:
            source = inputsource.getByteStream()
            source = getreader("utf-8")(source)

        if not hasattr(source, "read"):
            raise ParseError("Item to parse must be a file-like object.")

        self.file = source
        self.buffer = ""
        while True:
            # Keep the untouched line for error reporting: parsing consumes
            # self.line as it goes.
            self.line = current_line = self.readline()
            if self.line is None:
                break
            try:
                self.parsepatch(bnode_context)
            except ParseError as msg:
                # Chain the original error so its traceback is not lost.
                raise ParseError(
                    "Invalid line (%s):\n%r" % (msg, current_line)
                ) from msg
        return self.sink

    def parsepatch(self, bnode_context: Optional[_BNodeContextType] = None) -> None:
        """
        Parse and apply the single patch line currently held in ``self.line``.

        Empty lines and ``#`` comment lines are skipped. Operations other than
        add/delete of a triple or quad and add/delete of a prefix (i.e. header
        and transaction markers) are consumed without any effect on the sink.

        :param bnode_context: passed through to the term parsers
        """
        self.eat(r_wspace)
        # From spec: "No comments should be included (comments start # and run to end
        # of line)."
        if (not self.line) or self.line.startswith("#"):
            return  # The line is empty or a comment

        operation = self.operation()
        self.eat(r_wspace)

        if operation in (Operation.AddTripleOrQuad, Operation.DeleteTripleOrQuad):
            self.add_or_remove_triple_or_quad(operation, bnode_context)
        elif operation == Operation.AddPrefix:
            self.add_prefix()
        elif operation == Operation.DeletePrefix:
            self.delete_prefix()
        # Header / transaction operations: nothing to do.

    def add_or_remove_triple_or_quad(
        self, operation, bnode_context: Optional[_BNodeContextType] = None
    ) -> None:
        """
        Read a triple or quad from ``self.line`` and add or remove it.

        The statement goes to the named graph given by the optional fourth
        term, or to the sink's default context when no graph term is present.

        :param operation: `Operation.AddTripleOrQuad` or
            `Operation.DeleteTripleOrQuad`; any other value parses the line
            but leaves the sink untouched
        :param bnode_context: passed through to the term parsers
        :raises ParseError: if anything remains on the line after the
            statement terminator
        """
        self.eat(r_wspace)
        if (not self.line) or self.line.startswith("#"):
            return  # The line is empty or a comment

        # Each term may be written as a "<_:label>" blank node; try that
        # form first, then fall back to the regular term parsers.
        subject = self.labeled_bnode() or self.subject(bnode_context)
        self.eat(r_wspace)

        predicate = self.predicate()
        self.eat(r_wspace)

        obj = self.labeled_bnode() or self.object(bnode_context)
        self.eat(r_wspace)

        # Falsy when the line holds a triple rather than a quad.
        context = self.labeled_bnode() or self.uriref() or self.nodeid(bnode_context)
        self.eat(r_tail)

        if self.line:
            raise ParseError("Trailing garbage")

        # Must have a context aware store - add on a normal Graph
        # discards anything where the ctx != graph.identifier
        if context:
            target = self.sink.get_context(context)
        else:
            target = self.sink.default_context

        if operation == Operation.AddTripleOrQuad:
            target.add((subject, predicate, obj))
        elif operation == Operation.DeleteTripleOrQuad:
            target.remove((subject, predicate, obj))

    def add_prefix(self) -> None:
        """
        Bind the prefix named on the current line to its namespace on the sink.

        With quote characters removed, the remaining line must split on single
        spaces into exactly three parts (prefix, namespace, terminator);
        otherwise the tuple unpacking raises `ValueError`.
        """
        # Extract prefix and URI from the line
        prefix, ns, _ = self.line.replace('"', "").replace("'", "").split(" ")  # type: ignore[union-attr]
        ns_stripped = ns.strip("<>")
        self.sink.bind(prefix, ns_stripped)

    def delete_prefix(self) -> None:
        """
        Unbind the prefix named on the current line.

        The line is split the same way as in `add_prefix`; only the prefix is
        used. The prefix is re-bound to ``None`` with ``replace=True`` on the
        sink's namespace manager.
        """
        prefix, _, _ = self.line.replace('"', "").replace("'", "").split(" ")  # type: ignore[union-attr]
        self.sink.namespace_manager.bind(prefix, None, replace=True)

    def operation(self) -> Operation:
        """
        Identify and consume the operation code at the start of ``self.line``.

        :return: the matching `Operation` member
        :raises ValueError: if the line starts with none of the known codes
        """
        for op in Operation:
            if self.line.startswith(op.value):  # type: ignore[union-attr]
                self.eat_op(op.value)
                return op
        raise ValueError(
            f'Invalid or no Operation found in line: "{self.line}". Valid Operations '
            f"codes are {', '.join([op.value for op in Operation])}"
        )

    def eat_op(self, op: str) -> None:
        """
        Remove the operation code ``op`` from the start of ``self.line``.

        Exactly one leading occurrence of ``op`` is removed. ``str.lstrip`` is
        deliberately not used: it strips any run of the *characters* in ``op``
        and could therefore consume part of whatever follows the code.

        :param op: the operation code text, e.g. ``"PA"``
        """
        if self.line.startswith(op):  # type: ignore[union-attr]
            self.line = self.line[len(op) :]  # type: ignore[index]

    def nodeid(
        self, bnode_context: Optional[_BNodeContextType] = None
    ) -> Union[te.Literal[False], BNode, URIRef]:
        """
        Parse a ``_:label`` blank node from the start of ``self.line``.

        The label found in the input is used directly as the `BNode`
        identifier; ``bnode_context`` is accepted but not consulted.

        :return: the `BNode`, or ``False`` if the line does not start with ``_``
        """
        if self.peek("_"):
            return BNode(self.eat(r_nodeid).group(1))
        return False

    def labeled_bnode(self) -> Union[te.Literal[False], BNode]:
        """
        Parse a blank node written in IRI brackets, e.g. ``<_:bn1>``.

        :return: the `BNode` for the enclosed label, or ``False`` if the line
            does not start with ``<_``
        :raises ParseError: if the bracketed text is not a ``_:label`` blank
            node identifier
        """
        if self.peek("<_"):
            plain_uri = self.eat(r_uriref).group(1)
            label_match = r_nodeid.match(plain_uri)
            if label_match is None:
                # e.g. "<_foo>": starts with "<_" but is not a "_:label".
                raise ParseError(
                    "Invalid blank node label in <%s>" % (plain_uri,)
                )
            return BNode(label_match.group(1))
        return False
6 changes: 6 additions & 0 deletions test/data/patch/add_and_delete_bnode_triples.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
TX .
A _:bn1 <http://example.org/predicate1> "object1" <http://example.org/graph1> .
A _:bn1 <http://example.org/predicate2> "object2" <http://example.org/graph1> .
A _:bn1 <http://example.org/predicate3> "object3" <http://example.org/graph1> .
D _:bn1 <http://example.org/predicate2> "object2" <http://example.org/graph1> .
TC .
6 changes: 6 additions & 0 deletions test/data/patch/add_and_delete_labeled_bnode_quads.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
TX .
A <_:bn1> <http://example.org/predicate1> "object1" <http://example.org/graph1> .
A <_:bn1> <http://example.org/predicate2> "object2" <http://example.org/graph1> .
A <_:bn1> <http://example.org/predicate3> "object3" <http://example.org/graph1> .
D <_:bn1> <http://example.org/predicate2> "object2" <http://example.org/graph1> .
TC .
6 changes: 6 additions & 0 deletions test/data/patch/add_and_delete_prefix.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
TX .
PA present <http://some-other-ns#> .
PA removed <http://ns-for-prefix-to-remove#> .
PD removed <http://ns-for-prefix-to-remove#> .
A <http://ns-for-prefix-to-remove#test-subj> <http://ns-for-prefix-to-remove#test-pred> "object1" .
TC .
6 changes: 6 additions & 0 deletions test/data/patch/add_and_delete_triples.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
TX .
A <http://example.org/subject1> <http://example.org/predicate1> "object1" .
A <http://example.org/subject1> <http://example.org/predicate2> "object2" <http://example.org/graph1> .
D <http://example.org/subject1> <http://example.org/predicate1> "object1" .
D <http://example.org/subject1> <http://example.org/predicate2> "object2" <http://example.org/graph1> .
TC .
3 changes: 3 additions & 0 deletions test/data/patch/add_bnode_graph.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
TX .
A _:bn1 <http://example.org/predicate1> "object1" _:bn1 .
TC .
3 changes: 3 additions & 0 deletions test/data/patch/add_bnode_quad.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
TX .
A _:bn1 <http://example.org/predicate1> "object1" <https://graph-1> .
TC .
3 changes: 3 additions & 0 deletions test/data/patch/add_bnode_triple.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
TX .
A _:bn1 <http://example.org/predicate1> "object1" .
TC .
3 changes: 3 additions & 0 deletions test/data/patch/add_bnode_uri.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
TX .
A <_:bn1> <http://example.org/predicate1> "object1" .
TC .
4 changes: 4 additions & 0 deletions test/data/patch/add_delete_bnode.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
TX .
A _:bn2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/object2> .
D _:bn2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/object2> .
TC .
3 changes: 3 additions & 0 deletions test/data/patch/add_prefix.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
TX .
PA testing <http://example.org/> .
TC .
4 changes: 4 additions & 0 deletions test/data/patch/add_triple_and_quad.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
TX .
A <http://example.org/subject1> <http://example.org/predicate1> "object1" .
A <http://example.org/subject1> <http://example.org/predicate2> "object2" <http://example.org/graph1> .
TC .
3 changes: 3 additions & 0 deletions test/data/patch/delete_bnode_graph.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
TX .
D _:bn1 <http://example.org/predicate1> "object1" _:bn1 .
TC .
3 changes: 3 additions & 0 deletions test/data/patch/delete_bnode_quad.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
TX .
D _:bn1 <http://example.org/predicate1> "object1" <https://graph-1> .
TC .
3 changes: 3 additions & 0 deletions test/data/patch/delete_bnode_triple.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
TX .
D _:bn1 <http://example.org/predicate1> "object1" .
TC .
3 changes: 3 additions & 0 deletions test/data/patch/delete_bnode_uri.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
TX .
D <_:bn1> <http://example.org/predicate1> "object1" .
TC .
Loading

0 comments on commit aa9d103

Please sign in to comment.