feat: hextuple parser and serializer now supports anonymous graph names (#2815)

* feat: hextuple handles blank node graph names correctly in parser and serialiser

* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

* Revert "[pre-commit.ci] auto fixes from pre-commit.com hooks"

  This reverts commit 78106bb.

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Nicholas Car <[email protected]>
Co-authored-by: Ashley Sommer <[email protected]>
RDFLib · Jul 24, 2024 · bca2567 · bca2567
1 parent 5537d23
commit bca2567
Show file tree

Hide file tree

Showing 3 changed files with 65 additions and 11 deletions.
diff --git a/rdflib/plugins/parsers/hext.py b/rdflib/plugins/parsers/hext.py
@@ -72,7 +72,11 @@ def _parse_hextuple(
 
         # 6 - context
         if tup[5] is not None:
-            c = URIRef(tup[5])
+            c = (
+                BNode(tup[5].replace("_:", ""))
+                if tup[5].startswith("_:")
+                else URIRef(tup[5])
+            )
             # type error: Argument 1 to "add" of "ConjunctiveGraph" has incompatible type "Tuple[Union[URIRef, BNode], URIRef, Union[URIRef, BNode, Literal], URIRef]"; expected "Union[Tuple[Node, Node, Node], Tuple[Node, Node, Node, Optional[Graph]]]"
             cg.add((s, p, o, c))  # type: ignore[arg-type]
         else:

diff --git a/rdflib/plugins/serializers/hext.py b/rdflib/plugins/serializers/hext.py
@@ -9,7 +9,7 @@
 import warnings
 from typing import IO, Optional, Type, Union
 
-from rdflib.graph import ConjunctiveGraph, Graph
+from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, ConjunctiveGraph, Graph
 from rdflib.namespace import RDF, XSD
 from rdflib.serializer import Serializer
 from rdflib.term import BNode, Literal, Node, URIRef
@@ -136,10 +136,14 @@ def _iri_or_bn(self, i_):
     def _context(self, context):
         if self.graph_type == Graph:
             return ""
-        if context.identifier == "urn:x-rdflib:default":
+        if context.identifier == DATASET_DEFAULT_GRAPH_ID:
             return ""
         elif context is not None and self.default_context is not None:
             # type error: "Node" has no attribute "identifier"
             if context.identifier == self.default_context.identifier:  # type: ignore[attr-defined]
                 return ""
-        return context.identifier
+        return (
+            context.identifier
+            if isinstance(context.identifier, URIRef)
+            else context.identifier.n3()
+        )
diff --git a/test/test_parsers/test_parser_hext.py b/test/test_parsers/test_parser_hext.py
@@ -1,13 +1,39 @@
 from pathlib import Path
 
-from rdflib import ConjunctiveGraph, Dataset, Literal
+from rdflib import BNode, ConjunctiveGraph, Dataset, Literal, URIRef
+from rdflib.compare import isomorphic
+from rdflib.graph import DATASET_DEFAULT_GRAPH_ID
 from rdflib.namespace import XSD
 
 
+def test_named_and_anonymous_graph_roundtrip():
+    s = """
+        ["http://example.com/s01", "http://example.com/a", "http://example.com/Type1", "globalId", "", "https://example.com/graph/1"]
+        ["http://example.com/s01", "http://example.com/label", "This is a Label", "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString", "en", "_:graph-2"]
+        ["http://example.com/s01", "http://example.com/comment", "This is a comment", "http://www.w3.org/2001/XMLSchema#string", "", ""]
+    """
+    d = Dataset()
+    d.parse(data=s, format="hext")
+
+    new_s = d.serialize(format="hext")
+    new_d = Dataset()
+    new_d.parse(data=new_s, format="hext")
+
+    named_graph = URIRef("https://example.com/graph/1")
+    assert isomorphic(d.graph(named_graph), new_d.graph(named_graph))
+
+    anonymous_graph = BNode("graph-2")
+    assert isomorphic(d.graph(anonymous_graph), new_d.graph(anonymous_graph))
+
+    assert isomorphic(
+        d.graph(DATASET_DEFAULT_GRAPH_ID), new_d.graph(DATASET_DEFAULT_GRAPH_ID)
+    )
+
+
 def test_small_string():
     s = """
-        ["http://example.com/s01", "http://example.com/a", "http://example.com/Type1", "globalId", "", ""]
-        ["http://example.com/s01", "http://example.com/label", "This is a Label", "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString", "en", ""]
+        ["http://example.com/s01", "http://example.com/a", "http://example.com/Type1", "globalId", "", "https://example.com/graph/1"]
+        ["http://example.com/s01", "http://example.com/label", "This is a Label", "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString", "en", "_:graph-2"]
         ["http://example.com/s01", "http://example.com/comment", "This is a comment", "http://www.w3.org/2001/XMLSchema#string", "", ""]
         ["http://example.com/s01", "http://example.com/creationDate", "2021-12-01", "http://www.w3.org/2001/XMLSchema#date", "", ""]
         ["http://example.com/s01", "http://example.com/creationTime", "2021-12-01T12:13:00", "http://www.w3.org/2001/XMLSchema#dateTime", "", ""]
@@ -17,14 +43,24 @@ def test_small_string():
         ["http://example.com/s01", "http://example.com/op1", "http://example.com/o2", "globalId", "", ""]
         ["http://example.com/s01", "http://example.com/op2", "http://example.com/o3", "globalId", "", ""]
         """
-    d = Dataset().parse(data=s, format="hext")
+    d = Dataset()
+    d.parse(data=s, format="hext")
+
+    expected_graph_names = (
+        URIRef(DATASET_DEFAULT_GRAPH_ID),
+        URIRef("https://example.com/graph/1"),
+        BNode("graph-2"),
+    )
+    for graph in d.contexts():
+        assert graph.identifier in expected_graph_names
+
     assert len(d) == 10
 
 
 def test_small_string_cg():
     s = """
-        ["http://example.com/s01", "http://example.com/a", "http://example.com/Type1", "globalId", "", ""]
-        ["http://example.com/s01", "http://example.com/label", "This is a Label", "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString", "en", ""]
+        ["http://example.com/s01", "http://example.com/a", "http://example.com/Type1", "globalId", "", "https://example.com/graph/1"]
+        ["http://example.com/s01", "http://example.com/label", "This is a Label", "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString", "en", "_:graph-2"]
         ["http://example.com/s01", "http://example.com/comment", "This is a comment", "http://www.w3.org/2001/XMLSchema#string", "", ""]
         ["http://example.com/s01", "http://example.com/creationDate", "2021-12-01", "http://www.w3.org/2001/XMLSchema#date", "", ""]
         ["http://example.com/s01", "http://example.com/creationTime", "2021-12-01T12:13:00", "http://www.w3.org/2001/XMLSchema#dateTime", "", ""]
@@ -34,7 +70,17 @@ def test_small_string_cg():
         ["http://example.com/s01", "http://example.com/op1", "http://example.com/o2", "globalId", "", ""]
         ["http://example.com/s01", "http://example.com/op2", "http://example.com/o3", "globalId", "", ""]
         """
-    d = ConjunctiveGraph().parse(data=s, format="hext")
+    d = ConjunctiveGraph(identifier=DATASET_DEFAULT_GRAPH_ID)
+    d.parse(data=s, format="hext")
+
+    expected_graph_names = (
+        URIRef(DATASET_DEFAULT_GRAPH_ID),
+        URIRef("https://example.com/graph/1"),
+        BNode("graph-2"),
+    )
+    for graph in d.contexts():
+        assert graph.identifier in expected_graph_names
+
     assert len(d) == 10