RDFLib · ashleysommer · Jul 29, 2024 · Jul 26, 2024 · Jul 27, 2024 · Jul 29, 2024
diff --git a/README.md b/README.md
@@ -62,7 +62,7 @@ The stable release of RDFLib may be installed with Python's package management t
 
 Some features of RDFLib require optional dependencies which may be installed using *pip* extras:
 
-    $ pip install rdflib[berkeleydb,networkx,html,lxml]
+    $ pip install rdflib[berkeleydb,networkx,html,lxml,orjson]
 
 Alternatively manually download the package from the Python Package
 Index (PyPI) at https://pypi.python.org/pypi/rdflib

diff --git a/devtools/constraints.min b/devtools/constraints.min
@@ -8,3 +8,4 @@ berkeleydb==18.1.2
 networkx==2.0
 html5lib==1.0.1
 lxml==4.3.0
+orjson==3.9.14
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -45,6 +45,7 @@ berkeleydb = {version = "^18.1.0", optional = true}
 networkx = {version = ">=2,<4", optional = true}
 html5lib = {version = "^1.0", optional = true}
 lxml = {version = ">=4.3,<6.0", optional = true}
+orjson = {version = ">=3.9.14,<4", optional = true}
 
 [tool.poetry.group.dev.dependencies]
 black = "24.4.2"
@@ -74,6 +75,7 @@ berkeleydb = ["berkeleydb"]
 networkx = ["networkx"]
 html = ["html5lib"]
 lxml = ["lxml"]
+orjson = ["orjson"]
 
 [build-system]
 requires = ["poetry-core>=1.4.0"]

diff --git a/rdflib/plugins/parsers/hext.py b/rdflib/plugins/parsers/hext.py
@@ -9,12 +9,23 @@
 import json
 import warnings
 from io import TextIOWrapper
-from typing import Any, BinaryIO, List, Optional, TextIO, Union
+from typing import TYPE_CHECKING, Any, BinaryIO, List, Optional, TextIO, Union
 
 from rdflib.graph import ConjunctiveGraph, Graph
 from rdflib.parser import InputSource, Parser
 from rdflib.term import BNode, Literal, URIRef
 
+try:
+    import orjson
+
+    _HAS_ORJSON = True
+except ImportError:
+    orjson = None  # type: ignore[assignment, unused-ignore]
+    _HAS_ORJSON = False
+
+if TYPE_CHECKING:
+    from io import BufferedReader
+
 __all__ = ["HextuplesParser"]
 
 
@@ -27,16 +38,6 @@ class HextuplesParser(Parser):
     def __init__(self):
         self.skolemize = False
 
-    def _load_json_line(self, line: str) -> List[Optional[Any]]:
-        # this complex handing is because the 'value' component is
-        # allowed to be "" but not None
-        # all other "" values are treated as None
-        ret1 = json.loads(line)
-        ret2 = [x if x != "" else None for x in ret1]
-        if ret1[2] == "":
-            ret2[2] = ""
-        return ret2
-
     def _parse_hextuple(
         self, cg: ConjunctiveGraph, tup: List[Union[str, None]]
     ) -> None:
@@ -106,19 +107,50 @@ def parse(self, source: InputSource, graph: Graph, skolemize: bool = False, **kw
         cg = ConjunctiveGraph(store=graph.store, identifier=graph.identifier)
         cg.default_context = graph
 
-        text_stream: Optional[TextIO] = source.getCharacterStream()
-        if text_stream is None:
+        try:
+            text_stream: Optional[TextIO] = source.getCharacterStream()
+        except (AttributeError, LookupError):
+            text_stream = None
+        try:
             binary_stream: Optional[BinaryIO] = source.getByteStream()
-            if binary_stream is None:
-                raise ValueError(
-                    f"Source does not have a character stream or a byte stream and cannot be used {type(source)}"
-                )
-            text_stream = TextIOWrapper(binary_stream, encoding="utf-8")
+        except (AttributeError, LookupError):
+            binary_stream = None
+
+        if text_stream is None and binary_stream is None:
+            raise ValueError(
+                f"Source does not have a character stream or a byte stream and cannot be used {type(source)}"
+            )
+        if TYPE_CHECKING:
+            assert text_stream is not None or binary_stream is not None
+        use_stream: Union[TextIO, BinaryIO]
+        if _HAS_ORJSON:
+            if binary_stream is not None:
+                use_stream = binary_stream
+            else:
+                if TYPE_CHECKING:
+                    assert isinstance(text_stream, TextIOWrapper)
+                use_stream = text_stream
+            loads = orjson.loads
+        else:
+            if text_stream is not None:
+                use_stream = text_stream
+            else:
+                if TYPE_CHECKING:
+                    assert isinstance(binary_stream, BufferedReader)
+                use_stream = TextIOWrapper(binary_stream, encoding="utf-8")
+            loads = json.loads
 
-        for line in text_stream:
+        for line in use_stream:  # type: Union[str, bytes]
             if len(line) == 0 or line.isspace():
                 # Skipping empty lines because this is what was being done before for the first and last lines, albeit in an rather indirect way.
                 # The result is that we accept input that would otherwise be invalid.
                 # Possibly we should just let this result in an error.
                 continue
-            self._parse_hextuple(cg, self._load_json_line(line))
+            # this complex handling is because the 'value' component is
+            # allowed to be "" but not None
+            # all other "" values are treated as None
+            raw_line: List[str] = loads(line)
+            hex_tuple_line = [x if x != "" else None for x in raw_line]
+            if raw_line[2] == "":
+                hex_tuple_line[2] = ""
+            self._parse_hextuple(cg, hex_tuple_line)
diff --git a/rdflib/plugins/parsers/jsonld.py b/rdflib/plugins/parsers/jsonld.py
@@ -62,9 +62,11 @@
     VOCAB,
 )
 from ..shared.jsonld.util import (
+    _HAS_ORJSON,
     VOCAB_DELIMS,
     context_from_urlinputsource,
     json,
+    orjson,
     source_to_json,
 )
 
@@ -681,11 +683,18 @@ def _add_list(
 
     @staticmethod
     def _to_typed_json_value(value: Any) -> Dict[str, str]:
-        return {
-            TYPE: URIRef("%sJSON" % str(RDF)),
-            VALUE: json.dumps(
+        if _HAS_ORJSON:
+            val_string: str = orjson.dumps(
+                value,
+                option=orjson.OPT_SORT_KEYS | orjson.OPT_NON_STR_KEYS,
+            ).decode("utf-8")
+        else:
+            val_string = json.dumps(
                 value, separators=(",", ":"), sort_keys=True, ensure_ascii=False
-            ),
+            )
+        return {
+            TYPE: RDF.JSON,
+            VALUE: val_string,
         }
 
     @classmethod