diff --git a/rdflib/plugins/parsers/jsonld.py b/rdflib/plugins/parsers/jsonld.py index 4eb05fcee0..a1dfc64fe8 100644 --- a/rdflib/plugins/parsers/jsonld.py +++ b/rdflib/plugins/parsers/jsonld.py @@ -525,7 +525,10 @@ def _to_object( return # type: ignore[return-value] return Literal(value, lang=lang) elif datatype: - return Literal(value, datatype=context.expand(datatype)) + datatype = context.expand(datatype) + if datatype is None: + datatype = XSD.string + return Literal(value, datatype=datatype) else: return Literal(value) else: diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py index 290e7d04bd..84631acbd1 100755 --- a/rdflib/plugins/parsers/notation3.py +++ b/rdflib/plugins/parsers/notation3.py @@ -1506,7 +1506,7 @@ def object( j, s = self.strconst(argstr, i, delim) - res.append(self._store.newLiteral(s)) # type: ignore[call-arg] # TODO FIXME + res.append(Literal(s)) return j else: return -1 @@ -1570,11 +1570,14 @@ def nodeOrLiteral(self, argstr: str, i: int, res: MutableSequence[Any]) -> int: i = m.end() lang = argstr[j + 1 : i] j = i - if argstr[j : j + 2] == "^^": + res.append(Literal(s, lang=lang)) + elif argstr[j : j + 2] == "^^": res2: typing.List[Any] = [] j = self.uri_ref2(argstr, j + 2, res2) # Read datatype URI dt = res2[0] - res.append(self._store.newLiteral(s, dt, lang)) + res.append(Literal(s, datatype=dt)) + else: + res.append(Literal(s)) return j else: return -1 @@ -1852,11 +1855,11 @@ def newBlankNode( bn = BNode(str(arg[0]).split("#").pop().replace("_", "b")) return bn - def newLiteral(self, s: str, dt: Optional[URIRef], lang: Optional[str]) -> Literal: - if dt: - return Literal(s, datatype=dt) - else: - return Literal(s, lang=lang) + # def newLiteral(self, s: str, dt: Optional[URIRef], lang: Optional[str]) -> Literal: + # if dt: + # return Literal(s, datatype=dt) + # else: + # return Literal(s, lang=lang) def newList(self, n: typing.List[Any], f: Optional[Formula]) -> IdentifiedNode: nil = self.newSymbol("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil") diff --git a/rdflib/plugins/parsers/ntriples.py b/rdflib/plugins/parsers/ntriples.py index 09656faff4..b5a033e528 100644 --- a/rdflib/plugins/parsers/ntriples.py +++ b/rdflib/plugins/parsers/ntriples.py @@ -24,6 +24,7 @@ from rdflib.compat import _string_escape_map, decodeUnicodeEscape from rdflib.exceptions import ParserError as ParseError +from rdflib.namespace import XSD from rdflib.parser import InputSource, Parser from rdflib.term import BNode as bNode from rdflib.term import Literal @@ -326,7 +327,7 @@ def literal(self) -> Union["te.Literal[False]", Literal]: dtype = uriquote(dtype) dtype = URI(dtype) else: - dtype = None + dtype = XSD.string if lang and dtype: raise ParseError("Can't have both a language and a datatype") lit = unquote(lit) diff --git a/rdflib/plugins/parsers/rdfxml.py b/rdflib/plugins/parsers/rdfxml.py index 03650fc98e..56cef5ec39 100644 --- a/rdflib/plugins/parsers/rdfxml.py +++ b/rdflib/plugins/parsers/rdfxml.py @@ -11,7 +11,7 @@ from rdflib.exceptions import Error, ParserError from rdflib.graph import Graph -from rdflib.namespace import RDF, is_ncname +from rdflib.namespace import RDF, XSD, is_ncname from rdflib.parser import InputSource, Parser from rdflib.plugins.parsers.RDFVOC import RDFVOC from rdflib.term import BNode, Identifier, Literal, URIRef @@ -361,7 +361,7 @@ def node_element_start( if not att.startswith(str(RDFNS)): predicate = absolutize(att) try: - object = Literal(atts[att], language) + object = Literal(atts[att], lang=language) except Error as e: # type error: Argument 1 to "error" of "RDFXMLHandler" has incompatible type "Optional[str]"; expected "str" self.error(e.msg) # type: ignore[arg-type] @@ -377,7 +377,7 @@ def node_element_start( else: predicate = absolutize(att) try: - object = Literal(atts[att], language) + object = Literal(atts[att], lang=language) except Error as e: # type error: Argument 1 to "error" of "RDFXMLHandler" has incompatible type "Optional[str]"; expected "str" self.error(e.msg) # type: ignore[arg-type] @@ -510,7 +510,11 @@ def property_element_start( if datatype is not None: # type error: Statement is unreachable language = None # type: ignore[unreachable] - o = Literal(atts[att], language, datatype) + o = Literal( + atts[att], + language, + XSD.string if datatype is None else datatype, + ) if object is None: object = BNode() diff --git a/rdflib/plugins/parsers/trix.py b/rdflib/plugins/parsers/trix.py index 8baaf5ca4d..e9105d7550 100644 --- a/rdflib/plugins/parsers/trix.py +++ b/rdflib/plugins/parsers/trix.py @@ -9,7 +9,7 @@ from rdflib.exceptions import ParserError from rdflib.graph import Graph -from rdflib.namespace import Namespace +from rdflib.namespace import XSD, Namespace from rdflib.parser import InputSource, Parser from rdflib.store import Store from rdflib.term import BNode, Identifier, Literal, URIRef @@ -185,7 +185,11 @@ def endElementNS(self, name: Tuple[Optional[str], str], qname) -> None: elif name[1] == "plainLiteral" or name[1] == "typedLiteral": if self.state == 4: self.triple += [ - Literal(self.chars, lang=self.lang, datatype=self.datatype) + Literal( + self.chars, + lang=self.lang, + datatype=XSD.string if self.datatype is None else self.datatype, + ) ] else: self.error( diff --git a/rdflib/plugins/sparql/aggregates.py b/rdflib/plugins/sparql/aggregates.py index d4a7d65924..5f42ab4cec 100644 --- a/rdflib/plugins/sparql/aggregates.py +++ b/rdflib/plugins/sparql/aggregates.py @@ -140,7 +140,9 @@ def update(self, row: FrozenBindings, aggregator: "Aggregator") -> None: pass def get_value(self) -> Literal: - return Literal(self.value, datatype=self.datatype) + return Literal( + self.value, datatype=XSD.integer if self.datatype is None else self.datatype + ) class Average(Accumulator): diff --git a/rdflib/plugins/sparql/evaluate.py b/rdflib/plugins/sparql/evaluate.py index 08dd02d57b..9f3a80e555 100644 --- a/rdflib/plugins/sparql/evaluate.py +++ b/rdflib/plugins/sparql/evaluate.py @@ -39,6 +39,7 @@ from pyparsing import ParseException from rdflib.graph import Graph +from rdflib.namespace import XSD from rdflib.plugins.sparql import CUSTOM_EVALS, parser from rdflib.plugins.sparql.aggregates import Aggregator from rdflib.plugins.sparql.evalutils import ( @@ -424,7 +425,7 @@ def _yieldBindingsFromServiceCallResult( elif var_type == "literal": res_dict[Variable(var)] = Literal( var_binding["value"], - datatype=var_binding.get("datatype"), + datatype=var_binding.get("datatype", XSD.string), lang=var_binding.get("xml:lang"), ) # This is here because of diff --git a/rdflib/plugins/sparql/operators.py b/rdflib/plugins/sparql/operators.py index 908b1d5c5a..eea4ff4712 100644 --- a/rdflib/plugins/sparql/operators.py +++ b/rdflib/plugins/sparql/operators.py @@ -290,18 +290,18 @@ def Builtin_CONCAT(expr: Expr, ctx) -> Literal: # dt/lang passed on only if they all match - dt = set(x.datatype for x in expr.arg if isinstance(x, Literal)) - # type error: Incompatible types in assignment (expression has type "Optional[str]", variable has type "Set[Optional[str]]") - dt = dt.pop() if len(dt) == 1 else None # type: ignore[assignment] - - lang = set(x.language for x in expr.arg if isinstance(x, Literal)) - # type error: error: Incompatible types in assignment (expression has type "Optional[str]", variable has type "Set[Optional[str]]") - lang = lang.pop() if len(lang) == 1 else None # type: ignore[assignment] - - # NOTE on type errors: this is because same variable is used for two incompatibel types - # type error: Argument "datatype" to "Literal" has incompatible type "Set[Any]"; expected "Optional[str]" [arg-type] - # type error: Argument "lang" to "Literal" has incompatible type "Set[Any]"; expected "Optional[str]" - return Literal("".join(string(x) for x in expr.arg), datatype=dt, lang=lang) # type: ignore[arg-type] + args = [x for x in expr.arg if isinstance(x, Literal)] + + if not args: + return Literal("") + + dt_set = set(x.datatype for x in args) + dt = dt_set.pop() + + lang_set = set(x.language for x in args) + lang = lang_set.pop() + + return Literal("".join(string(x) for x in expr.arg), datatype=dt, lang=lang) def _compatibleStrings(a: Literal, b: Literal) -> None: diff --git a/rdflib/plugins/sparql/parser.py b/rdflib/plugins/sparql/parser.py index 455377ed1b..699f4827b6 100644 --- a/rdflib/plugins/sparql/parser.py +++ b/rdflib/plugins/sparql/parser.py @@ -7,9 +7,7 @@ import re import sys -from typing import Any, BinaryIO, List -from typing import Optional as OptionalType -from typing import TextIO, Tuple, Union +from typing import Any, BinaryIO, List, TextIO, Tuple, Union from pyparsing import CaselessKeyword as Keyword # watch out :) from pyparsing import ( @@ -45,11 +43,11 @@ def neg(literal: rdflib.Literal) -> rdflib.Literal: return rdflib.Literal(-literal, datatype=literal.datatype) -def setLanguage(terms: Tuple[Any, OptionalType[str]]) -> rdflib.Literal: +def setLanguage(terms: Tuple[Any, str]) -> rdflib.Literal: return rdflib.Literal(terms[0], lang=terms[1]) -def setDataType(terms: Tuple[Any, OptionalType[str]]) -> rdflib.Literal: +def setDataType(terms: Tuple[Any, str]) -> rdflib.Literal: return rdflib.Literal(terms[0], datatype=terms[1]) diff --git a/rdflib/plugins/sparql/results/jsonresults.py b/rdflib/plugins/sparql/results/jsonresults.py index ecdb01247e..4f667f8390 100644 --- a/rdflib/plugins/sparql/results/jsonresults.py +++ b/rdflib/plugins/sparql/results/jsonresults.py @@ -3,6 +3,7 @@ import json from typing import IO, Any, Dict, Mapping, MutableSequence, Optional +from rdflib.namespace import XSD from rdflib.query import Result, ResultException, ResultParser, ResultSerializer from rdflib.term import BNode, Identifier, Literal, URIRef, Variable @@ -101,7 +102,9 @@ def parseJsonTerm(d: Dict[str, str]) -> Identifier: if t == "uri": return URIRef(d["value"]) elif t == "literal": - return Literal(d["value"], datatype=d.get("datatype"), lang=d.get("xml:lang")) + return Literal( + d["value"], datatype=d.get("datatype", XSD.string), lang=d.get("xml:lang") + ) elif t == "typed-literal": return Literal(d["value"], datatype=URIRef(d["datatype"])) elif t == "bnode": diff --git a/rdflib/plugins/sparql/results/xmlresults.py b/rdflib/plugins/sparql/results/xmlresults.py index 21ee3449d1..f56b4ed7e9 100644 --- a/rdflib/plugins/sparql/results/xmlresults.py +++ b/rdflib/plugins/sparql/results/xmlresults.py @@ -18,6 +18,7 @@ from xml.sax.saxutils import XMLGenerator from xml.sax.xmlreader import AttributesNSImpl +from rdflib.namespace import XSD from rdflib.query import Result, ResultException, ResultParser, ResultSerializer from rdflib.term import BNode, Identifier, Literal, URIRef, Variable @@ -131,7 +132,7 @@ def parseTerm(element: xml_etree.Element) -> Union[URIRef, Literal, BNode]: if tag == RESULTS_NS_ET + "literal": if text is None: text = "" - datatype = None + datatype = XSD.string lang = None if element.get("datatype", None): # type error: Argument 1 to "URIRef" has incompatible type "Optional[str]"; expected "str" diff --git a/rdflib/term.py b/rdflib/term.py index e68c34527e..71db3f9ae0 100644 --- a/rdflib/term.py +++ b/rdflib/term.py @@ -104,25 +104,25 @@ def _is_valid_uri(uri: str) -> bool: @overload -def _validate_language(lang: None) -> NoReturn: +def _validate_language_tag(tag: None) -> NoReturn: ... @overload -def _validate_language(lang: str) -> None: +def _validate_language_tag(tag: str) -> None: ... -def _validate_language(lang: Optional[str]) -> None: - if lang is None: +def _validate_language_tag(tag: Optional[str]) -> None: + if tag is None: raise ValueError("language can not be None") - if not lang: + if not tag: raise ValueError("language can not be empty string") - if not _lang_tag_regex.match(lang): + if not _lang_tag_regex.match(tag): raise ValueError( - f"language must match the pattern " + "language tag must match the pattern " "^[a-zA-Z]+(?:-[a-zA-Z0-9]+)*$ " - "but was {lang}" + f"but was {tag}" ) @@ -666,7 +666,7 @@ def __new__( cls, lexical_or_value: Any, *, - language: str, + lang: str, normalize: Optional[bool] = None, ) -> Literal: ... @@ -685,7 +685,7 @@ def __new__( cls, lexical_or_value: Any, *, - datatype: str = _XSD_STRING, + datatype: str, normalize: Optional[bool] = None, ) -> Literal: ... @@ -694,7 +694,7 @@ def __new__( def __new__( cls, lexical_or_value: str, - language: Optional[str], + lang: Optional[str], datatype: str, normalize: Optional[bool] = None, ) -> Literal: @@ -703,7 +703,7 @@ def __new__( def __new__( cls, lexical_or_value: Any, - language: Optional[str] = None, + lang: Optional[str] = None, datatype: Optional[str] = None, normalize: Optional[bool] = None, ) -> "Literal": @@ -711,9 +711,9 @@ def __new__( if datatype is not None: datatype = URIRef(datatype) - if language is not None or datatype == _RDF_LANGSTRING: - _validate_language(language) - if datatype == None: + if lang is not None or datatype == _RDF_LANGSTRING: + _validate_language_tag(lang) + if datatype is None: datatype = _RDF_LANGSTRING value = None @@ -721,7 +721,7 @@ def __new__( if isinstance(lexical_or_value, Literal): # create from another Literal instance - lang = language if language is not None else lexical_or_value.language + lang = lang if lang is not None else lexical_or_value.language if datatype is not None: # override datatype value = _castLexicalToPython(lexical_or_value, datatype) @@ -729,7 +729,7 @@ def __new__( datatype = lexical_or_value.datatype value = lexical_or_value.value - elif isinstance(lexical_or_value, str) or isinstance(lexical_or_value, bytes): + elif isinstance(lexical_or_value, (str, bytes, bytearray)): if datatype is None: datatype = _XSD_STRING @@ -753,7 +753,7 @@ def __new__( if _value is not None: lexical_or_value = _value - if lang is not None and datatype != ns.RDF.langString: + if lang is not None and datatype != _RDF_LANGSTRING: raise ValueError(f"Can't have a language tag with datatype {datatype}!") if isinstance(lexical_or_value, bytes): @@ -771,7 +771,7 @@ def __new__( def _make( cls, lexical: Any, - language: Optional[str], + lang: Optional[str], datatype: URIRef, value: Any, ill_typed: Optional[bool], @@ -781,7 +781,7 @@ def _make( except UnicodeDecodeError: inst = str.__new__(cls, lexical, "utf-8") - inst._language = language + inst._language = lang inst._datatype = datatype inst._value = value inst._ill_typed = ill_typed @@ -803,7 +803,7 @@ def normalize(self) -> "Literal": """ if self.value is not None: - return Literal(self.value, datatype=self.datatype, language=self.language) + return Literal(self.value, datatype=self.datatype, lang=self.language) else: return self diff --git a/test/test_graph/test_variants.py b/test/test_graph/test_variants.py index 09b2a156dc..793076d8a0 100644 --- a/test/test_graph/test_variants.py +++ b/test/test_graph/test_variants.py @@ -28,7 +28,6 @@ import rdflib.compare import rdflib.util from rdflib.graph import Dataset -from rdflib.namespace import XSD from rdflib.term import URIRef from rdflib.util import guess_format @@ -234,7 +233,7 @@ def test_variants(graph_variant: GraphVariants) -> None: # Stripping data types as different parsers (e.g. hext) have different # opinions of when a bare string is of datatype XSD.string or not. # Probably something that needs more investigation. - GraphHelper.strip_literal_datatypes(graph, {XSD.string}) + # GraphHelper.strip_literal_datatypes(graph, {XSD.string}) graph_variant.asserts.check(first_graph, graph) if first_graph is None: first_graph = graph diff --git a/test/test_literal/test_literal.py b/test/test_literal/test_literal.py index 074abe1e67..e77f93fb78 100644 --- a/test/test_literal/test_literal.py +++ b/test/test_literal/test_literal.py @@ -136,7 +136,7 @@ def test_cant_pass_invalid_lang( ("2147483648", XSD.integer, False), ("valid ASCII", XSD.string, False), pytest.param("هذا رجل ثلج⛄", XSD.string, False, id="snowman-ar"), - ("More ASCII", None, None), + ("More ASCII", XSD.string, None), ("Not a valid time", XSD.time, True), ("Not a valid date", XSD.date, True), ("7264666c6962", XSD.hexBinary, False), @@ -149,7 +149,7 @@ def test_cant_pass_invalid_lang( def test_ill_typed_literals( self, lexical: Union[bytes, str], - datatype: Optional[URIRef], + datatype: URIRef, is_ill_typed: Optional[bool], ) -> None: """ @@ -840,7 +840,6 @@ def unlexify(s: str) -> str: normal_l = Literal(s) assert str(normal_l) == s assert normal_l.toPython() == s - assert normal_l.datatype is None specific_l = Literal("--%s--" % s, datatype=datatype) assert str(specific_l) == lexify(s) @@ -998,8 +997,8 @@ def unlexify(s: str) -> str: ), (lambda: Literal(Literal("1")), Literal("1")), ( - lambda: Literal(Literal("blue sky", "en")), - Literal("blue sky", "en"), + lambda: Literal(Literal("blue sky", lang="en")), + Literal("blue sky", lang="en"), ), ], ) diff --git a/test/utils/__init__.py b/test/utils/__init__.py index dc27251a3b..44e2687f6f 100644 --- a/test/utils/__init__.py +++ b/test/utils/__init__.py @@ -35,7 +35,7 @@ from rdflib import BNode, ConjunctiveGraph, Graph from rdflib.graph import Dataset from rdflib.plugin import Plugin -from rdflib.term import Identifier, Literal, Node, URIRef +from rdflib.term import Identifier, Node, URIRef PluginT = TypeVar("PluginT") @@ -368,19 +368,6 @@ def get_contexts(cgraph: ConjunctiveGraph) -> Dict[URIRef, Graph]: for id, lhs_context in lhs_contexts.items(): cls.assert_isomorphic(lhs_context, rhs_contexts[id], message) - @classmethod - def strip_literal_datatypes(cls, graph: Graph, datatypes: Set[URIRef]) -> None: - """ - Strips datatypes in the provided set from literals in the graph. - """ - for object in graph.objects(): - if not isinstance(object, Literal): - continue - if object.datatype is None: - continue - if object.datatype in datatypes: - object._datatype = None - def eq_(lhs, rhs, msg=None): """