diff --git a/doc/whatsnew.rst b/doc/whatsnew.rst index 10b99f76..d4fdfcb6 100644 --- a/doc/whatsnew.rst +++ b/doc/whatsnew.rst @@ -7,6 +7,7 @@ Next release ============ - Bug fix for writing :class:`.VersionableArtefact` to SDMX-ML 2.1: :class:`KeyError` was raised if :attr:`.VersionableArtefact.version` was an instance of :class:`.Version` (:pull:`198`). +- Bug fix for reading data from structure-specific SDMX-ML: :class:`.XMLParseError` / :class:`NotImplementedError` was raised if reading 2 messages in sequence with different XML namespaces defined (:pull:`200`, thanks :gh-user:`mephinet` for :issue:`199`). v2.18.0 (2024-10-15) ==================== diff --git a/pyproject.toml b/pyproject.toml index 5a41d0ed..c18ff692 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -95,10 +95,12 @@ select = ["C9", "E", "F", "I", "W"] ignore = ["E501", "W191"] # Exceptions: # - .client._handle_get_kwargs: 12 -# - .reader.xml.v21.read_message: 15 +# - .reader.json.read_message: 15 +# - .reader.xml.v21._component_end: 12 +# - .testing.generate_endpoint_tests: 11 # - .writer.pandas._maybe_convert_datetime: 23 # - .writer.pandas.write_dataset: 12 -mccabe.max-complexity = 11 +mccabe.max-complexity = 10 [tool.setuptools.packages] find = {} diff --git a/sdmx/client.py b/sdmx/client.py index 6d037e34..abc2a326 100644 --- a/sdmx/client.py +++ b/sdmx/client.py @@ -244,7 +244,7 @@ def _request_from_url(self, kwargs): return requests.Request("get", url, params=parameters, headers=headers) - def _handle_get_kwargs(self, kwargs): # noqa: C901 TODO reduce complexity 12 → ≤11 + def _handle_get_kwargs(self, kwargs): # noqa: C901 TODO reduce complexity 12 → ≤10 if kwargs.pop("validate", None) is not None: warn("validate= keyword argument to Client.get()", DeprecationWarning) diff --git a/sdmx/format/xml/common.py b/sdmx/format/xml/common.py index b92b12f2..0a6a4115 100644 --- a/sdmx/format/xml/common.py +++ b/sdmx/format/xml/common.py @@ -86,6 +86,7 @@ # To be formatted "com": "{}/common", "md": "{}/metadata/generic", + "md_ss": 
"{}/metadata/structurespecific", "data": "{}/data/structurespecific", "str": "{}/structure", "mes": "{}/message", @@ -273,6 +274,8 @@ def install_schemas( class XMLFormat: + """Information about an SDMX-ML format.""" + NS: Mapping[str, Optional[str]] _class_tag: list @@ -306,29 +309,32 @@ def ns_prefix(self, url) -> str: return prefix raise ValueError(url) + _NS_PATTERN = re.compile(r"(\{(?P.*)\}|(?P.*):)?(?P.*)") + @lru_cache() - def qname(self, ns_or_name, name=None) -> QName: + def qname(self, ns_or_name: str, name: Optional[str] = None) -> QName: """Return a fully-qualified tag `name` in namespace `ns`.""" if isinstance(ns_or_name, QName): # Already a QName; do nothing return ns_or_name - else: - if name is None: - match = re.fullmatch( - r"(\{(?P.*)\}|(?P.*):)?(?P.*)", ns_or_name - ) - assert match - name = match.group("name") - if ns_key := match.group("ns_key"): - ns = self.NS[ns_key] - elif ns := match.group("ns_full"): - pass - else: - ns = None + + if name is None: + # `ns_or_name` contains the local name ("tag") and possibly a namespace + # prefix ("ns:tag") or full namespace name ("{foo}tag") + match = self._NS_PATTERN.fullmatch(ns_or_name) + assert match + name = match.group("localname") + if prefix := match.group("ns_prefix"): + ns = self.NS[prefix] + elif ns := match.group("ns"): + pass else: - ns = self.NS[ns_or_name] + ns = None # Tag without namespace + else: + # `ns_or_name` is the namespace prefix; `name` is the local name + ns = self.NS[ns_or_name] - return QName(ns, name) + return QName(ns, name) @lru_cache() def class_for_tag(self, tag) -> Optional[type]: diff --git a/sdmx/reader/json.py b/sdmx/reader/json.py index 109835f0..e07f4b1c 100644 --- a/sdmx/reader/json.py +++ b/sdmx/reader/json.py @@ -37,7 +37,7 @@ class Reader(BaseReader): def detect(cls, content): return content.startswith(b"{") - def read_message(self, source, structure=None, **kwargs): # noqa: C901 TODO reduce complexity 15 → ≤11 + def read_message(self, source, structure=None, 
**kwargs): # noqa: C901 TODO reduce complexity 15 → ≤10 # Initialize message instance msg = DataMessage() diff --git a/sdmx/reader/xml/common.py b/sdmx/reader/xml/common.py index 53195f5e..55464611 100644 --- a/sdmx/reader/xml/common.py +++ b/sdmx/reader/xml/common.py @@ -129,9 +129,9 @@ class XMLEventReader(BaseReader): #: :class:`.BaseReference` subclass used by this reader. Reference: ClassVar[type[BaseReference]] - # Mapping from (QName, ["start", "end"]) to a function that parses the element/event - # or else None - parser: ClassVar[Mapping[tuple[QName, str], Callable]] + #: Mapping from (QName, ["start", "end"]) to a function that parses the + #: element/event or else None (no parsing). + parser: ClassVar[dict[tuple[QName, str], Callable]] # One-way counter for use in stacks _count: Iterator[int] @@ -151,7 +151,7 @@ def __init__(self): # BaseReader methods - def read_message( # noqa: C901 TODO reduce complexity 12 → ≤11 + def read_message( self, source, structure=None, @@ -196,20 +196,14 @@ def read_message( # noqa: C901 TODO reduce complexity 12 → ≤11 # Don't know what to do for this (element, event) raise NotImplementedError(element.tag, event) from None - try: - # Parse the element - result = func(self, element) - except TypeError: - if func is None: # Explicitly no parser for this (element, event) - continue # Skip - else: # pragma: no cover - raise - else: - # Store the result - self.push(result) + if func is None: + continue # Explicitly no parser for this (element, event) → skip + + result = func(self, element) # Parse the element + self.push(result) # Store the result - if event == "end": - element.clear() # Free memory + if event == "end": + element.clear() # Free memory except Exception as exc: # Parsing failed; display some diagnostic information @@ -368,10 +362,6 @@ def unstash(self): self.stack[s].update(values) # Delegate to version-specific module - @classmethod - def NS(cls): - return cls.format.NS - @classmethod def class_for_tag(cls, tag: 
str) -> type: return cls.format.class_for_tag(tag) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index ee23e45d..026d7a1b 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -9,7 +9,7 @@ import logging import re from copy import copy -from itertools import chain +from itertools import chain, filterfalse from sys import maxsize from typing import Any, MutableMapping, Optional, cast @@ -34,7 +34,6 @@ ) log = logging.getLogger(__name__) -log.setLevel(logging.DEBUG) class _NoText: @@ -151,14 +150,11 @@ def _message(reader: Reader, elem): reader.push("DataSetClass", model.get_class(f"{QName(elem).localname}Set")) # Handle namespaces mapped on `elem` but not part of the standard set - for key, value in filter( - lambda kv: kv[1] not in set(reader.NS().values()), elem.nsmap.items() - ): - # Register the namespace - reader.NS().update({key: value}) - # Use _ds_start() and _ds_end() to handle <{key}:DataSet> elements - reader.start(f"{key}:DataSet", only=False)(_ds_start) - reader.end(f"{key}:DataSet", only=False)(_ds_end) + existing_ns = set(reader.format.NS.values()) + for namespace in filterfalse(existing_ns.__contains__, elem.nsmap.values()): + # Use _ds_start() and _ds_end() to handle <{namespace}DataSet> elements + reader.parser[QName(namespace, "DataSet"), "start"] = _ds_start + reader.parser[QName(namespace, "DataSet"), "end"] = _ds_end # Instantiate the message object return reader.class_for_tag(elem.tag)() @@ -602,7 +598,7 @@ def _maybe_unbounded(value: str) -> Optional[int]: return None if value == "unbounded" else int(value) -# TODO Reduce complexity from 12 → 11, by adding separate parsers for certain COMPONENTs +# TODO Reduce complexity from 12 → ≤10, by adding separate parsers for some COMPONENTs @end(COMPONENT, only=False) @possible_reference(unstash=True) def _component_end(reader: Reader, elem): # noqa: C901 @@ -1160,7 +1156,7 @@ def _obs_ss(reader, elem): except KeyError: pass else: - elem.attrib[dim_at_obs.id] = 
reader.qname(tmp).localname + elem.attrib[dim_at_obs.id] = tmp.rpartition(":")[-1]  # Local name only if ss_without_structure and dim_at_obs is not model.AllDimensions: # Create the observation key @@ -1241,8 +1237,10 @@ def _mds_start(reader, elem): mds = reader.class_for_tag(elem.tag)() # Retrieve the (message-local) ID referencing a data structure definition - id = elem.attrib.get("structureRef", None) or elem.attrib.get( - reader.qname("metadata:structureRef"), None + id = ( + elem.attrib.get("structureRef", None) + or elem.attrib.get(reader.qname("md:structureRef"), None) + or elem.attrib.get(reader.qname("md_ss:structureRef"), None) ) # Get a reference to the MSD that structures the data set diff --git a/sdmx/testing/__init__.py b/sdmx/testing/__init__.py index 6ad4b152..12ac1c9e 100644 --- a/sdmx/testing/__init__.py +++ b/sdmx/testing/__init__.py @@ -134,7 +134,7 @@ def parametrize_specimens(metafunc): } -def generate_endpoint_tests(metafunc): +def generate_endpoint_tests(metafunc): # noqa: C901 TODO reduce complexity 11 → ≤10 """pytest hook for parametrizing tests that need an "endpoint" fixture. This function relies on the :class:`.DataSourceTest` base class defined in diff --git a/sdmx/tests/reader/test_reader_xml_v21.py b/sdmx/tests/reader/test_reader_xml_v21.py index 8fdf7805..c69d4296 100644 --- a/sdmx/tests/reader/test_reader_xml_v21.py +++ b/sdmx/tests/reader/test_reader_xml_v21.py @@ -201,6 +201,41 @@ def test_gh_164(specimen): assert isinstance(da.related_to, v21.NoSpecifiedRelationship) +def test_gh_199(): + """Test of https://github.com/khaeru/sdmx/issues/199.""" + import sdmx.format.xml.v21 + + # Template for DSD URN + URN = "urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FOO:BAR({})" + + # Template for SDMX-ML data message + CONTENT = """ + + + ... 
+ +""" + + # Construct a URN and message; store as BytesIO + urn1 = URN.format("1") + dsd1 = v21.DataStructureDefinition(urn=urn1) + f1 = BytesIO(CONTENT.format(urn1).encode()) + + # Construct a *different* URN and message with this other URN mapped to the "u:" XML + # namespace prefix + urn2 = URN.format("2") + dsd2 = v21.DataStructureDefinition(urn=urn2) + f2 = BytesIO(CONTENT.format(urn2).encode()) + + # First message can be parsed + sdmx.read_sdmx(f1, structure=dsd1) + + # #199: raises XMLParseError/NotImplementedError + sdmx.read_sdmx(f2, structure=dsd2) + + # Each entry is a tuple with 2 elements: # 1. an instance of lxml.etree.Element to be parsed. # 2. Either: diff --git a/sdmx/writer/pandas.py b/sdmx/writer/pandas.py index 9f341b77..f5dae81a 100644 --- a/sdmx/writer/pandas.py +++ b/sdmx/writer/pandas.py @@ -209,7 +209,7 @@ def _rp(obj: model.RangePeriod, **kwargs): @writer -def write_dataset( # noqa: C901 TODO reduce complexity 12 → ≤11 +def write_dataset( # noqa: C901 TODO reduce complexity 12 → ≤10 obj: model.DataSet, attributes="", dtype=np.float64, @@ -377,7 +377,7 @@ def _dataset_compat(df, datetime, kwargs): return df, datetime, kwargs -def _maybe_convert_datetime(df, arg, obj, dsd=None): # noqa: C901 TODO reduce complexity 23 → ≤11 +def _maybe_convert_datetime(df, arg, obj, dsd=None): # noqa: C901 TODO reduce complexity 23 → ≤10 """Helper for :meth:`.write_dataset` to handle datetime indices. Parameters