From 48837c32253f98017f46595f57248604ea2cb7d6 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 27 Dec 2023 16:05:38 -0500 Subject: [PATCH 001/103] BaseDataflow.structure inherits is_external_reference --- sdmx/model/common.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sdmx/model/common.py b/sdmx/model/common.py index 3447a3f47..782c96a64 100644 --- a/sdmx/model/common.py +++ b/sdmx/model/common.py @@ -1506,10 +1506,20 @@ def compare(self, other, strict=True): ) +@dataclass(repr=False) class BaseDataflow(StructureUsage, ConstrainableArtefact): """Common features of SDMX 2.1 DataflowDefinition and 3.0 Dataflow.""" - structure: BaseDataStructureDefinition + structure: BaseDataStructureDefinition = field( + default_factory=BaseDataStructureDefinition + ) + + def __post_init__(self): + super().__post_init__() + + # Factory default `structure` inherits is_external_reference from the data flow + if self.structure.is_external_reference is None: + self.structure.is_external_reference = self.is_external_reference def iter_keys( self, constraint: Optional[BaseConstraint] = None, dims: List[str] = [] From d82366c94bee608349e04f3b9921fba0f57d9cbc Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 27 Dec 2023 16:06:10 -0500 Subject: [PATCH 002/103] Mark 4 sources as not supporting actualconstraint --- sdmx/sources.json | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/sdmx/sources.json b/sdmx/sources.json index 1eb2188e9..35970ab09 100644 --- a/sdmx/sources.json +++ b/sdmx/sources.json @@ -69,7 +69,10 @@ { "id": "COMP", "name": "European Commission Directorate General for Competition", - "url": "https://webgate.ec.europa.eu/comp/redisstat/api/dissemination/sdmx/2.1" + "url": "https://webgate.ec.europa.eu/comp/redisstat/api/dissemination/sdmx/2.1", + "supports": { + "actualconstraint": false + } }, { "id": "ECB", @@ -90,7 +93,10 @@ { "id": "EMPL", "name": "European Commission 
Directorate General for Employment, Social Affairs, and Inclusion", - "url": "https://webgate.ec.europa.eu/empl/redisstat/api/dissemination/sdmx/2.1" + "url": "https://webgate.ec.europa.eu/empl/redisstat/api/dissemination/sdmx/2.1", + "supports": { + "actualconstraint": false + } }, { "id": "ESTAT", @@ -115,12 +121,18 @@ { "id": "ESTAT_COMEXT", "name": "Eurostat (Comext and Prodcom datasets)", - "url": "https://ec.europa.eu/eurostat/api/comext/dissemination/sdmx/2.1" + "url": "https://ec.europa.eu/eurostat/api/comext/dissemination/sdmx/2.1", + "supports": { + "actualconstraint": false + } }, { "id": "GROW", "name": "European Commission Directorate General for Internal Market, Industry, Entrepreneurship and SMEs", - "url": "https://webgate.ec.europa.eu/grow/redisstat/api/dissemination/sdmx/2.1" + "url": "https://webgate.ec.europa.eu/grow/redisstat/api/dissemination/sdmx/2.1", + "supports": { + "actualconstraint": false + } }, { "id": "ILO", From 5abf000cbcc49f5e4d1272cf7d8dc532d4980c9e Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 27 Dec 2023 16:07:26 -0500 Subject: [PATCH 003/103] Add args for tests of actualconstraint from 4 sources --- sdmx/tests/test_sources.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/sdmx/tests/test_sources.py b/sdmx/tests/test_sources.py index 49ff2fd8f..da31a35a3 100644 --- a/sdmx/tests/test_sources.py +++ b/sdmx/tests/test_sources.py @@ -156,6 +156,10 @@ class TestBBK(DataSourceTest): class TestBIS(DataSourceTest): source_id = "BIS" + endpoint_args = { + "actualconstraint": dict(resource_id="CBP_D_24D"), + } + class TestECB(DataSourceTest): source_id = "ECB" @@ -363,6 +367,9 @@ class TestISTAT(DataSourceTest): "organisationscheme": HTTPError, # 400 "structure": NotImplementedError, # 501 } + endpoint_args = { + "actualconstraint": dict(resource_id="CONS_92_143"), + } @pytest.mark.network def test_gh_75(self, client): @@ -480,10 +487,11 @@ class TestNBB(DataSourceTest): class 
TestOECD(DataSourceTest): source_id = "OECD" endpoint_args = { + "actualconstraint": dict(resource_id="CR_A_DSD_DEBT_TRANS_COLL@DF_MICRO"), "data": dict( resource_id="DSD_MSTI@DF_MSTI", headers={"Accept-Encoding": "compress, gzip"}, - ) + ), } @@ -519,11 +527,12 @@ class TestSPC(DataSourceTest): "structure": NotImplementedError, # 501 } endpoint_args = { + "actualconstraint": dict(resource_id="CR_A_DF_ADBKI"), "data": dict( resource_id="DF_CPI", key="A.CK+FJ..", params=dict(startPeriod=2010, endPeriod=2015), - ) + ), } From 2202a1686031e2c619fa6daf2eaddb66b7eaa344 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 27 Dec 2023 16:08:42 -0500 Subject: [PATCH 004/103] Improve .reader.xml.v21._ms_component() Use .getdefault() to create the Component if the DSD is an external reference. --- sdmx/reader/xml/v21.py | 49 ++++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index 27f1aef8c..108cb3897 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -1294,31 +1294,44 @@ def _tr(reader, elem): def _ms_component(reader, elem, kind): """Identify the Component for a ValueSelection.""" - try: - # Navigate from the current ContentConstraint to a ConstrainableArtefact - cc_content = reader.stack[reader.Reference] - assert len(cc_content) == 1, (cc_content, reader.stack, elem.attrib) - obj = reader.resolve(next(iter(cc_content.values()))) - - if isinstance(obj, model.DataflowDefinition): - # The constrained DFD has a corresponding DSD, which has a Dimension- or - # AttributeDescriptor - cl = getattr(obj.structure, kind[0]) - elif isinstance(obj, model.DataStructureDefinition): - # The DSD is constrained directly - cl = getattr(obj, kind[0]) - else: - log.warning(f"Not implemented: constraints attached to {type(obj)}") - cl = None + # Navigate from the current ContentConstraint to a ConstrainableArtefact + cc_content = reader.stack[reader.Reference] + 
assert len(cc_content) == 1, (cc_content, reader.stack, elem.attrib) + obj = reader.resolve(next(iter(cc_content.values()))) + + if isinstance(obj, model.DataflowDefinition): + # The constrained DFD has a corresponding DSD, which has a Dimension- or + # AttributeDescriptor + dsd = obj.structure + elif isinstance(obj, model.DataStructureDefinition): + # The DSD is constrained directly + dsd = obj + else: + log.warning(f"Not implemented: constraints attached to {type(obj)}") + dsd = None - # Get the Component - return cl, cl.get(elem.attrib["id"]) + try: + # Get the component list + cl = getattr(dsd, kind[0]) except AttributeError: # Failed because the ContentConstraint is attached to something, e.g. # DataProvider, that does not provide an association to a DSD. Try to get a # Component from the current scope with matching ID. return None, reader.get_single(kind[1], id=elem.attrib["id"], subclass=True) + # Get the Component + try: + c = cl.get(elem.attrib["id"]) + except KeyError: + if dsd.is_external_reference: + # No component with the given ID exists, but the DSD is an external + # reference → create the component automatically + c = cl.getdefault(elem.attrib["id"]) + else: + raise + + return cl, c + def _ms_agency_id(elem): """Return the MemberSelection → CubeRegion → ContentConstraint → agencyID.""" From 8767018098735fb4b4290b8072720c691330a2b2 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 27 Dec 2023 16:09:48 -0500 Subject: [PATCH 005/103] Pop TimeRangeValue subclasses when parsing --- sdmx/reader/xml/v21.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index 108cb3897..e20587992 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -1372,7 +1372,7 @@ def _ms(reader, elem): # Convert to SelectionValue mvs = reader.pop_all("Value") - trv = reader.pop_all(model.TimeRangeValue) + trv = reader.pop_all(model.TimeRangeValue, subclass=True) if mvs: arg["values"] = 
list(map(lambda v: model.MemberValue(value=v), mvs)) elif trv: From 224edc644ccc2b7a660d7fd787659ccc097c4dea Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 27 Dec 2023 16:10:22 -0500 Subject: [PATCH 006/103] Remove blanket Xfail of actualconstraint queries --- sdmx/tests/test_sources.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sdmx/tests/test_sources.py b/sdmx/tests/test_sources.py index da31a35a3..0ec97ddf3 100644 --- a/sdmx/tests/test_sources.py +++ b/sdmx/tests/test_sources.py @@ -34,7 +34,6 @@ class DataSourceTest: #: Failures affecting **all** data sources, internal to :mod:`sdmx`. xfail_common = { - "actualconstraint": (XMLParseError, NI), # KeyError "allowedconstraint": (XMLParseError, NI), # KeyError "contentconstraint": (XMLParseError, NI), # KeyError "hierarchicalcodelist": (XMLParseError, NI), # From 67863dadce0f366132424d72185dfbb4b68f8b7c Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 27 Dec 2023 16:11:13 -0500 Subject: [PATCH 007/103] Handle RangePeriod in .writer.pandas --- sdmx/writer/pandas.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/sdmx/writer/pandas.py b/sdmx/writer/pandas.py index d6d3bc018..08dac0b70 100644 --- a/sdmx/writer/pandas.py +++ b/sdmx/writer/pandas.py @@ -1,5 +1,5 @@ from itertools import chain -from typing import Any, Dict, Hashable, Set, Union, cast +from typing import Any, Dict, Hashable, Set, Union import numpy as np import pandas as pd @@ -196,15 +196,19 @@ def _cc(obj: model.ContentConstraint, **kwargs): def _cr(obj: model.CubeRegion, **kwargs): """Convert :class:`.CubeRegion`.""" result: DictLike[str, pd.Series] = DictLike() - for dim, memberselection in obj.member.items(): + for dim, ms in obj.member.items(): result[dim.id] = pd.Series( - # cast(): as of PR#30, only MemberValue is supported here - [cast(model.MemberValue, mv).value for mv in memberselection.values], - name=dim.id, + [writer.recurse(sv, **kwargs) for sv in ms.values], 
name=dim.id ) return result +@writer +def _rp(obj: model.RangePeriod, **kwargs): + """Convert :class:`.RangePeriod`.""" + return f"{obj.start.period}–{obj.end.period}" + + @writer def write_dataset( obj: model.DataSet, From 262a42bfafce95d83d67def64650d85dd4d9fc11 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 27 Dec 2023 17:39:10 -0500 Subject: [PATCH 008/103] Ensure ProvisionAgreement is hashable --- sdmx/model/common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdmx/model/common.py b/sdmx/model/common.py index 782c96a64..888066832 100644 --- a/sdmx/model/common.py +++ b/sdmx/model/common.py @@ -2285,6 +2285,7 @@ class RESTDatasource(QueryDatasource): @dataclass +@MaintainableArtefact._preserve("hash") class ProvisionAgreement(MaintainableArtefact, ConstrainableArtefact): #: structure_usage: Optional[StructureUsage] = None From 6d6af218442e14eb5a1cefe0baa09ff36dca6b99 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 27 Dec 2023 17:39:49 -0500 Subject: [PATCH 009/103] Parse as reference --- sdmx/reader/xml/v21.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index e20587992..153b1f9d4 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -1738,6 +1738,12 @@ def _msd(reader: Reader, elem): # pragma: no cover @end("str:ProvisionAgreement") def _pa(reader, elem): + try: + # in is a reference + return reader.reference(elem) + except NotReference: + pass + return reader.maintainable( model.ProvisionAgreement, elem, From 6944b3fcde9d2f48c4299810983e09625d75b32d Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 27 Dec 2023 17:40:27 -0500 Subject: [PATCH 010/103] Handle != 1 constrained objects in _ms_component() --- sdmx/reader/xml/v21.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index 153b1f9d4..bf172a601 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py 
@@ -1296,7 +1296,11 @@ def _ms_component(reader, elem, kind): """Identify the Component for a ValueSelection.""" # Navigate from the current ContentConstraint to a ConstrainableArtefact cc_content = reader.stack[reader.Reference] - assert len(cc_content) == 1, (cc_content, reader.stack, elem.attrib) + if len(cc_content) > 1: + log.info( + f"Resolve reference to <{kind[1].__name__} {elem.attrib['id']}> using first" + f" of {len(cc_content)} constrained objects" + ) obj = reader.resolve(next(iter(cc_content.values()))) if isinstance(obj, model.DataflowDefinition): From 8fb3c611c46da32983d29986ca8155f6d66fe02d Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 27 Dec 2023 17:42:09 -0500 Subject: [PATCH 011/103] Use str(response) not repr() in Message.__repr__() --- sdmx/message.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/sdmx/message.py b/sdmx/message.py index 92d2b2658..5142af0bc 100644 --- a/sdmx/message.py +++ b/sdmx/message.py @@ -10,7 +10,7 @@ from dataclasses import dataclass, field, fields from datetime import datetime from operator import attrgetter -from typing import Any, List, Optional, Text, Union, get_args +from typing import TYPE_CHECKING, List, Optional, Text, Union, get_args from sdmx import model from sdmx.dictlike import DictLike, DictLikeDescriptor, summarize_dictlike @@ -22,19 +22,29 @@ ) from sdmx.util import compare, direct_fields +if TYPE_CHECKING: + import requests + log = logging.getLogger(__name__) def _summarize(obj, include: Optional[List[str]] = None): """Helper method for __repr__ on Header and Message (sub)classes.""" + import requests + include = include or list(map(attrgetter("name"), fields(obj))) for name in include: attr = getattr(obj, name) if attr is None: continue elif isinstance(attr, datetime): - attr = attr.isoformat() - yield f"{name}: {repr(attr)}" + s_attr = repr(attr.isoformat()) + elif isinstance(attr, requests.Response): + s_attr = str(attr) + else: + s_attr = 
repr(attr) + + yield f"{name}: {s_attr}" @dataclass @@ -137,7 +147,7 @@ class Message: footer: Optional[Footer] = None #: :class:`requests.Response` instance for the response to the HTTP request that #: returned the Message. This is not part of the SDMX standard. - response: Optional[Any] = None + response: Optional["requests.Response"] = None def __str__(self): return repr(self) From aae7736ee37c25e717ac80d97d82aa00881f006c Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 27 Dec 2023 17:43:01 -0500 Subject: [PATCH 012/103] Update sources.json per {allowed,content}constraint --- sdmx/sources.json | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/sdmx/sources.json b/sdmx/sources.json index 35970ab09..ba0290150 100644 --- a/sdmx/sources.json +++ b/sdmx/sources.json @@ -71,7 +71,9 @@ "name": "European Commission Directorate General for Competition", "url": "https://webgate.ec.europa.eu/comp/redisstat/api/dissemination/sdmx/2.1", "supports": { - "actualconstraint": false + "actualconstraint": false, + "allowedconstraint": false, + "contentconstraint": false } }, { @@ -95,7 +97,9 @@ "name": "European Commission Directorate General for Employment, Social Affairs, and Inclusion", "url": "https://webgate.ec.europa.eu/empl/redisstat/api/dissemination/sdmx/2.1", "supports": { - "actualconstraint": false + "actualconstraint": false, + "allowedconstraint": false, + "contentconstraint": false } }, { @@ -123,7 +127,9 @@ "name": "Eurostat (Comext and Prodcom datasets)", "url": "https://ec.europa.eu/eurostat/api/comext/dissemination/sdmx/2.1", "supports": { - "actualconstraint": false + "actualconstraint": false, + "allowedconstraint": false, + "contentconstraint": false } }, { @@ -131,7 +137,9 @@ "name": "European Commission Directorate General for Internal Market, Industry, Entrepreneurship and SMEs", "url": "https://webgate.ec.europa.eu/grow/redisstat/api/dissemination/sdmx/2.1", "supports": { - "actualconstraint": false + 
"actualconstraint": false, + "allowedconstraint": false, + "contentconstraint": false } }, { @@ -160,7 +168,6 @@ "name": "International Monetary Fund", "supports": { "actualconstraint": false, - "allowedconstraint": false, "hierarchicalcodelist": false, "metadataflow": false, "metadatastructure": false, @@ -300,7 +307,6 @@ "name": "SDMX Global Registry", "supports": { "actualconstraint": false, - "allowedconstraint": false, "metadataflow": false } }, From 46262db2a850db80d8acc19645c343511061bd85 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 27 Dec 2023 17:43:39 -0500 Subject: [PATCH 013/103] Remove blanket Xfail of {allowed,content}constraint queries --- sdmx/tests/test_sources.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sdmx/tests/test_sources.py b/sdmx/tests/test_sources.py index 0ec97ddf3..fec5bbf3c 100644 --- a/sdmx/tests/test_sources.py +++ b/sdmx/tests/test_sources.py @@ -34,8 +34,6 @@ class DataSourceTest: #: Failures affecting **all** data sources, internal to :mod:`sdmx`. 
xfail_common = { - "allowedconstraint": (XMLParseError, NI), # KeyError - "contentconstraint": (XMLParseError, NI), # KeyError "hierarchicalcodelist": (XMLParseError, NI), # "metadatastructure": (XMLParseError, NI), # not parsed "structure": (XMLParseError, NI), # not parsed From a221f3a273716484ce1aa9e6a936713623397cac Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 27 Dec 2023 17:58:27 -0500 Subject: [PATCH 014/103] Update sources.json per structureset support --- sdmx/sources.json | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/sdmx/sources.json b/sdmx/sources.json index ba0290150..f9dfd08fe 100644 --- a/sdmx/sources.json +++ b/sdmx/sources.json @@ -73,7 +73,8 @@ "supports": { "actualconstraint": false, "allowedconstraint": false, - "contentconstraint": false + "contentconstraint": false, + "structureset": false } }, { @@ -99,7 +100,8 @@ "supports": { "actualconstraint": false, "allowedconstraint": false, - "contentconstraint": false + "contentconstraint": false, + "structureset": false } }, { @@ -129,7 +131,8 @@ "supports": { "actualconstraint": false, "allowedconstraint": false, - "contentconstraint": false + "contentconstraint": false, + "structureset": false } }, { @@ -139,7 +142,8 @@ "supports": { "actualconstraint": false, "allowedconstraint": false, - "contentconstraint": false + "contentconstraint": false, + "structureset": false } }, { @@ -307,7 +311,8 @@ "name": "SDMX Global Registry", "supports": { "actualconstraint": false, - "metadataflow": false + "metadataflow": false, + "structureset": false } }, { From 34f8fed49fd9500dcc1592f6ee6b9780f3a76e56 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 27 Dec 2023 20:57:14 -0500 Subject: [PATCH 015/103] Add StructureMessage.structureset --- sdmx/message.py | 2 ++ sdmx/rest.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/sdmx/message.py b/sdmx/message.py index 5142af0bc..9a1a7bce6 100644 --- a/sdmx/message.py +++ 
b/sdmx/message.py @@ -204,6 +204,8 @@ class StructureMessage(Message): structure: DictLikeDescriptor[ str, model.BaseDataStructureDefinition ] = DictLikeDescriptor() + #: Collection of :class:`.StructureSet`. + structureset: DictLikeDescriptor[str, model.StructureSet] = DictLikeDescriptor() #: Collection of :class:`.OrganisationScheme`. organisation_scheme: DictLikeDescriptor[ str, model.OrganisationScheme diff --git a/sdmx/rest.py b/sdmx/rest.py index 62440e6d7..bc58cf501 100644 --- a/sdmx/rest.py +++ b/sdmx/rest.py @@ -130,7 +130,7 @@ def describe(cls): @dataclass class URL: - """Utility class to build SDMX REST URLs. + """Utility class to build SDMX 2.1 REST web service URLs. See also -------- From c95eec1831ec9b6c75ecae830b7c5e31b8528e8a Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 27 Dec 2023 21:12:30 -0500 Subject: [PATCH 016/103] Bump mypy to 1.8.0; ruff to 0.1.9 Gitignore .ruff_cache --- .gitignore | 1 + .pre-commit-config.yaml | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index bcb64ea3d..b4355d3ab 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ __pycache__ .coverage* .mypy_cache .pytest_cache +.ruff_cache build coverage.xml dist diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3f04ac048..3a6f462d3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.6.1 + rev: v1.8.0 hooks: - id: mypy additional_dependencies: @@ -15,7 +15,7 @@ repos: - types-requests args: [] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.2 + rev: v0.1.9 hooks: - id: ruff - id: ruff-format From f49a5751a248dcf06811c03f244aacb240d022cd Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 27 Dec 2023 21:15:21 -0500 Subject: [PATCH 017/103] Add StructureSet, ItemSchemeMap, ItemAssociation IM classes --- sdmx/model/common.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 
file changed, 39 insertions(+) diff --git a/sdmx/model/common.py b/sdmx/model/common.py index 888066832..e624b19b3 100644 --- a/sdmx/model/common.py +++ b/sdmx/model/common.py @@ -2064,6 +2064,44 @@ class BaseMetadataflow(StructureUsage, ConstrainableArtefact): """ABC for SDMX 2.1 MetadataflowDefinition and SDMX 3.0 Metadataflow.""" +# SDMX 2.1 §9: Structure Set and Mappings + + +@dataclass +class ItemAssociation(AnnotableArtefact, Generic[IT]): + _Item: ClassVar[Type[Item]] = Item + + source: Optional[IT] = None + target: Optional[IT] = None + + +class CodeMap(ItemAssociation[Code]): + _Item = Code + + +IAT = TypeVar("IAT", bound="ItemAssociation") +IST = TypeVar("IST", bound="ItemScheme") + + +@dataclass +class ItemSchemeMap(NameableArtefact, Generic[IST, IAT]): + _ItemAssociation: ClassVar[Type[ItemAssociation]] = ItemAssociation + + source: Optional[IST] = None + target: Optional[IST] = None + + item_association: List[IAT] = field(default_factory=list) + + +class CodelistMap(ItemSchemeMap[Codelist, CodeMap]): + _ItemAssociation = CodeMap + + +@dataclass +class StructureSet(MaintainableArtefact): + item_scheme_map: List[ItemSchemeMap] = field(default_factory=list) + + # SDMX 2.1 §10.2: Constraint inheritance # SDMX 3.0 §12: Constraints @@ -2431,6 +2469,7 @@ class BaseContentConstraint: "DataStructureDefinition", "StructureUsage", }, + "mapping": {"CodelistMap", "StructureSet"}, "metadatastructure": { "MetadataflowDefinition", # SDMX 2.1 "Metadataflow", # SDMX 3.0 From 474b9d2395607e7334d2a81cb1248765684cddd7 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 27 Dec 2023 21:26:47 -0500 Subject: [PATCH 018/103] Add StructureSet, CodelistMap, CodeMap to .format.xml --- sdmx/format/xml/common.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sdmx/format/xml/common.py b/sdmx/format/xml/common.py index 070d8ca9c..3fd06a3a0 100644 --- a/sdmx/format/xml/common.py +++ b/sdmx/format/xml/common.py @@ -17,6 +17,8 @@ "CategoryScheme", "Code", "Codelist", + 
"CodelistMap", + "CodeMap", "Concept", "ConceptScheme", "CustomType", @@ -29,6 +31,7 @@ "NamePersonalisationScheme", "Ruleset", "RulesetScheme", + "StructureSet", "TimeDimension", "TransformationScheme", "UserDefinedOperatorScheme", From b042f95ff065937ea872bfbd027f63a79c66f7f3 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 27 Dec 2023 21:44:57 -0500 Subject: [PATCH 019/103] Simplify .reader.xml.v21 using @possible_reference() --- sdmx/reader/xml/v21.py | 100 +++++++++++++++++++++-------------------- 1 file changed, 52 insertions(+), 48 deletions(-) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index bf172a601..adcb9940a 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -336,6 +336,46 @@ def decorator(func): return decorator + @classmethod + def possible_reference(cls, cls_hint: Optional[type] = None, unstash: bool = False): + """Decorator for a function where the `elem` parsed may be a Reference. + + Before calling the decorated function, attempt to parse the `elem` as a + :class:`.Reference`. If successful, return the reference instead of calling the + function. If `elem` does not contain a reference, call the decorated function. + + Parameters + ---------- + cls_hint : + Passed to :class:`.Reference`. + unstash : bool, optional + If :data:`True`, call :meth:`.unstash` after successfully resolving a + reference. 
+ """ + + def decorator(func): + def wrapped(reader: "Reader", elem): + try: + # Identify a reference + result = reader.Reference( + reader, + elem, + cls_hint=cls_hint or reader.class_for_tag(elem.tag), + ) + except NotReference: + # Call the wrapped function + result = func(reader, elem) + else: + # Successful; unstash if configured + if unstash: + reader.unstash() + + return result + + return wrapped + + return decorator + # Stack handling def _clean(self): # pragma: no cover @@ -488,7 +528,7 @@ def pop_resolved_ref(self, cls_or_name: Union[Type, str]): """Pop a reference to `cls_or_name` and resolve it.""" return self.resolve(self.pop_single(cls_or_name)) - def reference(self, elem, cls_hint=None): + def reference(self, elem, cls_hint=None) -> Reference: return self.Reference(self, elem, cls_hint=cls_hint) def resolve(self, ref): @@ -638,6 +678,7 @@ def maintainable(self, cls, elem, **kwargs): # Shorthand start = Reader.start end = Reader.end +possible_reference = Reader.possible_reference # Tags to skip entirely start( @@ -973,17 +1014,10 @@ def _item_start(reader, elem): """, only=False, ) +# is a reference, e.g. in +# Restore "Name" and "Description" that may have been stashed by _item_start +@possible_reference(unstash=True) def _item_end(reader: Reader, elem): - try: - # may be a reference, e.g. in - item = reader.reference(elem, cls_hint=reader.class_for_tag(elem.tag)) - except NotReference: - pass - else: - # Restore "Name" and "Description" that may have been stashed by _item_start - reader.unstash() - return item - cls = reader.class_for_tag(elem.tag) item = reader.nameable(cls, elem) @@ -1019,13 +1053,8 @@ def _item_end(reader: Reader, elem): str:VtlMappingScheme """ ) +@possible_reference() # in def _itemscheme(reader: Reader, elem): - try: - # may be a reference, e.g. 
in - return reader.reference(elem, cls_hint=reader.class_for_tag(elem.tag)) - except NotReference: - pass - cls: Type[common.ItemScheme] = reader.class_for_tag(elem.tag) try: @@ -1107,13 +1136,8 @@ def _concept(reader, elem): "str:Attribute str:Dimension str:GroupDimension str:MeasureDimension " "str:PrimaryMeasure str:TimeDimension" ) +@possible_reference() def _component(reader: Reader, elem): - try: - # May be a reference - return reader.reference(elem) - except NotReference: - pass - # Object class: {,Measure,Time}Dimension or DataAttribute cls = reader.class_for_tag(elem.tag) @@ -1155,13 +1179,8 @@ def _component(reader: Reader, elem): @end("str:AttributeList str:DimensionList str:Group str:MeasureList") +@possible_reference(cls_hint=model.GroupDimensionDescriptor) # def _cl(reader: Reader, elem): - try: - # may be a reference - return reader.reference(elem, cls_hint=model.GroupDimensionDescriptor) - except NotReference: - pass - # Retrieve the DSD dsd = reader.peek("current DSD") assert dsd is not None @@ -1477,14 +1496,9 @@ def _ar(reader, elem): @start("str:DataStructure", only=False) +@possible_reference() # in def _dsd_start(reader: Reader, elem): - try: - # may be a reference, e.g. in - return reader.reference(elem) - except NotReference: - pass - - # Get any external reference created earlier, or instantiate a new object. + # Get any external reference created earlier, or instantiate a new object dsd = reader.maintainable(reader.model.DataStructureDefinition, elem) if dsd not in reader.stack[reader.model.DataStructureDefinition]: @@ -1507,13 +1521,8 @@ def _dsd_end(reader, elem): @end("str:Dataflow str:Metadataflow") +@possible_reference() # in def _dfd(reader: Reader, elem): - try: - # may be a reference, e.g. 
in - return reader.reference(elem) - except NotReference: - pass - structure = reader.pop_resolved_ref("Structure") if structure is None: log.warning( @@ -1741,13 +1750,8 @@ def _msd(reader: Reader, elem): # pragma: no cover @end("str:ProvisionAgreement") +@possible_reference() # in def _pa(reader, elem): - try: - # in is a reference - return reader.reference(elem) - except NotReference: - pass - return reader.maintainable( model.ProvisionAgreement, elem, From f4fe4aacaf092480d9e6af3e38225099cd663d5e Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 27 Dec 2023 21:45:56 -0500 Subject: [PATCH 020/103] Use multi-line strings consistently in .reader.xml --- sdmx/reader/xml/v21.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index adcb9940a..e768546d6 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -705,8 +705,10 @@ def maintainable(self, cls, elem, **kwargs): @start( - "mes:Error mes:GenericData mes:GenericTimeSeriesData mes:StructureSpecificData " - "mes:StructureSpecificTimeSeriesData" + """ + mes:Error mes:GenericData mes:GenericTimeSeriesData mes:StructureSpecificData + mes:StructureSpecificTimeSeriesData + """ ) @start("mes:Structure", only=False) def _message(reader: Reader, elem): @@ -939,8 +941,10 @@ def _datetime(reader, elem): @end( - "com:AnnotationText com:Name com:Description com:Text mes:Source mes:Department " - "mes:Role str:Department str:Role" + """ + com:AnnotationText com:Name com:Description com:Text mes:Source mes:Department + mes:Role str:Department str:Role + """ ) def _localization(reader, elem): reader.push( @@ -952,8 +956,8 @@ def _localization(reader, elem): @end( """ com:Structure com:StructureUsage str:AttachmentGroup str:ConceptIdentity - str:ConceptRole str:DimensionReference str:Parent str:Source str:Structure - str:StructureUsage str:Target str:Enumeration + str:ConceptRole str:DimensionReference 
str:Enumeration str:Parent str:Source + str:Structure str:StructureUsage str:Target """ ) def _ref(reader: Reader, elem): @@ -1133,8 +1137,10 @@ def _concept(reader, elem): @end( - "str:Attribute str:Dimension str:GroupDimension str:MeasureDimension " - "str:PrimaryMeasure str:TimeDimension" + """ + str:Attribute str:Dimension str:GroupDimension str:MeasureDimension + str:PrimaryMeasure str:TimeDimension + """ ) @possible_reference() def _component(reader: Reader, elem): From 4a72a5eae0d832d3e99a0d9829583a35d7e0804a Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 27 Dec 2023 21:47:00 -0500 Subject: [PATCH 021/103] Remove blanket Xfail of structure{,set} queries --- sdmx/tests/test_sources.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sdmx/tests/test_sources.py b/sdmx/tests/test_sources.py index fec5bbf3c..4bf44b7f2 100644 --- a/sdmx/tests/test_sources.py +++ b/sdmx/tests/test_sources.py @@ -36,8 +36,6 @@ class DataSourceTest: xfail_common = { "hierarchicalcodelist": (XMLParseError, NI), # "metadatastructure": (XMLParseError, NI), # not parsed - "structure": (XMLParseError, NI), # not parsed - "structureset": (XMLParseError, NI), # not implemented } #: Mapping of endpoint → Exception subclass. 
Tests of these endpoints are expected From c53cf9f25bab240db2d2c5d583c21b2773878abf Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 27 Dec 2023 21:53:51 -0500 Subject: [PATCH 022/103] Parse StructureSet, CodelistMap, CodeMap from XML --- sdmx/reader/xml/v21.py | 73 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 71 insertions(+), 2 deletions(-) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index e768546d6..21ef77314 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -961,8 +961,9 @@ def _localization(reader, elem): """ ) def _ref(reader: Reader, elem): - cls_hint = None - if QName(elem).localname in ("Parent", "Target"): + cls_hint = reader.peek("ItemAssociation class") or None + + if not cls_hint and QName(elem).localname in ("Parent", "Target"): # Use the *grand*-parent of the or for a class hint cls_hint = reader.class_for_tag(elem.getparent().tag) @@ -1752,6 +1753,74 @@ def _msd(reader: Reader, elem): # pragma: no cover return NotImplemented +# §9: Structure Set and Mappings + + +@start("str:CodelistMap", only=False) +def _ismap_start(reader: Reader, elem): + cls: Type[common.ItemSchemeMap] = reader.class_for_tag(elem.tag) + # Push class for reference while parsing sub-elements + reader.push("ItemAssociation class", cls._ItemAssociation._Item) + + +@end("str:CodelistMap", only=False) +def _ismap_end(reader: Reader, elem): + cls: Type[common.ItemSchemeMap] = reader.class_for_tag(elem.tag) + + # Remove class from stacks + reader.pop_single("ItemAssociation class") + + # Retrieve the source and target ItemSchemes + source: model.ItemScheme = reader.pop_resolved_ref("Source") + target: model.ItemScheme = reader.pop_resolved_ref("Target") + + # Iterate over the ItemAssociation instances + ia_all = list() + for ia in reader.pop_all(cls._ItemAssociation): + for name, scheme in ("source", source), ("target", target): + # ia.source is a Reference; retrieve its ID + id_ = getattr(ia, name).id + try: + # Use the 
ID to look up an Item in the ItemScheme + item = scheme[id_] + except KeyError: + if scheme.is_external_reference: + # Externally-referenced ItemScheme → create the Item + item = scheme.setdefault(id=id_) + else: + raise + setattr(ia, name, item) + + ia_all.append(ia) + + return reader.nameable( + cls, elem, source=source, target=target, item_association=ia_all + ) + + +@end("str:CodeMap") +def _item_map(reader: Reader, elem): + cls: Type[common.ItemAssociation] = reader.class_for_tag(elem.tag) + + # Store Source and Target as Reference instances + return reader.annotable( + cls, + elem, + source=reader.pop_single("Source"), + target=reader.pop_single("Target"), + ) + + +@end("str:StructureSet") +def _ss(reader: Reader, elem): + return reader.maintainable( + common.StructureSet, + elem, + # Collect all ItemSchemeMaps + item_scheme_map=reader.pop_all(common.ItemSchemeMap, subclass=True), + ) + + # §11: Data Provisioning From 2b8017d53711cc7182037dd512af334a9cef55fd Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 27 Dec 2023 21:54:36 -0500 Subject: [PATCH 023/103] Add actualconstraint, structureset specimens --- sdmx/testing/__init__.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sdmx/testing/__init__.py b/sdmx/testing/__init__.py index e60eab714..70f45ccd2 100644 --- a/sdmx/testing/__init__.py +++ b/sdmx/testing/__init__.py @@ -259,7 +259,9 @@ def __init__(self, base_path): self.specimens.extend( (base_path.joinpath(*parts), "xml", "structure") for parts in [ + ("BIS", "actualconstraint-0.xml"), ("ECB", "orgscheme.xml"), + ("ECB", "structureset-0.xml"), ("ESTAT", "apro_mk_cola-structure.xml"), ("ESTAT", "GOV_10Q_GGNFA.xml"), ("IMF", "1PI-structure.xml"), @@ -267,14 +269,18 @@ def __init__(self, base_path): # Manually reduced subset of the response for this DSD. 
Test for # containing both and ("IMF", "ECOFIN_DSD-structure.xml"), + ("IMF", "structureset-0.xml"), ("INSEE", "CNA-2010-CONSO-SI-A17-structure.xml"), ("INSEE", "dataflow.xml"), ("INSEE", "IPI-2010-A21-structure.xml"), ("ISTAT", "22_289-structure.xml"), ("ISTAT", "47_850-structure.xml"), + ("ISTAT", "actualconstraint-0.xml"), + ("OECD", "actualconstraint-0.xml"), ("UNICEF", "GLOBAL_DATAFLOW-structure.xml"), ("UNSD", "codelist_partial.xml"), ("SGR", "common-structure.xml"), + ("SPC", "actualconstraint-0.xml"), ("TEST", "gh-142.xml"), ("TEST", "gh-149.xml"), ] From 6876e3da2a6376b0b1f3e0fb38f8dcd47871d3a9 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 28 Dec 2023 00:15:55 -0500 Subject: [PATCH 024/103] Add 2.1/3.0 IM classes HierarchicalCode, Level --- sdmx/model/common.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/sdmx/model/common.py b/sdmx/model/common.py index e624b19b3..5b4021246 100644 --- a/sdmx/model/common.py +++ b/sdmx/model/common.py @@ -2064,6 +2064,25 @@ class BaseMetadataflow(StructureUsage, ConstrainableArtefact): """ABC for SDMX 2.1 MetadataflowDefinition and SDMX 3.0 Metadataflow.""" +# SDMX 2.1 §8: Hierarchical Code List +# SDMX 3.9 §8: Hierarchy + + +@dataclass +class HierarchicalCode(IdentifiableArtefact): + code: Optional[Code] = None + parent: Optional[ + Union["HierarchicalCode", Any] + ] = None # NB second element is "Hierarchy" + child: List["HierarchicalCode"] = field(default_factory=list) + + +@dataclass +class Level(NameableArtefact): + parent: Optional[Union["Level", Any]] = None # NB second element is "Hierarchy" + child: Optional["Level"] = None + + # SDMX 2.1 §9: Structure Set and Mappings @@ -2461,7 +2480,14 @@ class BaseContentConstraint: _PACKAGE_CLASS: Dict[str, set] = { "base": {"Agency", "AgencyScheme", "DataProvider", "DataProviderScheme"}, "categoryscheme": {"Category", "Categorisation", "CategoryScheme"}, - "codelist": {"Code", "Codelist"}, + "codelist": { + 
"Code", + "Codelist", + "HierarchicalCode", + "HierarchicalCodelist", # SDMX 2.1 + "Hierarchy", + "Level", + }, "conceptscheme": {"Concept", "ConceptScheme"}, "datastructure": { "DataflowDefinition", # SDMX 2.1 From 6ff2a700650b681b83b3dc08473cae68240f1048 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 28 Dec 2023 00:19:54 -0500 Subject: [PATCH 025/103] Add 2.1 IM classes Hierarchy, HierarchicalCodelist --- sdmx/model/common.py | 7 +++++++ sdmx/model/v21.py | 27 ++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/sdmx/model/common.py b/sdmx/model/common.py index 5b4021246..ee25b4a7f 100644 --- a/sdmx/model/common.py +++ b/sdmx/model/common.py @@ -106,6 +106,13 @@ "Key", "GroupKey", "SeriesKey", + "HierarchicalCode", + "Level", + "ItemAssociation", + "CodeMap", + "ItemSchemeMap", + "CodelistMap", + "StructureSet", "ConstraintRole", "StartPeriod", "EndPeriod", diff --git a/sdmx/model/v21.py b/sdmx/model/v21.py index 7c372e4c6..2f2cbf3fa 100644 --- a/sdmx/model/v21.py +++ b/sdmx/model/v21.py @@ -4,7 +4,7 @@ # TODO for complete implementation of the IM, enforce TimeKeyValue (instead of KeyValue) # for {Generic,StructureSpecific} TimeSeriesDataSet. from dataclasses import dataclass, field -from typing import Generator, List, Optional, Set, Union +from typing import Dict, Generator, List, Optional, Set, Union from sdmx.dictlike import DictLikeDescriptor @@ -50,6 +50,8 @@ "StructureSpecificTimeSeriesDataSet", "MetadataflowDefinition", "MetadataStructureDefinition", + "Hierarchy", + "HierarchicalCodelist", ] log = logging.getLogger(__name__) @@ -291,6 +293,28 @@ class MetadataflowDefinition(common.BaseMetadataflow): """SDMX 2.1 MetadataflowDefinition.""" +# §8 Hierarchical Code List + + +@dataclass +class Hierarchy(NameableArtefact): + has_formal_levels: bool = False + + #: Hierarchical codes in the hierarchy. 
+ codes: Dict[str, common.HierarchicalCode] = field(default_factory=dict) + + level: Optional[common.Level] = None + + +@dataclass +class HierarchicalCodelist(common.MaintainableArtefact): + hierarchy: List[Hierarchy] = field(default_factory=list) + + def __repr__(self) -> str: + tmp = super(NameableArtefact, self).__repr__()[:-1] + return f"{tmp}: {len(self.hierarchy)} hierarchies>" + + CF = common.ClassFinder( __name__, name_map={ @@ -298,6 +322,7 @@ class MetadataflowDefinition(common.BaseMetadataflow): "Metadataflow": "MetadataflowDefinition", }, parent_map={ + common.HierarchicalCode: Hierarchy, PrimaryMeasure: MeasureDescriptor, }, ) From c151300510ff26de1789104e05ef39c102f96bdb Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 28 Dec 2023 00:20:22 -0500 Subject: [PATCH 026/103] Add Hierarchy classes to .format.xml --- sdmx/format/xml/common.py | 2 ++ sdmx/format/xml/v21.py | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/sdmx/format/xml/common.py b/sdmx/format/xml/common.py index 3fd06a3a0..776e7e5bf 100644 --- a/sdmx/format/xml/common.py +++ b/sdmx/format/xml/common.py @@ -27,6 +27,8 @@ "DataConsumerScheme", "DataProvider", "DataProviderScheme", + "HierarchicalCode", + "Level", "NamePersonalisation", "NamePersonalisationScheme", "Ruleset", diff --git a/sdmx/format/xml/v21.py b/sdmx/format/xml/v21.py index 4fe90a6bf..8e62d009f 100644 --- a/sdmx/format/xml/v21.py +++ b/sdmx/format/xml/v21.py @@ -16,7 +16,13 @@ ] + [ (f"model.{name}", f"str:{name}") - for name in "ContentConstraint MeasureDimension PrimaryMeasure".split() + for name in """ + ContentConstraint + HierarchicalCodelist + Hierarchy + MeasureDimension + PrimaryMeasure + """.split() ], ) From bac226861e52d913a89d7821975a0922aee657cf Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 28 Dec 2023 00:21:08 -0500 Subject: [PATCH 027/103] Parse str:HierarchicalCodelist etc. 
from v2.1 XML --- sdmx/reader/xml/v21.py | 87 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 80 insertions(+), 7 deletions(-) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index 21ef77314..ad1ee3f61 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -921,9 +921,9 @@ def _structures(reader, elem): """ com:AnnotationTitle com:AnnotationType com:AnnotationURL com:None com:URN com:Value mes:DataSetAction mes:DataSetID mes:Email mes:ID mes:Test mes:Timezone - str:DataType str:Email str:Expression str:NullValue str:OperatorDefinition - str:PersonalisedName str:Result str:RulesetDefinition str:Telephone str:URI - str:VtlDefaultName str:VtlScalarType + str:CodelistAliasRef str:DataType str:Email str:Expression str:NullValue + str:OperatorDefinition str:PersonalisedName str:Result str:RulesetDefinition + str:Telephone str:URI str:VtlDefaultName str:VtlScalarType """ ) def _text(reader, elem): @@ -955,15 +955,15 @@ def _localization(reader, elem): @end( """ - com:Structure com:StructureUsage str:AttachmentGroup str:ConceptIdentity - str:ConceptRole str:DimensionReference str:Enumeration str:Parent str:Source - str:Structure str:StructureUsage str:Target + com:Structure com:StructureUsage str:AttachmentGroup str:CodeID str:ConceptIdentity + str:ConceptRole str:DimensionReference str:Enumeration + str:Parent str:Source str:Structure str:StructureUsage str:Target """ ) def _ref(reader: Reader, elem): cls_hint = reader.peek("ItemAssociation class") or None - if not cls_hint and QName(elem).localname in ("Parent", "Target"): + if not cls_hint and QName(elem).localname in ("CodeID", "Parent", "Target"): # Use the *grand*-parent of the or for a class hint cls_hint = reader.class_for_tag(elem.getparent().tag) @@ -1753,6 +1753,79 @@ def _msd(reader: Reader, elem): # pragma: no cover return NotImplemented +# §8: Hierarchical Code List + + +@end("str:HierarchicalCode") +def _hc(reader: Reader, elem): + cls = reader.class_for_tag(elem.tag) + + 
code = reader.resolve(reader.pop_single(Reference)) + + if code is None: + # Retrieve and resolve the reference to the Codelist + cl_alias = reader.pop_single("CodelistAliasRef") + cl_ref = reader.peek("CodelistAlias")[cl_alias] + cl = reader.resolve(cl_ref) + + # Manually resolve the CodeID + code_id = reader.pop_single("CodeID").id + try: + code = cl[code_id] + except KeyError: + if cl.is_external_reference: + code = cl.setdefault(id=code_id) + else: + raise + + # Create the HierarchicalCode + obj = reader.identifiable(cls, elem, code=code) + + # Count children represented as XML sub-elements of the parent + n_child = sum(e.tag == elem.tag for e in elem) + # Collect this many children and append them to `obj` + obj.child.extend(reversed([reader.pop_single(cls) for i in range(n_child)])) + + return obj + + +@end("str:Level") +def _l(reader: Reader, elem): + cls = reader.class_for_tag(elem.tag) + + return reader.nameable(cls, elem, child=reader.pop_single(cls)) + + +@end("str:Hierarchy") +def _h(reader: Reader, elem): + cls = reader.class_for_tag(elem.tag) + return reader.nameable( + cls, + elem, + has_formal_levels=bool(elem.attrib["leveled"]), + codes={c.id: c for c in reader.pop_all(model.HierarchicalCode)}, + level=reader.pop_single(common.Level), + ) + + +@end("str:IncludedCodelist") +def _icl(reader: Reader, elem): + obj = reader.reference(elem, common.Codelist) + + if reader.peek("CodelistAlias") is None: + reader.push("CodelistAlias", dict()) + reader.peek("CodelistAlias")[elem.attrib["alias"]] = obj + + return None + + +@end("str:HierarchicalCodelist") +def _hcl(reader: Reader, elem): + cls = reader.class_for_tag(elem.tag) + reader.pop_all("CodelistAlias") + return reader.maintainable(cls, elem, hierarchy=reader.pop_all(model.Hierarchy)) + + # §9: Structure Set and Mappings From 833cf9bfc149485d71e67a2a885090d3f36640b9 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 28 Dec 2023 00:21:36 -0500 Subject: [PATCH 028/103] Add 
StructureMessage.hierarchical_code_list --- sdmx/message.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sdmx/message.py b/sdmx/message.py index 9a1a7bce6..55efa8f2a 100644 --- a/sdmx/message.py +++ b/sdmx/message.py @@ -192,6 +192,10 @@ class StructureMessage(Message): ] = DictLikeDescriptor() #: Collection of :class:`.Codelist`. codelist: DictLikeDescriptor[str, model.Codelist] = DictLikeDescriptor() + #: Collection of :class:`.HierarchicalCodelist`. + hierarchical_code_list: DictLikeDescriptor[ + str, v21.HierarchicalCodelist + ] = DictLikeDescriptor() #: Collection of :class:`.ConceptScheme`. concept_scheme: DictLikeDescriptor[str, model.ConceptScheme] = DictLikeDescriptor() #: Collection of :class:`.ContentConstraint`. From 716e0f1180a5165d8fa1fdda20a103b2f1d482ac Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 28 Dec 2023 00:22:34 -0500 Subject: [PATCH 029/103] Test .model.v21.HierarchicalCodelist --- sdmx/tests/model/test_v21.py | 41 ++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/sdmx/tests/model/test_v21.py b/sdmx/tests/model/test_v21.py index e673211aa..41b79d5d7 100644 --- a/sdmx/tests/model/test_v21.py +++ b/sdmx/tests/model/test_v21.py @@ -3,10 +3,13 @@ import pytest +import sdmx +import sdmx.message from sdmx.model.v21 import ( AttributeDescriptor, AttributeValue, Code, + Codelist, Component, ComponentList, ComponentValue, @@ -578,3 +581,41 @@ def test_init(self): ds1 = DataSet(action="information") assert ds0.action == ds1.action + + +class TestHierarchicalCodelist: + @pytest.fixture(scope="class") + def msg(self, specimen): + with specimen("BIS/hierarchicalcodelist-0.xml") as f: + return sdmx.read_sdmx(f) + + def test_hierarchy(self, msg: sdmx.message.StructureMessage) -> None: + for key, hcl in msg.hierarchical_code_list.items(): + assert 1 == len(hcl.hierarchy) + # print(f"{hcl = }") + + hcl = msg.hierarchical_code_list["BIS:HCL_COUNTRY(1.0)"] + + # Access a Hierarchy + h = 
hcl.hierarchy[0] + assert "HIERARCHY_COUNTRY" == h.id + assert 2 == len(h.codes) + + c1 = h.codes["1"] + c2 = h.codes["2"] + + assert 4 == len(c1.child) + + assert 56 == len(c2.child) + # HierarchicalCode has a `code` attribute + assert isinstance(c2.code, Code) + assert "OC" == c2.code + + # This Code is contained within a code list + assert isinstance(c2.code.parent, Codelist) + assert c2.code.parent.urn.endswith("Codelist=BIS:CL_WEBSTATS_CODES(1.0)") + + # The code has a child associated with a different code list + c3 = c2.child[0] + assert "6J" == c3.code + assert c3.code.parent.urn.endswith("Codelist=BIS:CL_BIS_IF_REF_AREA(1.0)") From e68330c6ba1987a165cc135edb117caf083b8a37 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 28 Dec 2023 00:23:05 -0500 Subject: [PATCH 030/103] Update sources.json per hierarchicalcodelist support --- sdmx/sources.json | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sdmx/sources.json b/sdmx/sources.json index f9dfd08fe..173e9215b 100644 --- a/sdmx/sources.json +++ b/sdmx/sources.json @@ -74,6 +74,8 @@ "actualconstraint": false, "allowedconstraint": false, "contentconstraint": false, + "hierarchicalcodelist": false, + "structure": false, "structureset": false } }, @@ -101,6 +103,8 @@ "actualconstraint": false, "allowedconstraint": false, "contentconstraint": false, + "hierarchicalcodelist": false, + "structure": false, "structureset": false } }, @@ -132,6 +136,8 @@ "actualconstraint": false, "allowedconstraint": false, "contentconstraint": false, + "hierarchicalcodelist": false, + "structure": false, "structureset": false } }, @@ -143,6 +149,8 @@ "actualconstraint": false, "allowedconstraint": false, "contentconstraint": false, + "hierarchicalcodelist": false, + "structure": false, "structureset": false } }, @@ -322,7 +330,6 @@ "supports": { "dataconsumerscheme": false, "dataproviderscheme": false, - "hierarchicalcodelist": false, "metadataflow": false, "provisionagreement": false, 
"structureset": false @@ -384,6 +391,7 @@ "metadataflow": false, "metadatastructure": false, "provisionagreement": false, + "structure": false, "structureset": false, "preview": true } From 9ca7565304f276e9ae141fd96924f8b4052fe068 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 28 Dec 2023 00:24:33 -0500 Subject: [PATCH 031/103] Add 3 specimens of v2.1 HierarchicalCodelist --- sdmx/testing/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sdmx/testing/__init__.py b/sdmx/testing/__init__.py index 70f45ccd2..83622afed 100644 --- a/sdmx/testing/__init__.py +++ b/sdmx/testing/__init__.py @@ -260,6 +260,7 @@ def __init__(self, base_path): (base_path.joinpath(*parts), "xml", "structure") for parts in [ ("BIS", "actualconstraint-0.xml"), + ("BIS", "hierarchicalcodelist-0.xml"), ("ECB", "orgscheme.xml"), ("ECB", "structureset-0.xml"), ("ESTAT", "apro_mk_cola-structure.xml"), @@ -269,6 +270,7 @@ def __init__(self, base_path): # Manually reduced subset of the response for this DSD. 
Test for # containing both and ("IMF", "ECOFIN_DSD-structure.xml"), + ("IMF", "hierarchicalcodelist-0.xml"), ("IMF", "structureset-0.xml"), ("INSEE", "CNA-2010-CONSO-SI-A17-structure.xml"), ("INSEE", "dataflow.xml"), @@ -280,6 +282,7 @@ def __init__(self, base_path): ("UNICEF", "GLOBAL_DATAFLOW-structure.xml"), ("UNSD", "codelist_partial.xml"), ("SGR", "common-structure.xml"), + ("SGR", "hierarchicalcodelist-0.xml"), ("SPC", "actualconstraint-0.xml"), ("TEST", "gh-142.xml"), ("TEST", "gh-149.xml"), From 10a451c5b4815f73bc4fb054a4450af182bb2810 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 28 Dec 2023 00:25:12 -0500 Subject: [PATCH 032/103] Remove blanket Xfail of hierarchicalcodelist queries --- sdmx/tests/test_sources.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sdmx/tests/test_sources.py b/sdmx/tests/test_sources.py index 4bf44b7f2..2fe3534a7 100644 --- a/sdmx/tests/test_sources.py +++ b/sdmx/tests/test_sources.py @@ -34,7 +34,6 @@ class DataSourceTest: #: Failures affecting **all** data sources, internal to :mod:`sdmx`. 
xfail_common = { - "hierarchicalcodelist": (XMLParseError, NI), # "metadatastructure": (XMLParseError, NI), # not parsed } From ad2dfd762c51c02366b4607925dc126ce38db9ee Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 28 Dec 2023 00:52:53 -0500 Subject: [PATCH 033/103] Consolidate common CodingFormat, Level Remove duplicate definitions from .model.v30 --- sdmx/model/common.py | 32 +++++++++++++++++++++++++------- sdmx/model/v30.py | 40 +++------------------------------------- 2 files changed, 28 insertions(+), 44 deletions(-) diff --git a/sdmx/model/common.py b/sdmx/model/common.py index ee25b4a7f..3c12c5c72 100644 --- a/sdmx/model/common.py +++ b/sdmx/model/common.py @@ -106,8 +106,9 @@ "Key", "GroupKey", "SeriesKey", - "HierarchicalCode", + "CodingFormat", "Level", + "HierarchicalCode", "ItemAssociation", "CodeMap", "ItemSchemeMap", @@ -2075,21 +2076,38 @@ class BaseMetadataflow(StructureUsage, ConstrainableArtefact): # SDMX 3.9 §8: Hierarchy +class CodingFormat: + """SDMX CodingFormat.""" + + coding_format: Facet + + +@dataclass +class Level(NameableArtefact): + parent: Optional[Union["Level", Any]] = None # NB second element is "Hierarchy" + child: Optional["Level"] = None + + code_format: CodingFormat = field(default_factory=CodingFormat) + + @dataclass class HierarchicalCode(IdentifiableArtefact): + #: Date from which the construct is valid. + valid_from: Optional[str] = None + #: Date from which the construct is superseded. + valid_to: Optional[str] = None + + #: The Code that is used at the specific point in the hierarchy. 
code: Optional[Code] = None + + level: Optional[Level] = None + parent: Optional[ Union["HierarchicalCode", Any] ] = None # NB second element is "Hierarchy" child: List["HierarchicalCode"] = field(default_factory=list) -@dataclass -class Level(NameableArtefact): - parent: Optional[Union["Level", Any]] = None # NB second element is "Hierarchy" - child: Optional["Level"] = None - - # SDMX 2.1 §9: Structure Set and Mappings diff --git a/sdmx/model/v30.py b/sdmx/model/v30.py index a32a16f1b..6f6f0a69d 100644 --- a/sdmx/model/v30.py +++ b/sdmx/model/v30.py @@ -13,7 +13,6 @@ ConstrainableArtefact, ConstraintRole, ConstraintRoleType, - Facet, IdentifiableArtefact, MaintainableArtefact, NameableArtefact, @@ -46,9 +45,6 @@ "StructureSpecificDataSet", "MetadataStructureDefinition", "Metadataflow", - "CodingFormat", - "Level", - "HierarchicalCode", "Hierarchy", "HierarchyAssociation", "SelectionValue", @@ -390,47 +386,17 @@ class Metadataflow(common.BaseMetadataflow): # §8: Hierarchy -class CodingFormat: - """SDMX 3.0 CodingFormat.""" - - coding_format: Facet - - -@dataclass -class Level(NameableArtefact): - child: Optional["Level"] = None - parent: Optional["Level"] = None - - code_format: CodingFormat = field(default_factory=CodingFormat) - - -@dataclass -class HierarchicalCode(IdentifiableArtefact): - #: Date from which the construct is valid. - valid_from: Optional[str] = None - #: Date from which the construct is superseded. - valid_to: Optional[str] = None - - child: List["HierarchicalCode"] = field(default_factory=list) - parent: List["HierarchicalCode"] = field(default_factory=list) - - #: The Code that is used at the specific point in the hierarchy. - code: Optional[Code] = None - - level: Optional[Level] = None - - @dataclass class Hierarchy(MaintainableArtefact): """SDMX 3.0 Hierarchy.""" - has_format_levels: bool = False + has_formal_levels: bool = False #: The top :class:`Level` in the hierarchy. 
- level: Optional[Level] = None + level: Optional[common.Level] = None #: The top-level :class:`HierarchicalCodes ` in the hierarchy. - codes: List[HierarchicalCode] = field(default_factory=list) + codes: List[common.HierarchicalCode] = field(default_factory=list) @dataclass From 94ad346ef8969aadc678ea81c972585c3e08867f Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 28 Dec 2023 00:53:12 -0500 Subject: [PATCH 034/103] Add v30.Hierarchy to .format.xml --- sdmx/format/xml/v30.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdmx/format/xml/v30.py b/sdmx/format/xml/v30.py index 52ad3d327..7b105d67b 100644 --- a/sdmx/format/xml/v30.py +++ b/sdmx/format/xml/v30.py @@ -23,6 +23,7 @@ GeographicCodelist GeoGridCode GeoGridCodelist + Hierarchy Measure MetadataConstraint ValueItem From aefbbeef78e5a96f01b5347a9c7eca6a3337a0e3 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 28 Dec 2023 00:53:33 -0500 Subject: [PATCH 035/103] Add StructureMessage.hierarchy --- sdmx/message.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sdmx/message.py b/sdmx/message.py index 55efa8f2a..7cafade22 100644 --- a/sdmx/message.py +++ b/sdmx/message.py @@ -196,6 +196,8 @@ class StructureMessage(Message): hierarchical_code_list: DictLikeDescriptor[ str, v21.HierarchicalCodelist ] = DictLikeDescriptor() + #: Collection of :class:`.v30.Hierarchy`. + hierarchy: DictLikeDescriptor[str, v30.Hierarchy] = DictLikeDescriptor() #: Collection of :class:`.ConceptScheme`. concept_scheme: DictLikeDescriptor[str, model.ConceptScheme] = DictLikeDescriptor() #: Collection of :class:`.ContentConstraint`. 
From ba2561baa5ba414eb8836cabb36ac351a163af51 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 28 Dec 2023 00:54:35 -0500 Subject: [PATCH 036/103] Test read of bool Hierarchy.has_formal_levels --- sdmx/reader/xml/v21.py | 2 +- sdmx/tests/model/test_v21.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index ad1ee3f61..6bba66840 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -1802,7 +1802,7 @@ def _h(reader: Reader, elem): return reader.nameable( cls, elem, - has_formal_levels=bool(elem.attrib["leveled"]), + has_formal_levels=eval(elem.attrib["leveled"].title()), codes={c.id: c for c in reader.pop_all(model.HierarchicalCode)}, level=reader.pop_single(common.Level), ) diff --git a/sdmx/tests/model/test_v21.py b/sdmx/tests/model/test_v21.py index 41b79d5d7..d4abca1d0 100644 --- a/sdmx/tests/model/test_v21.py +++ b/sdmx/tests/model/test_v21.py @@ -599,6 +599,7 @@ def test_hierarchy(self, msg: sdmx.message.StructureMessage) -> None: # Access a Hierarchy h = hcl.hierarchy[0] assert "HIERARCHY_COUNTRY" == h.id + assert False is h.has_formal_levels assert 2 == len(h.codes) c1 = h.codes["1"] From 7e08b580733f2f6dcf523a85c7bb45d6d85932af Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 28 Dec 2023 00:55:29 -0500 Subject: [PATCH 037/103] Use versioned Reference to parse HierarchicalCode --- sdmx/reader/xml/v21.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index 6bba66840..10c5dc20b 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -1760,7 +1760,7 @@ def _msd(reader: Reader, elem): # pragma: no cover def _hc(reader: Reader, elem): cls = reader.class_for_tag(elem.tag) - code = reader.resolve(reader.pop_single(Reference)) + code = reader.resolve(reader.pop_single(reader.Reference)) if code is None: # Retrieve and resolve the reference to the Codelist From 
1ba97ec661b0ad246e798ed043d662bee0c268a2 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 28 Dec 2023 00:57:12 -0500 Subject: [PATCH 038/103] Read SDMX-ML 3.0.0 --- sdmx/model/v30.py | 4 ++-- sdmx/reader/xml/v30.py | 21 ++++++++++++++++++--- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/sdmx/model/v30.py b/sdmx/model/v30.py index 6f6f0a69d..38eccf4c6 100644 --- a/sdmx/model/v30.py +++ b/sdmx/model/v30.py @@ -2,7 +2,7 @@ from dataclasses import dataclass, field from enum import Enum -from typing import Any, ClassVar, List, Optional, Set +from typing import Any, ClassVar, Dict, List, Optional, Set from . import common from .common import ( @@ -396,7 +396,7 @@ class Hierarchy(MaintainableArtefact): level: Optional[common.Level] = None #: The top-level :class:`HierarchicalCodes ` in the hierarchy. - codes: List[common.HierarchicalCode] = field(default_factory=list) + codes: Dict[str, common.HierarchicalCode] = field(default_factory=dict) @dataclass diff --git a/sdmx/reader/xml/v30.py b/sdmx/reader/xml/v30.py index 15a1441af..2c1e52963 100644 --- a/sdmx/reader/xml/v30.py +++ b/sdmx/reader/xml/v30.py @@ -58,9 +58,9 @@ class Reader(v21.Reader): start( """ str:AgencySchemes str:ConceptSchemes str:CustomTypeSchemes str:DataConstraints - str:GeographicCodelists str:GeoGridCodelists str:NamePersonalisationSchemes - str:RulesetSchemes str:TransformationSchemes str:UserDefinedOperatorSchemes - str:ValueLists str:VtlMappingSchemes + str:GeographicCodelists str:GeoGridCodelists str:Hierarchies + str:NamePersonalisationSchemes str:RulesetSchemes str:TransformationSchemes + str:UserDefinedOperatorSchemes str:ValueLists str:VtlMappingSchemes """ )(None) @@ -183,3 +183,18 @@ def _complex(reader: Reader, elem): reader.stack["Attributes"][-1][da.id] = model.AttributeValue( value=reader.pop_all("ComplexValue"), value_for=da ) + + +# §8: Hierarchy + + +@end("str:Hierarchy") +def _h(reader: Reader, elem): + cls = reader.class_for_tag(elem.tag) + return 
reader.maintainable( + cls, + elem, + has_formal_levels=eval(elem.attrib["hasFormalLevels"].title()), + codes={c.id: c for c in reader.pop_all(model.HierarchicalCode)}, + level=reader.pop_single(common.Level), + ) From 5090a1e82f69721a817e149c955f62d4937794af Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 28 Dec 2023 00:57:59 -0500 Subject: [PATCH 039/103] Add 3 specimens of v3 Hierarchy --- sdmx/testing/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sdmx/testing/__init__.py b/sdmx/testing/__init__.py index 83622afed..23c756e8b 100644 --- a/sdmx/testing/__init__.py +++ b/sdmx/testing/__init__.py @@ -265,6 +265,8 @@ def __init__(self, base_path): ("ECB", "structureset-0.xml"), ("ESTAT", "apro_mk_cola-structure.xml"), ("ESTAT", "GOV_10Q_GGNFA.xml"), + ("ESTAT", "HCL_WSTATUS_SCL_BNSPART.xml"), + ("ESTAT", "HCL_WSTATUS_SCL_WSTATUSPR.xml"), ("IMF", "1PI-structure.xml"), ("IMF", "CL_AREA-structure.xml"), # Manually reduced subset of the response for this DSD. 
Test for @@ -281,6 +283,7 @@ def __init__(self, base_path): ("OECD", "actualconstraint-0.xml"), ("UNICEF", "GLOBAL_DATAFLOW-structure.xml"), ("UNSD", "codelist_partial.xml"), + ("SDMX", "HCL_TEST_AREA.xml"), ("SGR", "common-structure.xml"), ("SGR", "hierarchicalcodelist-0.xml"), ("SPC", "actualconstraint-0.xml"), From c334c1265f8d5bbad5d05d7452047856b5fc2d7c Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 28 Dec 2023 01:25:55 -0500 Subject: [PATCH 040/103] Update test_model.test_common --- sdmx/tests/test_model.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/sdmx/tests/test_model.py b/sdmx/tests/test_model.py index 37e6e98eb..528710343 100644 --- a/sdmx/tests/test_model.py +++ b/sdmx/tests/test_model.py @@ -49,6 +49,14 @@ "Key", "GroupKey", "SeriesKey", + "CodingFormat", + "Level", + "HierarchicalCode", + "ItemAssociation", + "CodeMap", + "ItemSchemeMap", + "CodelistMap", + "StructureSet", "ConstraintRole", "ConstrainableArtefact", "SelectionValue", @@ -118,6 +126,8 @@ "GenericTimeSeriesDataSet", "StructureSpecificTimeSeriesDataSet", "MetadataflowDefinition", + "Hierarchy", + "HierarchicalCodelist", ] V30_ONLY = [ @@ -134,9 +144,6 @@ "MetadataProviderScheme", "Measure", "Dataflow", # Instead of DataflowDefinition - "CodingFormat", - "Level", - "HierarchicalCode", "Hierarchy", "HierarchyAssociation", "DataflowRelationship", From 71d89a558e863dd3f2b2dfba460da17f6f5da5e0 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 28 Dec 2023 01:26:31 -0500 Subject: [PATCH 041/103] Update sources.json per metadatastructure query support --- sdmx/sources.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sdmx/sources.json b/sdmx/sources.json index 173e9215b..7dbd10b2e 100644 --- a/sdmx/sources.json +++ b/sdmx/sources.json @@ -75,6 +75,7 @@ "allowedconstraint": false, "contentconstraint": false, "hierarchicalcodelist": false, + "metadatastructure": false, "structure": false, "structureset": false } @@ -104,6 
+105,7 @@ "allowedconstraint": false, "contentconstraint": false, "hierarchicalcodelist": false, + "metadatastructure": false, "structure": false, "structureset": false } @@ -137,6 +139,7 @@ "allowedconstraint": false, "contentconstraint": false, "hierarchicalcodelist": false, + "metadatastructure": false, "structure": false, "structureset": false } @@ -150,6 +153,7 @@ "allowedconstraint": false, "contentconstraint": false, "hierarchicalcodelist": false, + "metadatastructure": false, "structure": false, "structureset": false } From 09de1c4d6197c175ae00a02cfa221d0955f62ed2 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 28 Dec 2023 23:14:17 -0500 Subject: [PATCH 042/103] =?UTF-8?q?Add=20partial=20implementations=20of=20?= =?UTF-8?q?some=20IM=20=C2=A77=20classes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- sdmx/model/common.py | 47 ++++++++++++++++- sdmx/model/v21.py | 119 +++++++++++++++++++++++++++++++++++++++++++ sdmx/model/v30.py | 101 ++++++++++++++++++++++++++++++++++++ 3 files changed, 266 insertions(+), 1 deletion(-) diff --git a/sdmx/model/common.py b/sdmx/model/common.py index 3c12c5c72..25e9e48dd 100644 --- a/sdmx/model/common.py +++ b/sdmx/model/common.py @@ -2064,6 +2064,21 @@ def compare(self, other, strict=True): # §7.3: Metadata Structure Definition +class AttributeComponent(Component): + """SDMX 3.0 AttributeComponent. + + .. note:: This intermediate, abstract class is not present in the SDMX 2.1 IM. 
+ """ + + +class MetadataAttribute(AttributeComponent): + """SDMX MetadataAttribute.""" + + is_presentational: bool + max_occurs: int + min_occurs: int + + class BaseMetadataStructureDefinition(Structure, ConstrainableArtefact): """ABC for SDMX 2.1 and 3.0 MetadataStructureDefinition.""" @@ -2072,8 +2087,38 @@ class BaseMetadataflow(StructureUsage, ConstrainableArtefact): """ABC for SDMX 2.1 MetadataflowDefinition and SDMX 3.0 Metadataflow.""" +# §7.4 MetadataSet + + +@dataclass +class BaseTextAttributeValue: + """ABC for SDMX 2.1 and 3.0 TextAttributeValue.""" + + text: InternationalStringDescriptor = InternationalStringDescriptor() + + +@dataclass +class BaseXHTMLAttributeValue: + """ABC for SDMX 2.1 and 3.0 XHTMLAttributeValue.""" + + value: str + + +@dataclass +class BaseMetadataSet: + """ABC for SDMX 2.1 and 3.0 MetadataSet.""" + + action: ActionType + + reporting_begin: date + reporting_end: date + + publication_period: date + publication_year: date + + # SDMX 2.1 §8: Hierarchical Code List -# SDMX 3.9 §8: Hierarchy +# SDMX 3.0 §8: Hierarchy class CodingFormat: diff --git a/sdmx/model/v21.py b/sdmx/model/v21.py index 2f2cbf3fa..d9cd421cd 100644 --- a/sdmx/model/v21.py +++ b/sdmx/model/v21.py @@ -285,13 +285,132 @@ class StructureSpecificTimeSeriesDataSet(DataSet): # §7.3 Metadata Structure Definition +class ReportingCategory(common.Item): + pass + + +class ReportingTaxonomy(common.ItemScheme): + pass + + +class TargetObject(common.Component): + pass + + +class DataSetTarget(TargetObject): + pass + + +class DimensionDescriptorValuesTarget(TargetObject): + pass + + +class IdentifiableObjectTarget(TargetObject): + pass + + +class ReportPeriodTarget(TargetObject): + pass + + +class MetadataTarget(ComponentList): + """SDMX 2.1 MetadataTarget.""" + + _Component = TargetObject + + +class ReportStructure(ComponentList): + """SDMX 2.1 ReportStructure.""" + + _Component = common.MetadataAttribute + + class 
MetadataStructureDefinition(common.BaseMetadataStructureDefinition): """SDMX 2.1 MetadataStructureDefinition.""" + # NB narrows the type of common.Structure.grouping + #: .. note:: SDMX 2.1 IM (2011-08), in Figure 28, gives the cardinality of this + #: association as "1..*", but the text (§7.4.3.2) reads "An association to a + #: [singular] Metadata Target or Report Structure." This implementation follows + #: the latter, which is consistent with the typing of :class:`.common.Structure`. + grouping: Optional[Union[MetadataTarget, ReportStructure]] = None + class MetadataflowDefinition(common.BaseMetadataflow): """SDMX 2.1 MetadataflowDefinition.""" + # NB narrows the type of common.StructureUsage.structure + structure: MetadataStructureDefinition + + +# §7.4: Metadata Set + + +@dataclass +class ReportedAttribute: + """SDMX 2.1 ReportedAttribute. + + Analogous to :class:`.v30.MetadataAttributeValue`. + """ + + value_for: common.MetadataAttribute + parent: Optional["ReportedAttribute"] = None + child: List["ReportedAttribute"] = field(default_factory=list) + + +class EnumeratedAttributeValue(ReportedAttribute): + """SDMX 2.1 EnumeratedAttributeValue. + + Analogous to :class:`.v30.CodedMetadataAttributeValue`. + """ + + value: str + + #: .. note:: The SDMX 2.1 IM (2011-08) gives this as `valueFor`, but this name + #: duplicates :attr:`ReportedAttribute.value_for`. :mod:`sdmx` uses `value_of` + #: for consistency with :attr:`.v30.CodedMetadataAttributeValue.value_of`. 
+ value_of: common.Code + + +class NonEnumeratedAttributeValue(ReportedAttribute): + pass + + +class OtherNonEnumeratedAttributeValue(NonEnumeratedAttributeValue): + value: str + + +class TextAttributeValue(NonEnumeratedAttributeValue, common.BaseTextAttributeValue): + pass + + +class XHTMLAttributeValue(NonEnumeratedAttributeValue, common.BaseXHTMLAttributeValue): + pass + + +@dataclass +class MetadataReport: + metadata: List[ReportedAttribute] = field(default_factory=list) + target: Optional[MetadataTarget] = None + + +@dataclass +class MetadataSet(NameableArtefact, common.BaseMetadataSet): + """SDMX 2.1 MetadataSet. + + .. note:: Contrast :class:`.v30.MetadataSet`, which is a + :class:`.MaintainableArtefact`. + """ + + described_by: Optional[MetadataflowDefinition] = None + # described_by: Optional[ReportStructure] = None + structured_by: Optional[MetadataStructureDefinition] = None + + #: Analogous to :attr:`.v30.MetadataSet.provided_by`. + published_by: Optional[common.DataProvider] = None + + report: List[MetadataReport] = field(default_factory=list) + # §8 Hierarchical Code List diff --git a/sdmx/model/v30.py b/sdmx/model/v30.py index 38eccf4c6..5e58b636f 100644 --- a/sdmx/model/v30.py +++ b/sdmx/model/v30.py @@ -1,6 +1,7 @@ """SDMX 3.0 Information Model.""" from dataclasses import dataclass, field +from datetime import date from enum import Enum from typing import Any, ClassVar, Dict, List, Optional, Set @@ -375,13 +376,113 @@ class StructureSpecificDataSet(DataSet): # §7.3 Metadata Structure Definition +class MetadataAttributeDescriptor(common.ComponentList): + """SDMX 3.0 MetadataAttributeDescriptor.""" + + _Component = common.MetadataAttribute + + +class IdentifiableObjectSelection: + pass + + class MetadataStructureDefinition(common.BaseMetadataStructureDefinition): """SDMX 3.0 MetadataStructureDefinition.""" + #: A :class:`MetadataAttributeDescriptor` that describes the attributes of the + #: metadata structure. + #: + #: .. 
note:: The SDMX 3.0.0 IM (version 1.0 / 2021-10) does not give a name for this + #: association. :mod:`sdmx` uses `attributes` for consistency with + #: :class:`.DataStructureDefinition`. + attributes: MetadataAttributeDescriptor = field( + default_factory=MetadataAttributeDescriptor + ) + class Metadataflow(common.BaseMetadataflow): """SDMX 3.0 MetadataflowDefinition.""" + structure: MetadataStructureDefinition + + +# §7.4: Metadata Set + + +class MetadataAttributeValue: + """SDMX 3.0 MetadataAttributeValue. + + Analogous to :class:`.v21.ReportedAttribute`. + """ + + # NB the IM specifies this is a subclass of common.AttributeValue, but the + # implementation in .common has both Coded- and UncodedAttributeValue, which + # offends mypy. + + parent: Optional["MetadataAttributeValue"] = None + child: List["MetadataAttributeValue"] = field(default_factory=list) + + +class CodedMetadataAttributeValue(MetadataAttributeValue): + """SDMX 3.0 CodedMetadataAttributeValue. + + Analogous to :class:`.v21.EnumeratedAttributeValue`. + """ + + value_of: common.Code + + +class UncodedMetadataAttributeValue(MetadataAttributeValue): + pass + + +class OtherUncodedAttributeValue(UncodedMetadataAttributeValue): + value: str + start_time: date + + +class TextAttributeValue(UncodedMetadataAttributeValue, common.BaseTextAttributeValue): + pass + + +class XHTMLAttributeValue( + UncodedMetadataAttributeValue, common.BaseXHTMLAttributeValue +): + pass + + +class TargetIdentifiableObject: + pass + + +@dataclass +class MetadataSet(MaintainableArtefact, common.BaseMetadataSet): + """SDMX 3.0 MetadataSet. + + .. note:: Contrast :class:`.v21.MetadataSet`, which is a :class:`.NameableArtefact`. 
+ """ + + # NB Would prefer to type as datetime.date, but VersionableArtefact currently uses + # str + valid_from: Optional[str] = None + # NB Would prefer to type as datetime.date, but VersionableArtefact currently uses + # str + valid_to: Optional[str] = None + set_id: Optional[str] = None + + described_by: Optional[Metadataflow] = None + + # described_by: Optional[MetadataProvisionAgreement] = None + + structured_by: Optional[MetadataAttributeDescriptor] = None + + #: Analogous to :attr:`.v21.MetadataSet.published_by`. + provided_by: Optional[MetadataProvider] = None + + attaches_to: List[TargetIdentifiableObject] = field(default_factory=list) + + metadata: List[MetadataAttributeValue] = field(default_factory=list) + # §8: Hierarchy From 982d084d28b790969a61601ccf28ed5878c25df8 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 29 Dec 2023 17:46:39 -0500 Subject: [PATCH 043/103] Add StructureMessage.metadatastructure --- sdmx/message.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sdmx/message.py b/sdmx/message.py index 7cafade22..708b3c939 100644 --- a/sdmx/message.py +++ b/sdmx/message.py @@ -204,7 +204,12 @@ class StructureMessage(Message): constraint: DictLikeDescriptor[str, model.BaseConstraint] = DictLikeDescriptor() #: Collection of :class:`Dataflow(Definition) <.BaseDataflow>`. dataflow: DictLikeDescriptor[str, model.BaseDataflow] = DictLikeDescriptor() - #: Collection of :class:`MetaDataflow(Definition) <.BaseMetaDataflow>`. + #: Collection of :class:`MetadataStructureDefinition + #: <.BaseMetadataStructureDefinition>`. + metadatastructure: DictLikeDescriptor[ + str, model.BaseMetadataStructureDefinition + ] = DictLikeDescriptor() + #: Collection of :class:`Metadataflow(Definition) <.BaseMetadataflow>`. metadataflow: DictLikeDescriptor[str, model.BaseMetadataflow] = DictLikeDescriptor() #: Collection of :class:`DataStructureDefinition <.BaseDataStructureDefinition>`. 
structure: DictLikeDescriptor[ From b2823bf0ae60e215a0f404a81e0d88275ef33916 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 29 Dec 2023 17:51:59 -0500 Subject: [PATCH 044/103] Improve common.Structure, v21.MetadataStructureDefinition - Construct .grouping dynamically from subclass fields. - Add .replace_grouping(). - Add .compare(), replacing BaseDataStructureDefinition.compare() and usable for MetadataStructureDefinition. - Add v21.MetadataStructureDefinition.{report_structure,target} --- sdmx/model/common.py | 59 +++++++++++++++++++++++++++++--------------- sdmx/model/v21.py | 12 ++++----- 2 files changed, 45 insertions(+), 26 deletions(-) diff --git a/sdmx/model/common.py b/sdmx/model/common.py index 25e9e48dd..80405c0cf 100644 --- a/sdmx/model/common.py +++ b/sdmx/model/common.py @@ -21,7 +21,7 @@ from abc import ABC, abstractmethod from collections import ChainMap from copy import copy -from dataclasses import InitVar, dataclass, field +from dataclasses import InitVar, dataclass, field, fields from datetime import date, datetime, timedelta from enum import Enum from functools import lru_cache @@ -45,6 +45,7 @@ TypeVar, Union, get_args, + get_origin, ) from sdmx.dictlike import DictLikeDescriptor @@ -1098,8 +1099,42 @@ class DataProviderScheme(OrganisationScheme[DataProvider]): @dataclass(repr=False) class Structure(MaintainableArtefact): - #: - grouping: Optional[ComponentList] = None + @property + def grouping(self) -> Sequence[ComponentList]: + """A collection of all the ComponentLists associated with a subclass.""" + result: List[ComponentList] = [] + for f in fields(self): + if isinstance(f.type, ComponentList): + result.append(getattr(self.f.name)) + return result + + def replace_grouping(self, cl: ComponentList) -> None: + """Replace existing component list with `cl`.""" + field = None + for f in fields(self): + is_dictlike = get_origin(f.type) is DictLikeDescriptor + if f.type == type(cl) or (is_dictlike and get_args(f.type)[1] is 
type(cl)): + field = f + break + + if not field: + raise TypeError(f"No grouping of type {type(cl)} on {type(self)}") + + if is_dictlike: + getattr(self, field.name).setdefault(cl.id, cl) + else: + setattr(self, field.name, cl) + + def compare(self, other: "Structure", strict: bool = True) -> bool: + from operator import attrgetter + + return all( + s.compare(o, strict) + for s, o in zip( + sorted(self.grouping, key=attrgetter("id")), + sorted(other.grouping, key=attrgetter("id")), + ) + ) class StructureUsage(MaintainableArtefact): @@ -1258,6 +1293,7 @@ class BaseDataStructureDefinition(Structure, ConstrainableArtefact): str, GroupDimensionDescriptor ] = DictLikeDescriptor() + # Specific types to be used in concrete subclasses MemberValue: ClassVar[Type["BaseMemberValue"]] MemberSelection: ClassVar[Type["BaseMemberSelection"]] ConstraintType: ClassVar[Type[BaseConstraint]] @@ -1496,23 +1532,6 @@ def dim(id): # noqa: F811 return key - def compare(self, other, strict=True): - """Return :obj:`True` if `self` is the same as `other`. - - Two DataStructureDefinitions are the same if each of :attr:`attributes`, - :attr:`dimensions`, :attr:`measures`, and :attr:`group_dimensions` compares - equal. - - Parameters - ---------- - strict : bool, optional - Passed to :meth:`.ComponentList.compare`. 
- """ - return all( - getattr(self, attr).compare(getattr(other, attr), strict) - for attr in ("attributes", "dimensions", "measures", "group_dimensions") - ) - @dataclass(repr=False) class BaseDataflow(StructureUsage, ConstrainableArtefact): diff --git a/sdmx/model/v21.py b/sdmx/model/v21.py index d9cd421cd..dcbc67978 100644 --- a/sdmx/model/v21.py +++ b/sdmx/model/v21.py @@ -325,15 +325,15 @@ class ReportStructure(ComponentList): _Component = common.MetadataAttribute +@dataclass +@common.MaintainableArtefact._preserve("hash") class MetadataStructureDefinition(common.BaseMetadataStructureDefinition): """SDMX 2.1 MetadataStructureDefinition.""" - # NB narrows the type of common.Structure.grouping - #: .. note:: SDMX 2.1 IM (2011-08), in Figure 28, gives the cardinality of this - #: association as "1..*", but the text (§7.4.3.2) reads "An association to a - #: [singular] Metadata Target or Report Structure." This implementation follows - #: the latter, which is consistent with the typing of :class:`.common.Structure`. 
- grouping: Optional[Union[MetadataTarget, ReportStructure]] = None + report_structure: ReportStructure = field(default_factory=ReportStructure) + + #: Association to 1 or more :class:`.MetadataTarget` + target: DictLikeDescriptor[str, MetadataTarget] = DictLikeDescriptor() class MetadataflowDefinition(common.BaseMetadataflow): From ece0051f47e50b7144957646780a3f3ff2ec0061 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 29 Dec 2023 17:52:45 -0500 Subject: [PATCH 045/103] Ensure .v30.MetadataStructureDefinition is hashable --- sdmx/model/v30.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sdmx/model/v30.py b/sdmx/model/v30.py index 5e58b636f..cc585cca0 100644 --- a/sdmx/model/v30.py +++ b/sdmx/model/v30.py @@ -386,6 +386,8 @@ class IdentifiableObjectSelection: pass +@dataclass +@MaintainableArtefact._preserve("hash") class MetadataStructureDefinition(common.BaseMetadataStructureDefinition): """SDMX 3.0 MetadataStructureDefinition.""" From 2213e6221e4b2ed5724471aa51b771f97f9f61d3 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 29 Dec 2023 17:53:17 -0500 Subject: [PATCH 046/103] Update .model.v30 ClassFinder --- sdmx/model/v30.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sdmx/model/v30.py b/sdmx/model/v30.py index cc585cca0..c0e323e3a 100644 --- a/sdmx/model/v30.py +++ b/sdmx/model/v30.py @@ -514,7 +514,13 @@ class HierarchyAssociation(MaintainableArtefact): linked_hierarchy: Optional[Hierarchy] = None -CF = common.ClassFinder(__name__, parent_map={Measure: MeasureDescriptor}) +CF = common.ClassFinder( + __name__, + parent_map={ + Measure: MeasureDescriptor, + common.MetadataAttribute: MetadataAttributeDescriptor, + }, +) get_class = CF.get_class parent_class = CF.parent_class __dir__ = CF.dir From 09b681c46dc872206baafeda88932369f91f4a9d Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 29 Dec 2023 17:53:38 -0500 Subject: [PATCH 047/103] Add .v21.ReportStructure.report_for --- 
sdmx/model/v21.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sdmx/model/v21.py b/sdmx/model/v21.py index dcbc67978..c46115a4c 100644 --- a/sdmx/model/v21.py +++ b/sdmx/model/v21.py @@ -319,11 +319,14 @@ class MetadataTarget(ComponentList): _Component = TargetObject +@dataclass class ReportStructure(ComponentList): """SDMX 2.1 ReportStructure.""" _Component = common.MetadataAttribute + report_for: List[MetadataTarget] = field(default_factory=list) + @dataclass @common.MaintainableArtefact._preserve("hash") @@ -443,6 +446,7 @@ def __repr__(self) -> str: parent_map={ common.HierarchicalCode: Hierarchy, PrimaryMeasure: MeasureDescriptor, + MetadataTarget: MetadataStructureDefinition, }, ) get_class = CF.get_class From 86998d2717d4e4f8a64c52a31938a218d7f04fd1 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 29 Dec 2023 17:54:01 -0500 Subject: [PATCH 048/103] Update .format.xml --- sdmx/format/xml/v21.py | 5 +++++ sdmx/format/xml/v30.py | 2 ++ 2 files changed, 7 insertions(+) diff --git a/sdmx/format/xml/v21.py b/sdmx/format/xml/v21.py index 8e62d009f..18e8b04f3 100644 --- a/sdmx/format/xml/v21.py +++ b/sdmx/format/xml/v21.py @@ -12,6 +12,7 @@ ("message.DataMessage", "mes:StructureSpecificTimeSeriesData"), ("model.NoSpecifiedRelationship", "str:None"), ("model.DataflowDefinition", "str:Dataflow"), + ("model.DimensionDescriptorValuesTarget", "str:KeyDescriptorValuesTarget"), ("model.MetadataflowDefinition", "str:Metadataflow"), ] + [ @@ -20,8 +21,12 @@ ContentConstraint HierarchicalCodelist Hierarchy + IdentifiableObjectTarget MeasureDimension + MetadataAttribute + MetadataTarget PrimaryMeasure + ReportStructure """.split() ], ) diff --git a/sdmx/format/xml/v30.py b/sdmx/format/xml/v30.py index 7b105d67b..014637f21 100644 --- a/sdmx/format/xml/v30.py +++ b/sdmx/format/xml/v30.py @@ -10,6 +10,7 @@ ("model.DataflowRelationship", "str:None"), ("model.ObservationRelationship", "str:Observation"), ("model.Dataflow", "str:Dataflow"), + 
("model.MetadataAttributeDescriptor", "str:MetadataAttributeList"), ("model.Metadataflow", "str:Metadataflow"), ] + [ @@ -25,6 +26,7 @@ GeoGridCodelist Hierarchy Measure + MetadataAttribute MetadataConstraint ValueItem ValueList From 69cb498c445bb39d70dfffc6ea6461675e5d8f40 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 29 Dec 2023 17:56:49 -0500 Subject: [PATCH 049/103] Read {Metad,D}ataStructureDefinition with 1 method --- sdmx/reader/xml/v21.py | 39 ++++++++++++++------------------------- 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index 10c5dc20b..17bc30d2a 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -1502,29 +1502,30 @@ def _ar(reader, elem): return common.GroupRelationship(**args) -@start("str:DataStructure", only=False) +@start("str:DataStructure str:MetadataStructure", only=False) @possible_reference() # in -def _dsd_start(reader: Reader, elem): +def _structure_start(reader: Reader, elem): # Get any external reference created earlier, or instantiate a new object - dsd = reader.maintainable(reader.model.DataStructureDefinition, elem) + cls = reader.class_for_tag(elem.tag) + obj = reader.maintainable(cls, elem) - if dsd not in reader.stack[reader.model.DataStructureDefinition]: + if obj not in reader.stack[cls]: # A new object was created - reader.push(dsd) + reader.push(obj) # Store a separate reference to the current DSD - reader.push("current DSD", dsd) + reader.push("current DSD", obj) -@end("str:DataStructure", only=False) -def _dsd_end(reader, elem): - dsd = reader.pop_single("current DSD") +@end("str:DataStructure str:MetadataStructure", only=False) +def _structure_end(reader, elem): + obj = reader.pop_single("current DSD") - if dsd: + if obj: # Collect annotations, name, and description - dsd.annotations = list(reader.pop_all(model.Annotation)) - add_localizations(dsd.name, reader.pop_all("Name")) - add_localizations(dsd.description, 
reader.pop_all("Description")) + obj.annotations = list(reader.pop_all(model.Annotation)) + add_localizations(obj.name, reader.pop_all("Name")) + add_localizations(obj.description, reader.pop_all("Description")) @end("str:Dataflow str:Metadataflow") @@ -1741,18 +1742,6 @@ def _ds_end(reader, elem): # §7.3: Metadata Structure Definition -@end("str:MetadataTarget") -def _mdt(reader: Reader, elem): # pragma: no cover - raise NotImplementedError - - -@end("str:MetadataStructure") -def _msd(reader: Reader, elem): # pragma: no cover - cls = reader.class_for_tag(elem) - log.warning(f"Not parsed: {elem.tag} -> {cls}") - return NotImplemented - - # §8: Hierarchical Code List From 821d7ab9b2beb964b3dafb18d82138e304089bc5 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 29 Dec 2023 17:57:41 -0500 Subject: [PATCH 050/103] Extend XML tags handled by .v21._component() --- sdmx/reader/xml/v21.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index 17bc30d2a..13d8ac630 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -1139,7 +1139,8 @@ def _concept(reader, elem): @end( """ - str:Attribute str:Dimension str:GroupDimension str:MeasureDimension + str:Attribute str:Dimension str:GroupDimension str:IdentifiableObjectTarget + str:KeyDescriptorValuesTarget str:MeasureDimension str:MetadataAttribute str:PrimaryMeasure str:TimeDimension """ ) From 165771fc39d32201f5099f93a93ac0db6765cd34 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 29 Dec 2023 18:01:12 -0500 Subject: [PATCH 051/103] Use Structure.replace_grouping(); handle metadata ComponentLists --- sdmx/reader/xml/v21.py | 50 +++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index 13d8ac630..9acd82e61 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -36,7 +36,7 @@ from sdmx import message from 
sdmx.exceptions import XMLParseError # noqa: F401 from sdmx.format import Version, list_media_types -from sdmx.model import common +from sdmx.model import common, v21 from sdmx.model import v21 as model from sdmx.reader.base import BaseReader @@ -1186,30 +1186,43 @@ def _component(reader: Reader, elem): return reader.identifiable(cls, elem, **args) -@end("str:AttributeList str:DimensionList str:Group str:MeasureList") +@end( + """ + str:AttributeList str:DimensionList str:Group str:MetadataTarget str:MeasureList + str:ReportStructure + """ +) @possible_reference(cls_hint=model.GroupDimensionDescriptor) # def _cl(reader: Reader, elem): - # Retrieve the DSD - dsd = reader.peek("current DSD") + # Retrieve the DSD (or MSD) + dsd: common.Structure = reader.peek("current DSD") assert dsd is not None - # Retrieve the components - args = dict(components=reader.pop_all(model.Component, subclass=True)) - # Determine the class - localname = QName(elem).localname - if localname == "Group": - cls: Type = model.GroupDimensionDescriptor + cls = reader.class_for_tag(elem.tag) + args = dict( + # Retrieve the components + components=reader.pop_all(model.Component, subclass=True), + # SDMX-ML spec for, e.g. DimensionList: "The id attribute is provided in this + # case for completeness. However, its value is fixed to 'DimensionDescriptor'." + id=elem.attrib.get("id", cls.__name__), + ) + + if cls is common.GroupDimensionDescriptor: + assert isinstance(dsd, common.BaseDataStructureDefinition) # Replace components with references args["components"] = [ dsd.dimensions.get(ref.target_id) for ref in reader.pop_all("DimensionReference") ] + elif cls is v21.ReportStructure: + assert isinstance(dsd, v21.MetadataStructureDefinition) + # Assemble MetadataTarget references for the `report_for` field + args["report_for"] = list() + for target_ref in reader.pop_all(reader.Reference): + args["report_for"].append(dsd.target[target_ref.id]) else: - # SDMX-ML spec for, e.g. 
DimensionList: "The id attribute is provided in this - # case for completeness. However, its value is fixed to 'DimensionDescriptor'." - cls = reader.class_for_tag(elem.tag) args["id"] = elem.attrib.get("id", cls.__name__) cl = reader.identifiable(cls, elem, **args) @@ -1223,16 +1236,7 @@ def _cl(reader: Reader, elem): # Assign to the DSD eagerly (instead of in _dsd_end()) for reference by next # ComponentList e.g. so that AttributeRelationship can reference the # DimensionDescriptor - attr = { - common.DimensionDescriptor: "dimensions", - common.AttributeDescriptor: "attributes", - reader.model.MeasureDescriptor: "measures", - common.GroupDimensionDescriptor: "group_dimensions", - }[cl.__class__] - if attr == "group_dimensions": - getattr(dsd, attr)[cl.id] = cl - else: - setattr(dsd, attr, cl) + dsd.replace_grouping(cl) # §4.5: Category Scheme From cc6b191bbba751c44eb514a9f70b381e1bdf75ef Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 29 Dec 2023 18:01:38 -0500 Subject: [PATCH 052/103] Read SDMX-ML 3.0 metadata structures --- sdmx/reader/xml/v30.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdmx/reader/xml/v30.py b/sdmx/reader/xml/v30.py index 2c1e52963..7b9358043 100644 --- a/sdmx/reader/xml/v30.py +++ b/sdmx/reader/xml/v30.py @@ -71,7 +71,8 @@ class Reader(v21.Reader): v21._item_start ) end("str:GeoFeatureSetCode str:GeoGridCode str:ValueItem", only=False)(v21._item_end) -end("str:Measure")(v21._component) +end("str:Measure str:MetadataAttribute")(v21._component) +end("str:MetadataAttributeList")(v21._cl) end("str:DataConstraint")(v21._cc) end("str:KeyValue")(v21._ms) end("str:Observation")(v21._ar_kind) From 98c1b69365318c1fc5ba7a17c8b698063d7ba45d Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 29 Dec 2023 18:02:13 -0500 Subject: [PATCH 053/103] Remove blanket Xfail of metadatastructure queries --- sdmx/tests/test_sources.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git 
a/sdmx/tests/test_sources.py b/sdmx/tests/test_sources.py index 2fe3534a7..8c6e29831 100644 --- a/sdmx/tests/test_sources.py +++ b/sdmx/tests/test_sources.py @@ -33,9 +33,7 @@ class DataSourceTest: source_id: str #: Failures affecting **all** data sources, internal to :mod:`sdmx`. - xfail_common = { - "metadatastructure": (XMLParseError, NI), # not parsed - } + xfail_common: Dict[str, Any] = {} #: Mapping of endpoint → Exception subclass. Tests of these endpoints are expected #: to fail with the given kind of exception. From 607fc249629f8161eb92c8fb8674a71699f6f1f5 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 29 Dec 2023 18:04:14 -0500 Subject: [PATCH 054/103] Add 5 specimens of SDMX-ML 2.1 and 3.0 metadata structures --- sdmx/testing/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sdmx/testing/__init__.py b/sdmx/testing/__init__.py index 23c756e8b..73817e901 100644 --- a/sdmx/testing/__init__.py +++ b/sdmx/testing/__init__.py @@ -267,6 +267,7 @@ def __init__(self, base_path): ("ESTAT", "GOV_10Q_GGNFA.xml"), ("ESTAT", "HCL_WSTATUS_SCL_BNSPART.xml"), ("ESTAT", "HCL_WSTATUS_SCL_WSTATUSPR.xml"), + ("IAEG-SDGs", "metadatastructure-0.xml"), ("IMF", "1PI-structure.xml"), ("IMF", "CL_AREA-structure.xml"), # Manually reduced subset of the response for this DSD. 
Test for @@ -280,13 +281,17 @@ def __init__(self, base_path): ("ISTAT", "22_289-structure.xml"), ("ISTAT", "47_850-structure.xml"), ("ISTAT", "actualconstraint-0.xml"), + ("ISTAT", "metadatastructure-0.xml"), ("OECD", "actualconstraint-0.xml"), + ("OECD", "metadatastructure-0.xml"), ("UNICEF", "GLOBAL_DATAFLOW-structure.xml"), ("UNSD", "codelist_partial.xml"), ("SDMX", "HCL_TEST_AREA.xml"), ("SGR", "common-structure.xml"), ("SGR", "hierarchicalcodelist-0.xml"), + ("SGR", "metadatastructure-0.xml"), ("SPC", "actualconstraint-0.xml"), + ("SPC", "metadatastructure-0.xml"), ("TEST", "gh-142.xml"), ("TEST", "gh-149.xml"), ] From 1f0594ed9e2e4156fe275a915af574f722b9c4ca Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Sat, 30 Dec 2023 01:29:37 -0500 Subject: [PATCH 055/103] Update sources.json per metadataflow support --- sdmx/sources.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sdmx/sources.json b/sdmx/sources.json index 7dbd10b2e..7fbf83823 100644 --- a/sdmx/sources.json +++ b/sdmx/sources.json @@ -75,6 +75,7 @@ "allowedconstraint": false, "contentconstraint": false, "hierarchicalcodelist": false, + "metadataflow": false, "metadatastructure": false, "structure": false, "structureset": false @@ -105,6 +106,7 @@ "allowedconstraint": false, "contentconstraint": false, "hierarchicalcodelist": false, + "metadataflow": false, "metadatastructure": false, "structure": false, "structureset": false @@ -139,6 +141,7 @@ "allowedconstraint": false, "contentconstraint": false, "hierarchicalcodelist": false, + "metadataflow": false, "metadatastructure": false, "structure": false, "structureset": false @@ -153,6 +156,7 @@ "allowedconstraint": false, "contentconstraint": false, "hierarchicalcodelist": false, + "metadataflow": false, "metadatastructure": false, "structure": false, "structureset": false From ae895e2544965f55fd5e36c047b87cc0d34bea5b Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Sat, 30 Dec 2023 01:30:04 -0500 Subject: [PATCH 056/103] 
Update docstring of Resource class --- sdmx/rest.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sdmx/rest.py b/sdmx/rest.py index bc58cf501..660f9b62c 100644 --- a/sdmx/rest.py +++ b/sdmx/rest.py @@ -48,30 +48,30 @@ class Resource(str, Enum): ``codelist`` :class:`.Codelist` ``conceptscheme`` :class:`.ConceptScheme` ``contentconstraint`` :class:`.ContentConstraint` + ``customtypescheme`` :class:`.CustomTypeScheme`. ``data`` :class:`.DataSet` ``dataflow`` :class:`Dataflow(Definition) <.BaseDataflow>` ``dataconsumerscheme`` :class:`.DataConsumerScheme` ``dataproviderscheme`` :class:`.DataProviderScheme` ``datastructure`` :class:`DataStructureDefinition <.BaseDataStructureDefinition>` + ``hierarchicalcodelist`` :class:`.v21.HierarchicalCodelist`. + ``metadata`` :class:`MetadataSet <.BaseMetadataSet>`. ``metadataflow`` :class:`Metadataflow(Definition) <.Metadataflow>` ``metadatastructure`` :class:`MetadataStructureDefinition <.BaseMetadataStructureDefinition>` + ``namepersonalisationscheme`` :class:`.NamePersonalisationScheme`. ``organisationscheme`` :class:`.OrganisationScheme` ``provisionagreement`` :class:`.ProvisionAgreement` + ``rulesetscheme`` :class:`.RulesetScheme`. ``structure`` Mixed. + ``structureset`` :class:`.StructureSet`. + ``transformationscheme`` :class:`.TransformationScheme`. + ``userdefinedoperatorscheme`` :class:`.UserDefinedOperatorScheme`. + ``vtlmappingscheme`` :class:`.VTLMappingScheme`. ----------------------------- ------------------------------------------------------ - ``customtypescheme`` Not implemented. - ``hierarchicalcodelist`` Not implemented. - ``metadata`` Not implemented. - ``namepersonalisationscheme`` Not implemented. ``organisationunitscheme`` Not implemented. ``process`` Not implemented. ``reportingtaxonomy`` Not implemented. - ``rulesetscheme`` Not implemented. ``schema`` Not implemented. - ``structureset`` Not implemented. - ``transformationscheme`` Not implemented. 
- ``userdefinedoperatorscheme`` Not implemented. - ``vtlmappingscheme`` Not implemented. ============================= ====================================================== """ # noqa: E501 From 415b44ac411627982e2583a492e452e132166a1e Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Sat, 30 Dec 2023 01:32:58 -0500 Subject: [PATCH 057/103] Ensure MetadataflowDefinition is a dataclass --- sdmx/model/v21.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdmx/model/v21.py b/sdmx/model/v21.py index c46115a4c..642e1ec74 100644 --- a/sdmx/model/v21.py +++ b/sdmx/model/v21.py @@ -339,11 +339,12 @@ class MetadataStructureDefinition(common.BaseMetadataStructureDefinition): target: DictLikeDescriptor[str, MetadataTarget] = DictLikeDescriptor() +@dataclass class MetadataflowDefinition(common.BaseMetadataflow): """SDMX 2.1 MetadataflowDefinition.""" # NB narrows the type of common.StructureUsage.structure - structure: MetadataStructureDefinition + structure: Optional[MetadataStructureDefinition] = None # §7.4: Metadata Set From 4ae4655baa2654a3ab789bbf2d9179940ce7d1ea Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Sat, 30 Dec 2023 01:33:33 -0500 Subject: [PATCH 058/103] Add .model.v21.TargetObject{Value,Key} --- sdmx/model/v21.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/sdmx/model/v21.py b/sdmx/model/v21.py index 642e1ec74..959079f1f 100644 --- a/sdmx/model/v21.py +++ b/sdmx/model/v21.py @@ -350,6 +350,26 @@ class MetadataflowDefinition(common.BaseMetadataflow): # §7.4: Metadata Set +@dataclass +class TargetObjectValue: + value_for: TargetObject + + +@dataclass +class TargetReportPeriod(TargetObjectValue): + report_period: str + + +@dataclass +class TargetIdentifiableObject(TargetObjectValue): + obj: IdentifiableArtefact + + +@dataclass +class TargetObjectKey: + key_values: DictLikeDescriptor[str, TargetObjectValue] = DictLikeDescriptor() + + @dataclass class ReportedAttribute: """SDMX 2.1 
ReportedAttribute. From 5f7b081f92749f8136ac4398f6538dc646414938 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Sat, 30 Dec 2023 01:34:11 -0500 Subject: [PATCH 059/103] Add ReportedAttribute.__{getitem,len}__() --- sdmx/model/v21.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sdmx/model/v21.py b/sdmx/model/v21.py index 959079f1f..3d590df41 100644 --- a/sdmx/model/v21.py +++ b/sdmx/model/v21.py @@ -381,6 +381,12 @@ class ReportedAttribute: parent: Optional["ReportedAttribute"] = None child: List["ReportedAttribute"] = field(default_factory=list) + def __getitem__(self, index: int) -> "ReportedAttribute": + return self.child[index] + + def __len__(self) -> int: + return len(self.child) + class EnumeratedAttributeValue(ReportedAttribute): """SDMX 2.1 EnumeratedAttributeValue. From e496239dbb931ae184470b6114b4dd9a7df10c8b Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Sat, 30 Dec 2023 01:34:40 -0500 Subject: [PATCH 060/103] Add .v21.XHTMLAttributeValue.value --- sdmx/model/v21.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdmx/model/v21.py b/sdmx/model/v21.py index 3d590df41..f0603063a 100644 --- a/sdmx/model/v21.py +++ b/sdmx/model/v21.py @@ -414,8 +414,9 @@ class TextAttributeValue(NonEnumeratedAttributeValue, common.BaseTextAttributeVa pass +@dataclass class XHTMLAttributeValue(NonEnumeratedAttributeValue, common.BaseXHTMLAttributeValue): - pass + value: str @dataclass From cd22574a4f106afc4a2a9bf731079db579442d35 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Sat, 30 Dec 2023 01:35:00 -0500 Subject: [PATCH 061/103] Add .v21.MetadataReport.attaches_to --- sdmx/model/v21.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdmx/model/v21.py b/sdmx/model/v21.py index f0603063a..68f266242 100644 --- a/sdmx/model/v21.py +++ b/sdmx/model/v21.py @@ -423,6 +423,7 @@ class XHTMLAttributeValue(NonEnumeratedAttributeValue, common.BaseXHTMLAttribute class MetadataReport: metadata: List[ReportedAttribute] 
= field(default_factory=list) target: Optional[MetadataTarget] = None + attaches_to: Optional[TargetObjectKey] = None @dataclass From 6e440e1a701c7ff022edf8dae65e3e84347431d6 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Sat, 30 Dec 2023 01:38:12 -0500 Subject: [PATCH 062/103] Add MetadataMessage Currently this is an identical subclass of DataMessage. --- sdmx/format/xml/common.py | 2 ++ sdmx/message.py | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/sdmx/format/xml/common.py b/sdmx/format/xml/common.py index 776e7e5bf..38c95ae40 100644 --- a/sdmx/format/xml/common.py +++ b/sdmx/format/xml/common.py @@ -67,6 +67,8 @@ ("model.VTLMappingScheme", "str:VtlMappingScheme"), # Message classes ("message.DataMessage", "mes:StructureSpecificData"), + ("message.MetadataMessage", "mes:GenericMetadata"), + ("message.MetadataMessage", "mes:StructureSpecificMetadata"), ("message.ErrorMessage", "mes:Error"), ("message.StructureMessage", "mes:Structure"), ] diff --git a/sdmx/message.py b/sdmx/message.py index 708b3c939..5839c776c 100644 --- a/sdmx/message.py +++ b/sdmx/message.py @@ -427,3 +427,8 @@ def compare(self, other, strict=True): and len(self.data) == len(other.data) and all(ds[0].compare(ds[1], strict) for ds in zip(self.data, other.data)) ) + + +@dataclass +class MetadataMessage(DataMessage): + pass From f11f8dee56ddac1069572cd458c6e9bcbfeca8a1 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Sat, 30 Dec 2023 01:38:41 -0500 Subject: [PATCH 063/103] Add "md" XML namespace for /metadata/generic --- sdmx/format/xml/common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdmx/format/xml/common.py b/sdmx/format/xml/common.py index 38c95ae40..f0c4a80fc 100644 --- a/sdmx/format/xml/common.py +++ b/sdmx/format/xml/common.py @@ -79,6 +79,7 @@ "xsi": "http://www.w3.org/2001/XMLSchema-instance", # To be formatted "com": "{}/common", + "md": "{}/metadata/generic", "data": "{}/data/structurespecific", "str": "{}/structure", "mes": "{}/message", From 
4e574df86e841a62cb853893543f798b3058ab67 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Sat, 30 Dec 2023 01:39:14 -0500 Subject: [PATCH 064/103] Extend .format.xml --- sdmx/format/xml/common.py | 7 +++---- sdmx/format/xml/v21.py | 10 ++++++++++ sdmx/format/xml/v30.py | 1 + 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/sdmx/format/xml/common.py b/sdmx/format/xml/common.py index f0c4a80fc..b6234f261 100644 --- a/sdmx/format/xml/common.py +++ b/sdmx/format/xml/common.py @@ -44,17 +44,14 @@ ("model.Agency", "str:Agency"), # Order matters ("model.Agency", "mes:Receiver"), ("model.Agency", "mes:Sender"), + ("model.AttributeDescriptor", "str:AttributeList"), ("model.Concept", "str:ConceptIdentity"), ("model.Codelist", "str:Enumeration"), # This could possibly be ItemScheme ("model.Dimension", "str:Dimension"), # Order matters ("model.Dimension", "str:DimensionReference"), ("model.Dimension", "str:GroupDimension"), - ("model.StructureUsage", "com:StructureUsage"), - ("model.AttributeDescriptor", "str:AttributeList"), ("model.DataAttribute", "str:Attribute"), ("model.DataStructureDefinition", "str:DataStructure"), - ("model.DataStructureDefinition", "com:Structure"), - ("model.DataStructureDefinition", "str:Structure"), ("model.DimensionDescriptor", "str:DimensionList"), ("model.GroupDimensionDescriptor", "str:Group"), ("model.GroupDimensionDescriptor", "str:AttachmentGroup"), @@ -63,6 +60,8 @@ ("model.MeasureDescriptor", "str:MeasureList"), ("model.MetadataStructureDefinition", "str:MetadataStructure"), ("model.SeriesKey", "gen:SeriesKey"), + ("model.Structure", "com:Structure"), + ("model.Structure", "str:Structure"), ("model.StructureUsage", "com:StructureUsage"), ("model.VTLMappingScheme", "str:VtlMappingScheme"), # Message classes diff --git a/sdmx/format/xml/v21.py b/sdmx/format/xml/v21.py index 18e8b04f3..a47f547be 100644 --- a/sdmx/format/xml/v21.py +++ b/sdmx/format/xml/v21.py @@ -14,6 +14,16 @@ ("model.DataflowDefinition", 
"str:Dataflow"), ("model.DimensionDescriptorValuesTarget", "str:KeyDescriptorValuesTarget"), ("model.MetadataflowDefinition", "str:Metadataflow"), + ("model.MetadataSet", "mes:MetadataSet"), + ("model.ReportedAttribute", "md:ReportedAttribute"), + ("model.TargetIdentifiableObject", ":ObjectReference"), + ("model.TargetIdentifiableObject", "md:ObjectReference"), + ("model.TargetObjectKey", ":Target"), + ("model.TargetObjectKey", "md:Target"), + ("model.TargetReportPeriod", ":ReportPeriod"), + ("model.TargetReportPeriod", "md:ReportPeriod"), + ("model.MetadataReport", ":Report"), + ("model.MetadataReport", "md:Report"), ] + [ (f"model.{name}", f"str:{name}") diff --git a/sdmx/format/xml/v30.py b/sdmx/format/xml/v30.py index 014637f21..c3c982594 100644 --- a/sdmx/format/xml/v30.py +++ b/sdmx/format/xml/v30.py @@ -12,6 +12,7 @@ ("model.Dataflow", "str:Dataflow"), ("model.MetadataAttributeDescriptor", "str:MetadataAttributeList"), ("model.Metadataflow", "str:Metadataflow"), + ("model.MetadataSet", "mes:MetadataSet"), ] + [ (f"model.{name}", f"str:{name}") From 1e921a0617fb413135c586a0303d8aaff6607534 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Sat, 30 Dec 2023 01:39:35 -0500 Subject: [PATCH 065/103] BaseMetadataSet attribute values are optional --- sdmx/model/common.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sdmx/model/common.py b/sdmx/model/common.py index 80405c0cf..e0f091a18 100644 --- a/sdmx/model/common.py +++ b/sdmx/model/common.py @@ -2127,13 +2127,13 @@ class BaseXHTMLAttributeValue: class BaseMetadataSet: """ABC for SDMX 2.1 and 3.0 MetadataSet.""" - action: ActionType + action: Optional[ActionType] = None - reporting_begin: date - reporting_end: date + reporting_begin: Optional[date] = None + reporting_end: Optional[date] = None - publication_period: date - publication_year: date + publication_period: Optional[date] = None + publication_year: Optional[date] = None # SDMX 2.1 §8: Hierarchical Code List From 
f2da35895988410045e32d2a1edea9389a95d295 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Sat, 30 Dec 2023 01:40:09 -0500 Subject: [PATCH 066/103] Don't parse through XHTML embedded in XML --- sdmx/reader/xml/v21.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index 9acd82e61..b67678b80 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -266,6 +266,8 @@ def read_message( # Retrieve the parsing function for this element & event func = self.parser[element.tag, event] except KeyError: # pragma: no cover + if QName(element.tag).namespace == "http://www.w3.org/1999/xhtml": + continue # Don't know what to do for this (element, event) raise NotImplementedError(element.tag, event) from None From 168da3ad02e20a7ba4769c9efe037214503f1a17 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Sat, 30 Dec 2023 01:42:49 -0500 Subject: [PATCH 067/103] Add metadata elements to .xml.v21 parsers --- sdmx/reader/xml/v21.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index b67678b80..753eed379 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -689,6 +689,7 @@ def maintainable(self, cls, elem, **kwargs): "gen:ObsDimension gen:ObsValue gen:Value " # Tags that are bare containers for other XML elements """ + :AttributeSet md:AttributeSet str:Categorisations str:CategorySchemes str:Codelists str:Concepts str:ConstraintAttachment str:Constraints str:CustomTypes str:Dataflows str:DataStructureComponents str:DataStructures str:FromVtlSuperSpace @@ -708,7 +709,8 @@ def maintainable(self, cls, elem, **kwargs): @start( """ - mes:Error mes:GenericData mes:GenericTimeSeriesData mes:StructureSpecificData + mes:Error mes:GenericData mes:GenericMetadata mes:GenericTimeSeriesData + mes:StructureSpecificData mes:StructureSpecificMetadata mes:StructureSpecificTimeSeriesData """ ) @@ -730,13 +732,13 @@ 
def _message(reader: Reader, elem): log.warning(f"xml.Reader got no dsd=… argument for {QName(elem).localname}") ss_without_dsd = True elif "StructureSpecific" not in elem.tag and reader.get_single( - common.BaseDataStructureDefinition + common.BaseDataStructureDefinition, subclass=True ): log.info("Use supplied dsd=… argument for non–structure-specific message") # Store values for other methods reader.push("SS without DSD", ss_without_dsd) - if "Data" in elem.tag: + if elem.tag.endswith("Data"): reader.push("DataSetClass", model.get_class(f"{QName(elem).localname}Set")) # Handle namespaces mapped on `elem` but not part of the standard set @@ -921,11 +923,11 @@ def _structures(reader, elem): @end( """ - com:AnnotationTitle com:AnnotationType com:AnnotationURL com:None com:URN - com:Value mes:DataSetAction mes:DataSetID mes:Email mes:ID mes:Test mes:Timezone - str:CodelistAliasRef str:DataType str:Email str:Expression str:NullValue - str:OperatorDefinition str:PersonalisedName str:Result str:RulesetDefinition - str:Telephone str:URI str:VtlDefaultName str:VtlScalarType + com:AnnotationTitle com:AnnotationType com:AnnotationURL com:None com:URN com:Value + mes:DataSetAction :ReportPeriod md:ReportPeriod mes:DataSetID mes:Email mes:ID + mes:Test mes:Timezone str:CodelistAliasRef str:DataType str:Email str:Expression + str:NullValue str:OperatorDefinition str:PersonalisedName str:Result + str:RulesetDefinition str:Telephone str:URI str:VtlDefaultName str:VtlScalarType """ ) def _text(reader, elem): @@ -957,9 +959,10 @@ def _localization(reader, elem): @end( """ - com:Structure com:StructureUsage str:AttachmentGroup str:CodeID str:ConceptIdentity - str:ConceptRole str:DimensionReference str:Enumeration - str:Parent str:Source str:Structure str:StructureUsage str:Target + com:Structure com:StructureUsage :ObjectReference md:ObjectReference + str:AttachmentGroup str:CodeID str:ConceptIdentity str:ConceptRole + str:DimensionReference str:Enumeration str:Parent str:Source 
str:Structure + str:StructureUsage str:Target """ ) def _ref(reader: Reader, elem): From dac74bacc29b3ceda83daed09ac6df57be6297be Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Sat, 30 Dec 2023 01:51:02 -0500 Subject: [PATCH 068/103] Add (Metad,D}ataMessage.structure_type property --- sdmx/message.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/sdmx/message.py b/sdmx/message.py index 5839c776c..514598b92 100644 --- a/sdmx/message.py +++ b/sdmx/message.py @@ -10,12 +10,12 @@ from dataclasses import dataclass, field, fields from datetime import datetime from operator import attrgetter -from typing import TYPE_CHECKING, List, Optional, Text, Union, get_args +from typing import TYPE_CHECKING, List, Optional, Text, Type, Union, get_args from sdmx import model from sdmx.dictlike import DictLike, DictLikeDescriptor, summarize_dictlike from sdmx.format import Version -from sdmx.model import v21, v30 +from sdmx.model import common, v21, v30 from sdmx.model.internationalstring import ( InternationalString, InternationalStringDescriptor, @@ -394,6 +394,14 @@ def structure(self): """DataStructureDefinition used in the :attr:`dataflow`.""" return self.dataflow.structure + @property + def structure_type(self) -> Type[common.Structure]: + """:class:`.Structure` subtype describing the contained (meta)data.""" + return { + Version["2.1"]: v21.DataStructureDefinition, + Version["3.0.0"]: v30.DataStructureDefinition, + }[self.version] + def __repr__(self): """String representation.""" lines = [super().__repr__()] @@ -431,4 +439,9 @@ def compare(self, other, strict=True): @dataclass class MetadataMessage(DataMessage): - pass + @property + def structure_type(self) -> Type[common.Structure]: + return { + Version["2.1"]: v21.MetadataStructureDefinition, + Version["3.0.0"]: v30.MetadataStructureDefinition, + }[self.version] From 6db4689a38bb67d686d2cfa21568ec753a955c4b Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Sat, 30 
Dec 2023 01:51:57 -0500 Subject: [PATCH 069/103] Generalize .v21._header_structure() for data/metadata --- sdmx/reader/xml/v21.py | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index 753eed379..a96b49531 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -792,18 +792,19 @@ def _header_org(reader, elem): @end("mes:Structure", only=False) def _header_structure(reader, elem): - """<mes:Structure> within <mes:Header> of a DataMessage.""" + """<mes:Structure> within <mes:Header> of a {Metad,D}ataMessage.""" # The root node of a structure message is handled by _message(), above. if elem.getparent() is None: return - msg = reader.get_single(message.DataMessage) + msg = reader.get_single(message.DataMessage, subclass=True) + assert msg is not None # Retrieve a DSD supplied to the parser, e.g. for a structure specific message - provided_dsd = reader.get_single(common.BaseDataStructureDefinition, subclass=True) + provided_structure = reader.get_single(common.Structure, subclass=True) # Resolve the child to a DSD, maybe is_external_reference=True - header_dsd = reader.pop_resolved_ref("Structure") + header_structure = reader.pop_resolved_ref("Structure") # The header may give either a StructureUsage, or a specific reference to a subclass # like BaseDataflow.
Resolve the child, if any, and remove it @@ -817,34 +818,34 @@ def _header_structure(reader, elem): # DSD to use: the provided one; the one referenced by ; or a # candidate constructed using the information contained in `header_su` (if any) - dsd = provided_dsd or ( + structure = provided_structure or ( reader.maintainable( - reader.model.DataStructureDefinition, + msg.structure_type, None, id=header_su.id, maintainer=header_su.maintainer, version=header_su.version, # NB this may not always be the case ) if header_su - else header_dsd + else header_structure ) - if header_dsd and header_su: + if header_structure and header_su: # Ensure the constructed candidate and the one given directly are equivalent - assert header_dsd == dsd - elif header_su and not provided_dsd: - reader.push(dsd) - elif dsd is None: + assert header_structure == structure + elif header_su and not provided_structure: + reader.push(structure) + elif structure is None: raise RuntimeError # Store on the data flow - msg.dataflow.structure = dsd + msg.dataflow.structure = structure # Store under the structure ID, so it can be looked up by that ID - reader.push(elem.attrib["structureID"], dsd) + reader.push(elem.attrib["structureID"], structure) # Store as an object that won't cause a parsing error if it is left over - reader.ignore.add(id(dsd)) + reader.ignore.add(id(structure)) try: # Information about the 'dimension at observation level' @@ -856,12 +857,12 @@ def _header_structure(reader, elem): if dim_at_obs == "AllDimensions": # Use a singleton object dim = model.AllDimensions - elif provided_dsd: + elif provided_structure: # Use existing dimension from the provided DSD - dim = dsd.dimensions.get(dim_at_obs) + dim = structure.dimensions.get(dim_at_obs) else: # Force creation of the 'dimension at observation' level - dim = dsd.dimensions.getdefault( + dim = structure.dimensions.getdefault( dim_at_obs, cls=( model.TimeDimension From e46c7bf833b43220221ca38945f2ace567fb1be5 Mon Sep 17 00:00:00 2001 
From: Paul Natsuo Kishimoto Date: Sat, 30 Dec 2023 01:52:50 -0500 Subject: [PATCH 070/103] Parse <com:StructuredText> as XHTML --- sdmx/reader/xml/v21.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index a96b49531..99b5c604b 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -936,6 +936,12 @@ def _text(reader, elem): reader.push(elem, elem.text or NoText) +@start("com:StructuredText") +def _st(reader, elem): + """Contained XHTML.""" + reader.push(elem, etree.tostring(elem[0], pretty_print=True)) + + @end("mes:Extracted mes:Prepared mes:ReportingBegin mes:ReportingEnd") def _datetime(reader, elem): text, n = re.subn(r"(.*\.)(\d{6})\d+(\+.*)", r"\1\2\3", elem.text) From 36c81ba365d010d05f6709560c794b493d5b8941 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Sat, 30 Dec 2023 01:59:20 -0500 Subject: [PATCH 071/103] Check {Metad,D}ataMessage.structure_type in .v21._ref() --- sdmx/reader/xml/v21.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index 99b5c604b..87b90d4cc 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -978,6 +978,10 @@ def _ref(reader: Reader, elem): if not cls_hint and QName(elem).localname in ("CodeID", "Parent", "Target"): # Use the *grand*-parent of the <Ref> or <URN> for a class hint cls_hint = reader.class_for_tag(elem.getparent().tag) + elif not cls_hint and QName(elem).localname == "Structure": + # /: use message property for a class hint + msg = reader.get_single(message.DataMessage, subclass=True) + cls_hint = cast(Type[message.DataMessage], type(msg)).structure_type reader.push(QName(elem).localname, reader.reference(elem, cls_hint)) From d4a62b544082e5422e6c594bdd114f52aa036ea8 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Sat, 30 Dec 2023 01:59:51 -0500 Subject: [PATCH 072/103] Update SDMX_ML_SUPPORTS[Resource.metadata] to True --- sdmx/source/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2
deletions(-) diff --git a/sdmx/source/__init__.py b/sdmx/source/__init__.py index ac2ee0798..50048579f 100644 --- a/sdmx/source/__init__.py +++ b/sdmx/source/__init__.py @@ -21,10 +21,10 @@ #: endpoints that are described in the standards but are not implemented by any source #: currently in :file:`sources.json`; these all return 404. SDMX_ML_SUPPORTS = { - Resource.data: True, Resource.attachementconstraint: False, Resource.customtypescheme: False, - Resource.metadata: False, + Resource.data: True, + Resource.metadata: True, Resource.namepersonalisationscheme: False, Resource.organisationunitscheme: False, Resource.process: False, From e98a4cf72fe7f2b80e647d6eccbe817c6eb3b129 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Sat, 30 Dec 2023 02:01:38 -0500 Subject: [PATCH 073/103] Parse SDMX-ML 2.1 and contents --- sdmx/reader/xml/v21.py | 113 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index 87b90d4cc..918793a17 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -1762,6 +1762,119 @@ def _ds_end(reader, elem): # §7.3: Metadata Structure Definition +# §7.4: Metadata Set + + +@start("mes:MetadataSet", only=False) +def _mds_start(reader, elem): + # Create an instance of a MetadataSet + mds = reader.class_for_tag(elem.tag)() + + # Retrieve the (message-local) ID referencing a data structure definition + id = elem.attrib.get("structureRef", None) or elem.attrib.get( + reader.qname("metadata:structureRef"), None + ) + + # Get a reference to the MSD that structures the data set + # Provided in the / + dsd = reader.get_single(id) + if not dsd: + # Fall back to a MSD provided as an argument to read_message() + dsd = reader.get_single(common.BaseMetadataStructureDefinition, subclass=True) + + if not dsd: # pragma: no cover + raise RuntimeError("No MSD when creating DataSet") + + log.debug( + f'Use provided {dsd!r} for structureRef="{id}" not defined in 
message' + ) + + mds.structured_by = dsd + + reader.push("MetadataSet", mds) + + +@end("mes:MetadataSet", only=False) +def _mds_end(reader, elem): + mds = reader.pop_single("MetadataSet") + + # Collect the contained MetadataReports + mds.report.extend(reader.pop_all(v21.MetadataReport)) + + # Add the data set to the message + reader.get_single(message.MetadataMessage).data.append(mds) + + +@end(":Report md:Report") +def _md_report(reader: Reader, elem): + cls = reader.class_for_tag(elem.tag) + + obj = cls( + attaches_to=reader.pop_single(model.TargetObjectKey), + metadata=reader.pop_all(model.ReportedAttribute, subclass=True), + ) + return obj + + +@end(":Target md:Target") +def _tov(reader: Reader, elem): + cls = reader.class_for_tag(elem.tag) + + obj = cls( + key_values={ + v.value_for: v for v in reader.pop_all(v21.TargetObjectValue, subclass=True) + } + ) + return obj + + +@end(":ReferenceValue md:ReferenceValue") +def _rv(reader: Reader, elem): + cls = reader.class_for_tag(elem[0].tag) + + mds = reader.get_single(common.BaseMetadataStructureDefinition, subclass=True) + + # TODO resolve the TargetObject + del mds + + args = dict(value_for=elem.attrib["id"]) + + if cls is v21.TargetReportPeriod: + args["report_period"] = reader.pop_single("ReportPeriod") + elif cls is model.TargetIdentifiableObject: + args["obj"] = reader.pop_single("ObjectReference") + + obj = cls(**args) + + return obj + + +@start("md:ReportedAttribute", only=False) +def _ra_generic_start(reader: Reader, elem): + # Avoid collecting previous/sibling ReportedAttribute as children of this one + reader.stash(model.ReportedAttribute) + + +@end("md:ReportedAttribute", only=False) +def _ra_generic_end(reader: Reader, elem): + cls = reader.class_for_tag(elem.tag) + + args = dict( + # Pop all child elements + child=reader.pop_all(cls, subclass=True), + value_for=elem.attrib["id"], + ) + + xhtml = reader.pop_single("StructuredText") + if xhtml: + cls = v21.XHTMLAttributeValue + args["value"] = xhtml + + 
obj = cls(**args) + + reader.unstash() + return obj + # §8: Hierarchical Code List From f2abcc4b69a9f3323251ff71e0f3d9b7fca64a93 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Sat, 30 Dec 2023 02:03:27 -0500 Subject: [PATCH 074/103] Update test specimens --- sdmx/testing/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdmx/testing/__init__.py b/sdmx/testing/__init__.py index 73817e901..c70b72ba2 100644 --- a/sdmx/testing/__init__.py +++ b/sdmx/testing/__init__.py @@ -281,6 +281,7 @@ def __init__(self, base_path): ("ISTAT", "22_289-structure.xml"), ("ISTAT", "47_850-structure.xml"), ("ISTAT", "actualconstraint-0.xml"), + ("ISTAT", "metadataflow-0.xml"), ("ISTAT", "metadatastructure-0.xml"), ("OECD", "actualconstraint-0.xml"), ("OECD", "metadatastructure-0.xml"), From e916ba87ad41410c8cd21d57b1e2dbc9ab7278c4 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Sat, 30 Dec 2023 02:04:00 -0500 Subject: [PATCH 075/103] Add tests of .v21.MetadataSet & parsing --- sdmx/tests/model/test_v21.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/sdmx/tests/model/test_v21.py b/sdmx/tests/model/test_v21.py index d4abca1d0..5b57a8328 100644 --- a/sdmx/tests/model/test_v21.py +++ b/sdmx/tests/model/test_v21.py @@ -583,6 +583,27 @@ def test_init(self): assert ds0.action == ds1.action +class TestMetadataSet: + @pytest.fixture(scope="class") + def msg(self, specimen) -> sdmx.message.MetadataMessage: + with specimen("esms_generic.xml") as f: + return sdmx.read_sdmx(f) + + def test_report_hierarchy(self, msg: sdmx.message.MetadataMessage) -> None: + # Access message → metadata set → report + r = msg.data[0].report[0] + + # Number of top-level ReportedAttribute + assert 3 == len(r.metadata) + # Number of ReportedAttribute in tree branches + assert 4 == len(r.metadata[0]) + assert 0 == len(r.metadata[0][0]) + assert 4 == len(r.metadata[0][2]) + assert 0 == len(r.metadata[0][2][0]) + assert 3 == len(r.metadata[1]) + assert 1 == 
len(r.metadata[2]) + + class TestHierarchicalCodelist: @pytest.fixture(scope="class") def msg(self, specimen): From dabe7ca615406f1fd28043a7c23b678f04fc5889 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Sat, 30 Dec 2023 13:33:31 -0500 Subject: [PATCH 076/103] Add {Metad,D}ataStructure to name map --- sdmx/model/v21.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sdmx/model/v21.py b/sdmx/model/v21.py index 68f266242..b2d1de978 100644 --- a/sdmx/model/v21.py +++ b/sdmx/model/v21.py @@ -470,6 +470,8 @@ def __repr__(self) -> str: __name__, name_map={ "Dataflow": "DataflowDefinition", + "DataStructure": "DataStructureDefinition", + "MetadataStructure": "MetadataStructureDefinition", "Metadataflow": "MetadataflowDefinition", }, parent_map={ From ef17250859e573216e950543ca08161b90dd1b4e Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Sat, 30 Dec 2023 13:34:48 -0500 Subject: [PATCH 077/103] Instantiate Message to check structure_type in _ref() --- sdmx/reader/xml/v21.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index 918793a17..148b353ab 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -981,7 +981,10 @@ def _ref(reader: Reader, elem): elif not cls_hint and QName(elem).localname == "Structure": # /: use message property for a class hint msg = reader.get_single(message.DataMessage, subclass=True) - cls_hint = cast(Type[message.DataMessage], type(msg)).structure_type + if msg: + cls_hint = cast(Type[message.DataMessage], type(msg))( + version=reader.xml_version + ).structure_type reader.push(QName(elem).localname, reader.reference(elem, cls_hint)) From 2c86b2a57bd1b167fe652aac4b5cf73df7ace66e Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 3 Jan 2024 21:30:32 -0500 Subject: [PATCH 078/103] Add .model.common.ExtendedFacetValueType --- sdmx/model/common.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git 
a/sdmx/model/common.py b/sdmx/model/common.py index e0f091a18..090a6c73d 100644 --- a/sdmx/model/common.py +++ b/sdmx/model/common.py @@ -511,6 +511,23 @@ def __contains__(self, name): "geospatialInformation", ) + +#: SDMX ExtendedFacetValueType. +#: +#: This enumeration is identical to :class:`.FacetValueType` except for one additional +#: member, "Xhtml". This member is used only in metadata. +ExtendedFacetValueType = Enum( + "ExtendedFacetValueType", + """string bigInteger integer long short decimal float double boolean uri count + inclusiveValueRange alpha alphaNumeric numeric exclusiveValueRange incremental + observationalTimePeriod standardTimePeriod basicTimePeriod gregorianTimePeriod + gregorianYear gregorianMonth gregorianYearMonth gregorianDay reportingTimePeriod + reportingYear reportingSemester reportingTrimester reportingQuarter reportingMonth + reportingWeek reportingDay dateTime timesRange month monthDay day time duration + keyValues identifiableReference dataSetReference Xhtml""", +) + + UsageStatus = Enum("UsageStatus", "mandatory conditional") From 9d5d7ca0a5d22e4757e056f77d2e5b78677acec6 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 3 Jan 2024 21:31:51 -0500 Subject: [PATCH 079/103] Allow names without namespace in XMLFormat.qname() --- sdmx/format/xml/common.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sdmx/format/xml/common.py b/sdmx/format/xml/common.py index b6234f261..5e5660eb2 100644 --- a/sdmx/format/xml/common.py +++ b/sdmx/format/xml/common.py @@ -130,15 +130,16 @@ def qname(self, ns_or_name, name=None) -> QName: else: if name is None: match = re.fullmatch( - r"(\{(?P<ns_full>.*)\}|(?P<ns_key>.*):)(?P<name>.*)", ns_or_name + r"(\{(?P<ns_full>.*)\}|(?P<ns_key>.*):)?(?P<name>.*)", ns_or_name ) assert match name = match.group("name") - ns_key = match.group("ns_key") - if ns_key: + if ns_key := match.group("ns_key"): ns = self.NS[ns_key] + elif ns := match.group("ns_full"): + pass else: - ns = match.group("ns_full") + ns = None else: ns
= self.NS[ns_or_name] From 8ec58d4def7021c0bbf50ad18400827d53a9b372 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 3 Jan 2024 21:32:42 -0500 Subject: [PATCH 080/103] Add ReportPeriodTarget to .format.xml.v21 --- sdmx/format/xml/v21.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdmx/format/xml/v21.py b/sdmx/format/xml/v21.py index a47f547be..b5448b94b 100644 --- a/sdmx/format/xml/v21.py +++ b/sdmx/format/xml/v21.py @@ -36,6 +36,7 @@ MetadataAttribute MetadataTarget PrimaryMeasure + ReportPeriodTarget ReportStructure """.split() ], From 9d85b3d6bd34b395ce40558a01b7f35be81bef1b Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 4 Jan 2024 03:36:54 +0100 Subject: [PATCH 081/103] Xfail read of esms_structured.xml This specimen cannot be read without its MSD. --- sdmx/tests/reader/test_reader_xml.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sdmx/tests/reader/test_reader_xml.py b/sdmx/tests/reader/test_reader_xml.py index 5528d50ab..fa13d50d8 100644 --- a/sdmx/tests/reader/test_reader_xml.py +++ b/sdmx/tests/reader/test_reader_xml.py @@ -7,5 +7,8 @@ @pytest.mark.parametrize_specimens("path", format="xml") def test_read_xml(path): """XML specimens can be read.""" + if "esms_structured" in path.name: + pytest.xfail("Not implemented") + result = sdmx.read_sdmx(path) assert isinstance(result, Message) From 2a3e1d3459225d0738e362539f76c86674522546 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 4 Jan 2024 03:38:03 +0100 Subject: [PATCH 082/103] Parse value_for ID in structure-specific --- sdmx/reader/xml/v21.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index 148b353ab..2cbce3705 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -1840,7 +1840,12 @@ def _rv(reader: Reader, elem): # TODO resolve the TargetObject del mds - args = dict(value_for=elem.attrib["id"]) + if QName(elem).namespace is None: + # Structure-specific:
the TargetObject ID is stored in the "xsi:type" attribute + # as the last part of a value like "esms:CATEGORY_TARGET.ReportPeriodTarget" + args = dict(value_for=elem.attrib[reader.qname("xsi", "type")].split(".")[-1]) + else: + args = dict(value_for=elem.attrib["id"]) if cls is v21.TargetReportPeriod: args["report_period"] = reader.pop_single("ReportPeriod") From 5d505806a92b791d5d4ab13ef84e4fcfdeaebafe Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 4 Jan 2024 03:39:36 +0100 Subject: [PATCH 083/103] Parse ExtendedFacetValueType from XML --- sdmx/reader/xml/v21.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index 2cbce3705..0b7d45066 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -1119,7 +1119,12 @@ def _facet(reader, elem): # in XML, first letter is uppercase; in the spec and Python enum, lowercase. SDMX-ML # default is "String". tt = args.pop("text_type", "String") - fvt = model.FacetValueType[f"{tt[0].lower()}{tt[1:]}"] + try: + fvt = model.FacetValueType[f"{tt[0].lower()}{tt[1:]}"] + except KeyError: + # ExtendedFacetValueType instead. Convert case of the value: in XML, the string + # is "XHTML", upper case; in the spec and Python enum, "Xhtml", title case. 
+ fvt = model.ExtendedFacetValueType[f"{tt[0]}{tt[1:].lower()}"] # NB Erratum: "isMultiLingual" appears in XSD schemas ("The isMultiLingual attribute # indicates for a text format of type 'string', whether the value should allow From bc739377bc784aededfa25e13adae01a581fdbdd Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 4 Jan 2024 03:42:14 +0100 Subject: [PATCH 084/103] Parse as DSD ref in StructureMessage --- sdmx/reader/xml/v21.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index 0b7d45066..100898417 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -985,6 +985,9 @@ def _ref(reader: Reader, elem): cls_hint = cast(Type[message.DataMessage], type(msg))( version=reader.xml_version ).structure_type + elif QName(elem.getparent()).localname == "Dataflow": + # In a StructureMessage + cls_hint = reader.model.DataStructureDefinition reader.push(QName(elem).localname, reader.reference(elem, cls_hint)) From c092700e5f6d7b06ce192f99e04add37d0511211 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 4 Jan 2024 03:43:31 +0100 Subject: [PATCH 085/103] Parse ReportPeriod{,Target} from XML --- sdmx/format/xml/v21.py | 2 +- sdmx/reader/xml/v21.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sdmx/format/xml/v21.py b/sdmx/format/xml/v21.py index b5448b94b..f215358e2 100644 --- a/sdmx/format/xml/v21.py +++ b/sdmx/format/xml/v21.py @@ -20,7 +20,7 @@ ("model.TargetIdentifiableObject", "md:ObjectReference"), ("model.TargetObjectKey", ":Target"), ("model.TargetObjectKey", "md:Target"), - ("model.TargetReportPeriod", ":ReportPeriod"), + ("model.TargetReportPeriod", "ReportPeriod"), ("model.TargetReportPeriod", "md:ReportPeriod"), ("model.MetadataReport", ":Report"), ("model.MetadataReport", "md:Report"), diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index 100898417..a55a1d17f 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ 
-1168,7 +1168,7 @@ def _concept(reader, elem): """ str:Attribute str:Dimension str:GroupDimension str:IdentifiableObjectTarget str:KeyDescriptorValuesTarget str:MeasureDimension str:MetadataAttribute - str:PrimaryMeasure str:TimeDimension + str:PrimaryMeasure str:ReportPeriodTarget str:TimeDimension """ ) @possible_reference() From 2a820e079352d9b16ebd7012902d0a86c350dd59 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 4 Jan 2024 03:57:35 +0100 Subject: [PATCH 086/103] Add test_reader_xml.test_read_xml_ss() --- sdmx/tests/reader/test_reader_xml.py | 47 +++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/sdmx/tests/reader/test_reader_xml.py b/sdmx/tests/reader/test_reader_xml.py index fa13d50d8..4d16521fa 100644 --- a/sdmx/tests/reader/test_reader_xml.py +++ b/sdmx/tests/reader/test_reader_xml.py @@ -5,10 +5,55 @@ @pytest.mark.parametrize_specimens("path", format="xml") -def test_read_xml(path): +def test_read_xml(path) -> None: """XML specimens can be read.""" if "esms_structured" in path.name: pytest.xfail("Not implemented") result = sdmx.read_sdmx(path) assert isinstance(result, Message) + + +@pytest.mark.parametrize( + "message_path, structure_path, structure_id", + ( + # Structure-specific data; same as test_reader_xml_v21.test_read_ss_xml but + # without additional assertions + ("M.USD.EUR.SP00.A.xml", "ECB_EXR/1/structure.xml", "ECB_EXR1"), + # Structure-specific metadata + pytest.param( + "esms_structured.xml", + "v21/xml/demography/esms.xml", + "ESMS_SIMPLE", + marks=pytest.mark.xfail(reason="Not implemented"), + ), + ), +) +def test_read_xml_ss(specimen, message_path, structure_path, structure_id) -> None: + """Structure-specific (meta)data specimens can be read *using* their structures. + + Note that :func:`.test_read_xml` tests some of the same {Metad,D}ataMessage + specimens, but *without* supplying the [MD]SD; in those cases, the [MD]SD contents + are inferred while parsing. 
This test, in contrast, parses with the [MD]SD + available. + """ + # Read the structure message and retrieve the structure object + with specimen(structure_path) as f: + sm = sdmx.read_sdmx(f) + + # Structure may be for data or metadata + for name in "structure", "metadatastructure": + try: + s = getattr(sm, name)[structure_id] + except KeyError: + pass + else: + break + + assert s + + # The (meta)data message can be read using its associated structure + with specimen(message_path) as f: + result = sdmx.read_sdmx(f, dsd=s) + + assert isinstance(result, Message) From c1be38b4ee47cd7cf0f80402b744282a6a1901d3 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Tue, 9 Jan 2024 17:30:24 +0100 Subject: [PATCH 087/103] =?UTF-8?q?Move=20IM=20=C2=A79=20v21-specific=20cl?= =?UTF-8?q?asses=20from=20.common?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- sdmx/model/common.py | 43 --------------------------- sdmx/model/v21.py | 67 +++++++++++++++++++++++++++++++++++++++++- sdmx/reader/xml/v21.py | 10 +++---- 3 files changed, 71 insertions(+), 49 deletions(-) diff --git a/sdmx/model/common.py b/sdmx/model/common.py index 090a6c73d..9e0dc1664 100644 --- a/sdmx/model/common.py +++ b/sdmx/model/common.py @@ -110,11 +110,6 @@ "CodingFormat", "Level", "HierarchicalCode", - "ItemAssociation", - "CodeMap", - "ItemSchemeMap", - "CodelistMap", - "StructureSet", "ConstraintRole", "StartPeriod", "EndPeriod", @@ -2189,44 +2184,6 @@ class HierarchicalCode(IdentifiableArtefact): child: List["HierarchicalCode"] = field(default_factory=list) -# SDMX 2.1 §9: Structure Set and Mappings - - -@dataclass -class ItemAssociation(AnnotableArtefact, Generic[IT]): - _Item: ClassVar[Type[Item]] = Item - - source: Optional[IT] = None - target: Optional[IT] = None - - -class CodeMap(ItemAssociation[Code]): - _Item = Code - - -IAT = TypeVar("IAT", bound="ItemAssociation") -IST = TypeVar("IST", bound="ItemScheme") - - -@dataclass -class 
ItemSchemeMap(NameableArtefact, Generic[IST, IAT]): - _ItemAssociation: ClassVar[Type[ItemAssociation]] = ItemAssociation - - source: Optional[IST] = None - target: Optional[IST] = None - - item_association: List[IAT] = field(default_factory=list) - - -class CodelistMap(ItemSchemeMap[Codelist, CodeMap]): - _ItemAssociation = CodeMap - - -@dataclass -class StructureSet(MaintainableArtefact): - item_scheme_map: List[ItemSchemeMap] = field(default_factory=list) - - # SDMX 2.1 §10.2: Constraint inheritance # SDMX 3.0 §12: Constraints diff --git a/sdmx/model/v21.py b/sdmx/model/v21.py index b2d1de978..d5cef5564 100644 --- a/sdmx/model/v21.py +++ b/sdmx/model/v21.py @@ -4,12 +4,24 @@ # TODO for complete implementation of the IM, enforce TimeKeyValue (instead of KeyValue) # for {Generic,StructureSpecific} TimeSeriesDataSet. from dataclasses import dataclass, field -from typing import Dict, Generator, List, Optional, Set, Union +from typing import ( + ClassVar, + Dict, + Generator, + Generic, + List, + Optional, + Set, + Type, + TypeVar, + Union, +) from sdmx.dictlike import DictLikeDescriptor from . 
import common from .common import ( + IT, AttributeRelationship, Component, ComponentList, @@ -52,6 +64,11 @@ "MetadataStructureDefinition", "Hierarchy", "HierarchicalCodelist", + "ItemAssociation", + "CodeMap", + "ItemSchemeMap", + "CodelistMap", + "StructureSet", ] log = logging.getLogger(__name__) @@ -466,6 +483,54 @@ def __repr__(self) -> str: return f"{tmp}: {len(self.hierarchy)} hierarchies>" +# §9: Structure Set and Mappings + + +@dataclass +class ItemAssociation(common.AnnotableArtefact, Generic[IT]): + """SDMX 2.1 ItemAssociation.""" + + _Item: ClassVar[Type[common.Item]] = common.Item + + source: Optional[IT] = None + target: Optional[IT] = None + + +class CodeMap(ItemAssociation[common.Code]): + """SDMX 2.1 CodeMap.""" + + _Item = common.Code + + +IAT = TypeVar("IAT", bound="ItemAssociation") +IST = TypeVar("IST", bound="common.ItemScheme") + + +@dataclass +class ItemSchemeMap(NameableArtefact, Generic[IST, IAT]): + """SDMX 2.1 ItemSchemeMap.""" + + _ItemAssociation: ClassVar[Type[ItemAssociation]] = ItemAssociation + + source: Optional[IST] = None + target: Optional[IST] = None + + item_association: List[IAT] = field(default_factory=list) + + +class CodelistMap(ItemSchemeMap[common.Codelist, CodeMap]): + """SDMX 2.1 CodelistMap.""" + + _ItemAssociation = CodeMap + + +@dataclass +class StructureSet(common.MaintainableArtefact): + """SDMX 2.1 StructureSet.""" + + item_scheme_map: List[ItemSchemeMap] = field(default_factory=list) + + CF = common.ClassFinder( __name__, name_map={ diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index a55a1d17f..3a28044ee 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -1970,14 +1970,14 @@ def _hcl(reader: Reader, elem): @start("str:CodelistMap", only=False) def _ismap_start(reader: Reader, elem): - cls: Type[common.ItemSchemeMap] = reader.class_for_tag(elem.tag) + cls: Type[model.ItemSchemeMap] = reader.class_for_tag(elem.tag) # Push class for reference while parsing sub-elements 
reader.push("ItemAssociation class", cls._ItemAssociation._Item) @end("str:CodelistMap", only=False) def _ismap_end(reader: Reader, elem): - cls: Type[common.ItemSchemeMap] = reader.class_for_tag(elem.tag) + cls: Type[model.ItemSchemeMap] = reader.class_for_tag(elem.tag) # Remove class from stacks reader.pop_single("ItemAssociation class") @@ -2012,7 +2012,7 @@ def _ismap_end(reader: Reader, elem): @end("str:CodeMap") def _item_map(reader: Reader, elem): - cls: Type[common.ItemAssociation] = reader.class_for_tag(elem.tag) + cls: Type[model.ItemAssociation] = reader.class_for_tag(elem.tag) # Store Source and Target as Reference instances return reader.annotable( @@ -2026,10 +2026,10 @@ def _item_map(reader: Reader, elem): @end("str:StructureSet") def _ss(reader: Reader, elem): return reader.maintainable( - common.StructureSet, + model.StructureSet, elem, # Collect all ItemSchemeMaps - item_scheme_map=reader.pop_all(common.ItemSchemeMap, subclass=True), + item_scheme_map=reader.pop_all(model.ItemSchemeMap, subclass=True), ) From 91e31faecafc11bedead70a08560f67d65b7067f Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Tue, 9 Jan 2024 17:31:35 +0100 Subject: [PATCH 088/103] Add MetadataAttribute.{child,parent}, defaults --- sdmx/model/common.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sdmx/model/common.py b/sdmx/model/common.py index 9e0dc1664..20879c503 100644 --- a/sdmx/model/common.py +++ b/sdmx/model/common.py @@ -2102,12 +2102,16 @@ class AttributeComponent(Component): """ +@dataclass class MetadataAttribute(AttributeComponent): """SDMX MetadataAttribute.""" - is_presentational: bool - max_occurs: int - min_occurs: int + is_presentational: Optional[bool] = None + max_occurs: Optional[int] = None + min_occurs: Optional[int] = None + + parent: Optional["MetadataAttribute"] = None + child: List["MetadataAttribute"] = field(default_factory=list) class BaseMetadataStructureDefinition(Structure, ConstrainableArtefact): From 
799d98f229ec5c643db76bf32bef1553d3ce9bdf Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Tue, 9 Jan 2024 17:31:50 +0100 Subject: [PATCH 089/103] Export enums from .model.common --- sdmx/model/common.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sdmx/model/common.py b/sdmx/model/common.py index 20879c503..81465cdfe 100644 --- a/sdmx/model/common.py +++ b/sdmx/model/common.py @@ -69,6 +69,10 @@ "NameableArtefact", "VersionableArtefact", "MaintainableArtefact", + "ActionType", + "ConstraintRoleType", + "FacetValueType", + "ExtendedFacetValueType", "Item", "ItemScheme", "FacetType", From 4ef47d06f3a92ec21c6035dce477c34022901dac Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Tue, 9 Jan 2024 17:32:44 +0100 Subject: [PATCH 090/103] .v21.MetadataStructureDefinition associates to multiple ReportStructures --- sdmx/model/v21.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdmx/model/v21.py b/sdmx/model/v21.py index d5cef5564..e7222e3bb 100644 --- a/sdmx/model/v21.py +++ b/sdmx/model/v21.py @@ -350,7 +350,7 @@ class ReportStructure(ComponentList): class MetadataStructureDefinition(common.BaseMetadataStructureDefinition): """SDMX 2.1 MetadataStructureDefinition.""" - report_structure: ReportStructure = field(default_factory=ReportStructure) + report_structure: DictLikeDescriptor[str, ReportStructure] = DictLikeDescriptor() #: Association to 1 or more :class:`.MetadataTarget` target: DictLikeDescriptor[str, MetadataTarget] = DictLikeDescriptor() From 448cd2bc655d6d9f5522e3b54572ffe725e34661 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Tue, 9 Jan 2024 17:33:15 +0100 Subject: [PATCH 091/103] Label and export newly implemented IM classes --- sdmx/model/common.py | 4 +++ sdmx/model/v21.py | 59 ++++++++++++++++++++++++++++++++++++-------- sdmx/model/v30.py | 22 ++++++++++++++--- 3 files changed, 71 insertions(+), 14 deletions(-) diff --git a/sdmx/model/common.py b/sdmx/model/common.py index 81465cdfe..724ba61be 
100644 --- a/sdmx/model/common.py +++ b/sdmx/model/common.py @@ -2168,6 +2168,8 @@ class CodingFormat: @dataclass class Level(NameableArtefact): + """SDMX Level.""" + parent: Optional[Union["Level", Any]] = None # NB second element is "Hierarchy" child: Optional["Level"] = None @@ -2176,6 +2178,8 @@ class Level(NameableArtefact): @dataclass class HierarchicalCode(IdentifiableArtefact): + """SDMX HierarchicalCode.""" + #: Date from which the construct is valid. valid_from: Optional[str] = None #: Date from which the construct is superseded. diff --git a/sdmx/model/v21.py b/sdmx/model/v21.py index e7222e3bb..7b34427b7 100644 --- a/sdmx/model/v21.py +++ b/sdmx/model/v21.py @@ -60,8 +60,29 @@ "GenericDataSet", "GenericTimeSeriesDataSet", "StructureSpecificTimeSeriesDataSet", - "MetadataflowDefinition", + "ReportingCategory", + "ReportingTaxonomy", + "TargetObject", + "DataSetTarget", + "DimensionDescriptorValuesTarget", + "IdentifiableObjectTarget", + "ReportPeriodTarget", + "MetadataTarget", + "ReportStructure", "MetadataStructureDefinition", + "MetadataflowDefinition", + "TargetObjectValue", + "TargetReportPeriod", + "TargetIdentifiableObject", + "TargetObjectKey", + "ReportedAttribute", + "EnumeratedAttributeValue", + "NonEnumeratedAttributeValue", + "OtherNonEnumeratedAttributeValue", + "TextAttributeValue", + "XHTMLAttributeValue", + "MetadataReport", + "MetadataSet", "Hierarchy", "HierarchicalCodelist", "ItemAssociation", @@ -303,31 +324,31 @@ class StructureSpecificTimeSeriesDataSet(DataSet): class ReportingCategory(common.Item): - pass + """SDMX 2.1 ReportingCategory.""" class ReportingTaxonomy(common.ItemScheme): - pass + """SDMX 2.1 ReportingTaxonomy.""" class TargetObject(common.Component): - pass + """SDMX 2.1 TargetObject.""" class DataSetTarget(TargetObject): - pass + """SDMX 2.1 DataSetTarget.""" class DimensionDescriptorValuesTarget(TargetObject): - pass + """SDMX 2.1 DimensionDescriptorValuesTarget.""" class IdentifiableObjectTarget(TargetObject): - 
pass + """SDMX 2.1 IdentifiableObjectTarget.""" class ReportPeriodTarget(TargetObject): - pass + """SDMX 2.1 ReportPeriodTarget.""" class MetadataTarget(ComponentList): @@ -369,21 +390,29 @@ class MetadataflowDefinition(common.BaseMetadataflow): @dataclass class TargetObjectValue: + """SDMX 2.1 TargetObjectValue.""" + value_for: TargetObject @dataclass class TargetReportPeriod(TargetObjectValue): + """SDMX 2.1 TargetReportPeriod.""" + report_period: str @dataclass class TargetIdentifiableObject(TargetObjectValue): + """SDMX 2.1 TargetIdentifiableObject.""" + obj: IdentifiableArtefact @dataclass class TargetObjectKey: + """SDMX 2.1 TargetObjectKey.""" + key_values: DictLikeDescriptor[str, TargetObjectValue] = DictLikeDescriptor() @@ -420,24 +449,30 @@ class EnumeratedAttributeValue(ReportedAttribute): class NonEnumeratedAttributeValue(ReportedAttribute): - pass + """SDMX 2.1 NonEnumeratedAttributeValue.""" class OtherNonEnumeratedAttributeValue(NonEnumeratedAttributeValue): + """SDMX 2.1 OtherNonEnumeratedAttributeValue.""" + value: str class TextAttributeValue(NonEnumeratedAttributeValue, common.BaseTextAttributeValue): - pass + """SDMX 2.1 TextAttributeValue.""" @dataclass class XHTMLAttributeValue(NonEnumeratedAttributeValue, common.BaseXHTMLAttributeValue): + """SDMX 2.1 XHTMLAttributeValue.""" + value: str @dataclass class MetadataReport: + """SDMX 2.1 MetadataReport.""" + metadata: List[ReportedAttribute] = field(default_factory=list) target: Optional[MetadataTarget] = None attaches_to: Optional[TargetObjectKey] = None @@ -466,6 +501,8 @@ class MetadataSet(NameableArtefact, common.BaseMetadataSet): @dataclass class Hierarchy(NameableArtefact): + """SDMX 2.1 Hierarchy.""" + has_formal_levels: bool = False #: Hierarchical codes in the hierarchy. 
@@ -476,6 +513,8 @@ class Hierarchy(NameableArtefact): @dataclass class HierarchicalCodelist(common.MaintainableArtefact): + """SDMX 2.1 HierarchicalCodelist.""" + hierarchy: List[Hierarchy] = field(default_factory=list) def __repr__(self) -> str: diff --git a/sdmx/model/v30.py b/sdmx/model/v30.py index c0e323e3a..c3f317c14 100644 --- a/sdmx/model/v30.py +++ b/sdmx/model/v30.py @@ -44,8 +44,18 @@ "Dataflow", "Observation", "StructureSpecificDataSet", + "MetadataAttributeDescriptor", + "IdentifiableObjectSelection", "MetadataStructureDefinition", "Metadataflow", + "MetadataAttributeValue", + "CodedMetadataAttributeValue", + "UncodedMetadataAttributeValue", + "OtherUncodedAttributeValue", + "TextAttributeValue", + "XHTMLAttributeValue", + "TargetIdentifiableObject", + "MetadataSet", "Hierarchy", "HierarchyAssociation", "SelectionValue", @@ -383,7 +393,7 @@ class MetadataAttributeDescriptor(common.ComponentList): class IdentifiableObjectSelection: - pass + """SDMX 3.0 IdentifiableObjectSelection.""" @dataclass @@ -435,26 +445,30 @@ class CodedMetadataAttributeValue(MetadataAttributeValue): class UncodedMetadataAttributeValue(MetadataAttributeValue): + """SDMX 3.0 UncodedMetadataAttributeValue.""" + pass class OtherUncodedAttributeValue(UncodedMetadataAttributeValue): + """SDMX 3.0 OtherUncodedAttributeValue.""" + value: str start_time: date class TextAttributeValue(UncodedMetadataAttributeValue, common.BaseTextAttributeValue): - pass + """SDMX 3.0 TextAttributeValue.""" class XHTMLAttributeValue( UncodedMetadataAttributeValue, common.BaseXHTMLAttributeValue ): - pass + """SDMX 3.0 XHTMLAttributeValue.""" class TargetIdentifiableObject: - pass + """SDMX 3.0 TargetIdentifiableObject.""" @dataclass From f8aa5329e613272e982885c6d321da1acb2e23d0 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Tue, 9 Jan 2024 17:37:26 +0100 Subject: [PATCH 092/103] Parse generic and ss MetadataSet from v21 and v30 XML --- sdmx/reader/xml/v21.py | 105 
+++++++++++++++++++++++++++-------------- sdmx/reader/xml/v30.py | 3 +- 2 files changed, 71 insertions(+), 37 deletions(-) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index 3a28044ee..b3eff71aa 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -231,6 +231,7 @@ def detect(cls, content): def read_message( self, source, + structure: Optional[common.Structure] = None, dsd: Optional[common.BaseDataStructureDefinition] = None, _events=None, ) -> message.Message: @@ -242,8 +243,10 @@ def read_message( # Elements to ignore when parsing finishes self.ignore = set() - # If calling code provided a DSD, add it to a stack, and let it be ignored when - # parsing finishes + # If calling code provided a {Metad,D}ataStructureDefinition, add it to a stack, + # and let it be ignored when parsing finishes + self.push(structure) + self.ignore.add(id(structure)) self.push(dsd) self.ignore.add(id(dsd)) @@ -296,7 +299,7 @@ def read_message( # parsing errors # Remove some internal items - self.pop_single("SS without DSD") + self.pop_single("SS without structure") self.pop_single("DataSetClass") # Count only non-ignored items @@ -722,22 +725,24 @@ def _message(reader: Reader, elem): if getattr(elem.getparent(), "tag", None) == reader.qname("mes", "Header"): return - ss_without_dsd = False + ss_without_structure = False - # With 'dsd' argument, the message should be structure-specific - if ( - "StructureSpecific" in elem.tag - and reader.get_single(common.BaseDataStructureDefinition, subclass=True) is None - ): - log.warning(f"xml.Reader got no dsd=… argument for {QName(elem).localname}") - ss_without_dsd = True - elif "StructureSpecific" not in elem.tag and reader.get_single( - common.BaseDataStructureDefinition, subclass=True - ): + # Retrieve any {Metad,D}ataStructure definition given to Reader.read_message() + supplied_structure = reader.get_single(common.Structure, subclass=True) + + # Handle + qname = QName(elem) + if "StructureSpecific" in elem.tag: 
+ if supplied_structure is None: + log.warning(f"xml.Reader got no structure=… argument for {qname.localname}") + ss_without_structure = True + elif isinstance(supplied_structure, model.MetadataStructureDefinition): + add_mds_events(reader, supplied_structure) + elif supplied_structure: log.info("Use supplied dsd=… argument for non–structure-specific message") # Store values for other methods - reader.push("SS without DSD", ss_without_dsd) + reader.push("SS without structure", ss_without_structure) if elem.tag.endswith("Data"): reader.push("DataSetClass", model.get_class(f"{QName(elem).localname}Set")) @@ -800,10 +805,10 @@ def _header_structure(reader, elem): msg = reader.get_single(message.DataMessage, subclass=True) assert msg is not None - # Retrieve a DSD supplied to the parser, e.g. for a structure specific message + # Retrieve a structure supplied to the reader, e.g. for a structure specific message provided_structure = reader.get_single(common.Structure, subclass=True) - # Resolve the child to a DSD, maybe is_external_reference=True + # Resolve the child to an object, maybe is_external_reference=True header_structure = reader.pop_resolved_ref("Structure") # The header may give either a StructureUsage, or a specific reference to a subclass @@ -844,8 +849,8 @@ def _header_structure(reader, elem): # Store under the structure ID, so it can be looked up by that ID reader.push(elem.attrib["structureID"], structure) - # Store as an object that won't cause a parsing error if it is left over - reader.ignore.add(id(structure)) + # Store as objects that won't cause a parsing error if it is left over + reader.ignore.update({id(structure), id(header_structure)}) try: # Information about the 'dimension at observation level' @@ -1163,16 +1168,21 @@ def _concept(reader, elem): # §3.3: Basic Inheritance - -@end( - """ +COMPONENT = """ str:Attribute str:Dimension str:GroupDimension str:IdentifiableObjectTarget str:KeyDescriptorValuesTarget str:MeasureDimension 
str:MetadataAttribute str:PrimaryMeasure str:ReportPeriodTarget str:TimeDimension """ -) -@possible_reference() -def _component(reader: Reader, elem): + + +@start(COMPONENT, only=False) +def _component_start(reader: Reader, elem): + reader.stash(reader.class_for_tag(elem.tag)) + + +@end(COMPONENT, only=False) +@possible_reference(unstash=True) +def _component_end(reader: Reader, elem): # Object class: {,Measure,Time}Dimension or DataAttribute cls = reader.class_for_tag(elem.tag) @@ -1200,6 +1210,12 @@ def _component(reader: Reader, elem): assert len(ar) == 1, ar args["related_to"] = ar[0] + # MetadataAttribute.child only + if children := reader.pop_all(cls): + args["child"] = children + + reader.unstash() + # SDMX 2.1 spec §3A, part III, p.140: “The id attribute holds an explicit # identification of the component. If this identifier is not supplied, then it is # assumed to be the same as the identifier of the concept referenced from the @@ -1616,7 +1632,7 @@ def _series_ss(reader, elem): ds.add_obs( reader.pop_all(model.Observation), ds.structured_by.make_key( - model.SeriesKey, elem.attrib, extend=reader.peek("SS without DSD") + model.SeriesKey, elem.attrib, extend=reader.peek("SS without structure") ), ) @@ -1640,7 +1656,7 @@ def _group_ss(reader, elem): group_id = attrib.pop(reader.qname("xsi", "type"), None) gk = ds.structured_by.make_key( - model.GroupKey, attrib, extend=reader.peek("SS without DSD") + model.GroupKey, attrib, extend=reader.peek("SS without structure") ) if group_id: @@ -1652,7 +1668,7 @@ def _group_ss(reader, elem): try: gk.described_by = ds.structured_by.group_dimensions[group_id] except KeyError: - if not reader.peek("SS without DSD"): + if not reader.peek("SS without structure"): raise ds.group[gk] = [] @@ -1686,7 +1702,7 @@ def _obs(reader, elem): @end(":Obs") def _obs_ss(reader, elem): # True if the user failed to provide a DSD to use in parsing structure-specific data - extend = reader.peek("SS without DSD") + extend = reader.peek("SS 
without structure") # Retrieve the PrimaryMeasure from the DSD for the current data set dsd = reader.get_single("DataSet").structured_by @@ -1865,21 +1881,38 @@ def _rv(reader: Reader, elem): return obj +def add_mds_events(reader: Reader, mds: model.MetadataStructureDefinition): + """Add parser events for structure-specific metadata.""" + + # TODO these persist after reading a particular message; avoid this + def _add_events_for_ma(ma: model.MetadataAttribute): + reader.start(f":{ma.id}", only=False)(_ra_start) + reader.end(f":{ma.id}", only=False)(_ra_end) + for child in ma.child: + _add_events_for_ma(child) + + for rs in mds.report_structure.values(): + for ma in rs.components: + _add_events_for_ma(ma) + + @start("md:ReportedAttribute", only=False) -def _ra_generic_start(reader: Reader, elem): +def _ra_start(reader: Reader, elem): # Avoid collecting previous/sibling ReportedAttribute as children of this one reader.stash(model.ReportedAttribute) @end("md:ReportedAttribute", only=False) -def _ra_generic_end(reader: Reader, elem): +def _ra_end(reader: Reader, elem): cls = reader.class_for_tag(elem.tag) + if cls is None: + cls = reader.class_for_tag("md:ReportedAttribute") + value_for = elem.tag + else: + value_for = elem.attrib["id"] - args = dict( - # Pop all child elements - child=reader.pop_all(cls, subclass=True), - value_for=elem.attrib["id"], - ) + # Pop all child elements + args = dict(child=reader.pop_all(cls, subclass=True), value_for=value_for) xhtml = reader.pop_single("StructuredText") if xhtml: diff --git a/sdmx/reader/xml/v30.py b/sdmx/reader/xml/v30.py index 7b9358043..889952899 100644 --- a/sdmx/reader/xml/v30.py +++ b/sdmx/reader/xml/v30.py @@ -71,7 +71,8 @@ class Reader(v21.Reader): v21._item_start ) end("str:GeoFeatureSetCode str:GeoGridCode str:ValueItem", only=False)(v21._item_end) -end("str:Measure str:MetadataAttribute")(v21._component) +start("str:Measure str:MetadataAttribute", only=False)(v21._component_start) +end("str:Measure 
str:MetadataAttribute", only=False)(v21._component_end) end("str:MetadataAttributeList")(v21._cl) end("str:DataConstraint")(v21._cc) end("str:KeyValue")(v21._ms) From 7cda033231c92b1ea5e048c905b0211bb73ccafe Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Tue, 9 Jan 2024 17:37:54 +0100 Subject: [PATCH 093/103] Un-xfail read of esms_structured.xml --- sdmx/tests/reader/test_reader_xml.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/sdmx/tests/reader/test_reader_xml.py b/sdmx/tests/reader/test_reader_xml.py index 4d16521fa..3f546bb71 100644 --- a/sdmx/tests/reader/test_reader_xml.py +++ b/sdmx/tests/reader/test_reader_xml.py @@ -21,12 +21,7 @@ def test_read_xml(path) -> None: # without additional assertions ("M.USD.EUR.SP00.A.xml", "ECB_EXR/1/structure.xml", "ECB_EXR1"), # Structure-specific metadata - pytest.param( - "esms_structured.xml", - "v21/xml/demography/esms.xml", - "ESMS_SIMPLE", - marks=pytest.mark.xfail(reason="Not implemented"), - ), + ("esms_structured.xml", "v21/xml/demography/esms.xml", "ESMS_SIMPLE"), ), ) def test_read_xml_ss(specimen, message_path, structure_path, structure_id) -> None: From 5b94ea2ed656afaf6abe4f2936b59b6b61c5977a Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Tue, 9 Jan 2024 17:38:24 +0100 Subject: [PATCH 094/103] Exclude TYPE_CHECKING blocks from coverage --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 5f2ee008f..6c1900fae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,6 +63,8 @@ omit = [ exclude_also = [ # Don't complain about abstract methods, they aren't run "@(abc\\.)?abstractmethod", + # Imports only used by type checkers + "if TYPE_CHECKING:", ] [tool.mypy] From f520d5985135347576714e2804242c44f8949377 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Tue, 9 Jan 2024 17:52:58 +0100 Subject: [PATCH 095/103] Update .format.xml --- sdmx/format/xml/common.py | 3 --- sdmx/format/xml/v21.py | 3 +++ 2 
files changed, 3 insertions(+), 3 deletions(-) diff --git a/sdmx/format/xml/common.py b/sdmx/format/xml/common.py index 5e5660eb2..143625644 100644 --- a/sdmx/format/xml/common.py +++ b/sdmx/format/xml/common.py @@ -17,8 +17,6 @@ "CategoryScheme", "Code", "Codelist", - "CodelistMap", - "CodeMap", "Concept", "ConceptScheme", "CustomType", @@ -33,7 +31,6 @@ "NamePersonalisationScheme", "Ruleset", "RulesetScheme", - "StructureSet", "TimeDimension", "TransformationScheme", "UserDefinedOperatorScheme", diff --git a/sdmx/format/xml/v21.py b/sdmx/format/xml/v21.py index f215358e2..1edf74d4d 100644 --- a/sdmx/format/xml/v21.py +++ b/sdmx/format/xml/v21.py @@ -28,6 +28,8 @@ + [ (f"model.{name}", f"str:{name}") for name in """ + CodelistMap + CodeMap ContentConstraint HierarchicalCodelist Hierarchy @@ -38,6 +40,7 @@ PrimaryMeasure ReportPeriodTarget ReportStructure + StructureSet """.split() ], ) From 367fef4d203fc56f00c30381c1b8150c6131b4ed Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Tue, 9 Jan 2024 17:53:26 +0100 Subject: [PATCH 096/103] Update class lists in .test_model --- sdmx/tests/test_model.py | 47 ++++++++++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/sdmx/tests/test_model.py b/sdmx/tests/test_model.py index 528710343..95d9dae1c 100644 --- a/sdmx/tests/test_model.py +++ b/sdmx/tests/test_model.py @@ -11,6 +11,10 @@ "NameableArtefact", "VersionableArtefact", "MaintainableArtefact", + "ActionType", + "ConstraintRoleType", + "FacetValueType", + "ExtendedFacetValueType", "Item", "ItemScheme", "FacetType", @@ -51,12 +55,8 @@ "SeriesKey", "CodingFormat", "Level", + "Hierarchy", "HierarchicalCode", - "ItemAssociation", - "CodeMap", - "ItemSchemeMap", - "CodelistMap", - "StructureSet", "ConstraintRole", "ConstrainableArtefact", "SelectionValue", @@ -112,6 +112,9 @@ "Observation", "StructureSpecificDataSet", "MetadataStructureDefinition", + "TextAttributeValue", + "XHTMLAttributeValue", + "MetadataSet", ] 
V21_ONLY = [ @@ -125,9 +128,32 @@ "GenericDataSet", "GenericTimeSeriesDataSet", "StructureSpecificTimeSeriesDataSet", + "ReportingCategory", + "ReportingTaxonomy", + "TargetObject", + "DataSetTarget", + "DimensionDescriptorValuesTarget", + "IdentifiableObjectTarget", + "ReportPeriodTarget", + "MetadataTarget", + "ReportStructure", + "MetadataStructureDefinition", "MetadataflowDefinition", - "Hierarchy", + "TargetObjectValue", + "TargetReportPeriod", + "TargetIdentifiableObject", + "TargetObjectKey", + "ReportedAttribute", + "EnumeratedAttributeValue", + "NonEnumeratedAttributeValue", + "OtherNonEnumeratedAttributeValue", + "MetadataReport", "HierarchicalCodelist", + "ItemAssociation", + "CodeMap", + "ItemSchemeMap", + "CodelistMap", + "StructureSet", ] V30_ONLY = [ @@ -144,14 +170,21 @@ "MetadataProviderScheme", "Measure", "Dataflow", # Instead of DataflowDefinition - "Hierarchy", "HierarchyAssociation", "DataflowRelationship", "MeasureRelationship", "ObservationRelationship", "DataConstraint", "MetadataConstraint", + "MetadataAttributeDescriptor", + "IdentifiableObjectSelection", + "MetadataStructureDefinition", "Metadataflow", # Instead of MetadataflowDefinition + "MetadataAttributeValue", + "CodedMetadataAttributeValue", + "UncodedMetadataAttributeValue", + "OtherUncodedAttributeValue", + "TargetIdentifiableObject", ] From 6668c49f94ca5d9bc92d254de4ef98907cf98c04 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 10 Jan 2024 13:34:42 +0100 Subject: [PATCH 097/103] Test Structure.grouping, .replace_grouping() --- sdmx/model/common.py | 8 ++++++-- sdmx/tests/model/test_common.py | 29 ++++++++++++++++++++++++++--- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/sdmx/model/common.py b/sdmx/model/common.py index 724ba61be..5e11cff6c 100644 --- a/sdmx/model/common.py +++ b/sdmx/model/common.py @@ -1120,8 +1120,12 @@ def grouping(self) -> Sequence[ComponentList]: """A collection of all the ComponentLists associated with a subclass.""" 
result: List[ComponentList] = [] for f in fields(self): - if isinstance(f.type, ComponentList): - result.append(getattr(self.f.name)) + types = get_args(f.type) or (f.type,) + try: + if any(issubclass(t, ComponentList) for t in types): + result.append(getattr(self, f.name)) + except TypeError: + pass return result def replace_grouping(self, cl: ComponentList) -> None: diff --git a/sdmx/tests/model/test_common.py b/sdmx/tests/model/test_common.py index f5210fa64..e369a3ea6 100644 --- a/sdmx/tests/model/test_common.py +++ b/sdmx/tests/model/test_common.py @@ -3,6 +3,8 @@ import pytest +import sdmx.model as model +from sdmx.model import v21 from sdmx.model.common import ( Agency, AnnotableArtefact, @@ -15,7 +17,6 @@ NameableArtefact, Representation, ) -from sdmx.model.v21 import AttributeDescriptor, DataStructureDefinition class TestAnnotation: @@ -107,7 +108,7 @@ def test_general(self): assert hash(ia) == hash("foo") # Subclass is hashable - ad = AttributeDescriptor() + ad = model.AttributeDescriptor() assert hash(ad) == id(ad) def test_hash_subclass(self): @@ -131,7 +132,7 @@ def test_sort(self): "'DataStructureDefinition'" ), ): - sorted([DataStructureDefinition(id="c")] + items) + sorted([v21.DataStructureDefinition(id="c")] + items) class TestNameableArtefact: @@ -347,3 +348,25 @@ def test_init(self): c2.name = "foo" assert repr(c1) != repr(c2) + + +class TestStructure: + @pytest.fixture + def obj(self): + # Use BaseDataStructureDefinition as a concrete/less abstract subclass + return model.BaseDataStructureDefinition() + + def test_grouping(self, obj) -> None: + result = obj.grouping + # Class has an AttributeDescriptor, DimensionDescriptor, and dict-like of + # GroupDimensionDescriptor + assert 3 == len(result) + + def test_replace_grouping(self, obj) -> None: + class Foo(model.ComponentList): + pass + + # Cannot replace with an instance of Foo, because this does not correspond to + # the type of any member of the class + with pytest.raises(TypeError, match="No 
grouping of type"): + obj.replace_grouping(Foo()) From 85f7f822949913d7204c1f6bc834a3d6fcd86d5e Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 10 Jan 2024 13:41:25 +0100 Subject: [PATCH 098/103] Avoid "id" attribute to sort Structure.grouping DictLikes of ComponentList will not have this attribute. --- sdmx/model/common.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sdmx/model/common.py b/sdmx/model/common.py index 5e11cff6c..a0a02f449 100644 --- a/sdmx/model/common.py +++ b/sdmx/model/common.py @@ -1146,13 +1146,14 @@ def replace_grouping(self, cl: ComponentList) -> None: setattr(self, field.name, cl) def compare(self, other: "Structure", strict: bool = True) -> bool: - from operator import attrgetter + # DictLike of ComponentList will not have an "id" attribute + def _key(item) -> str: + return getattr(item, "id", str(type(item))) return all( s.compare(o, strict) for s, o in zip( - sorted(self.grouping, key=attrgetter("id")), - sorted(other.grouping, key=attrgetter("id")), + sorted(self.grouping, key=_key), sorted(other.grouping, key=_key) ) ) From 24234ca8df0349977c2886ce0eb4150bc4bf89ce Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 10 Jan 2024 13:47:58 +0100 Subject: [PATCH 099/103] Avoid internal warning in StructureMessage definition --- sdmx/message.py | 4 ++-- sdmx/tests/model/test_v21.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sdmx/message.py b/sdmx/message.py index 514598b92..b64ef2b10 100644 --- a/sdmx/message.py +++ b/sdmx/message.py @@ -193,7 +193,7 @@ class StructureMessage(Message): #: Collection of :class:`.Codelist`. codelist: DictLikeDescriptor[str, model.Codelist] = DictLikeDescriptor() #: Collection of :class:`.HierarchicalCodelist`. - hierarchical_code_list: DictLikeDescriptor[ + hierarchical_codelist: DictLikeDescriptor[ str, v21.HierarchicalCodelist ] = DictLikeDescriptor() #: Collection of :class:`.v30.Hierarchy`. 
@@ -216,7 +216,7 @@ class StructureMessage(Message): str, model.BaseDataStructureDefinition ] = DictLikeDescriptor() #: Collection of :class:`.StructureSet`. - structureset: DictLikeDescriptor[str, model.StructureSet] = DictLikeDescriptor() + structureset: DictLikeDescriptor[str, v21.StructureSet] = DictLikeDescriptor() #: Collection of :class:`.OrganisationScheme`. organisation_scheme: DictLikeDescriptor[ str, model.OrganisationScheme diff --git a/sdmx/tests/model/test_v21.py b/sdmx/tests/model/test_v21.py index 5b57a8328..67192d72d 100644 --- a/sdmx/tests/model/test_v21.py +++ b/sdmx/tests/model/test_v21.py @@ -611,11 +611,11 @@ def msg(self, specimen): return sdmx.read_sdmx(f) def test_hierarchy(self, msg: sdmx.message.StructureMessage) -> None: - for key, hcl in msg.hierarchical_code_list.items(): + for key, hcl in msg.hierarchical_codelist.items(): assert 1 == len(hcl.hierarchy) # print(f"{hcl = }") - hcl = msg.hierarchical_code_list["BIS:HCL_COUNTRY(1.0)"] + hcl = msg.hierarchical_codelist["BIS:HCL_COUNTRY(1.0)"] # Access a Hierarchy h = hcl.hierarchy[0] From 83531527d1736c4ae0026083be20fc808967f3d4 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 10 Jan 2024 16:26:50 +0100 Subject: [PATCH 100/103] Test Message.__repr__() with requests.Response --- sdmx/message.py | 1 + sdmx/tests/test_message.py | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/sdmx/message.py b/sdmx/message.py index b64ef2b10..fde7756f6 100644 --- a/sdmx/message.py +++ b/sdmx/message.py @@ -40,6 +40,7 @@ def _summarize(obj, include: Optional[List[str]] = None): elif isinstance(attr, datetime): s_attr = repr(attr.isoformat()) elif isinstance(attr, requests.Response): + # Don't use repr(), which displays the entire response body s_attr = str(attr) else: s_attr = repr(attr) diff --git a/sdmx/tests/test_message.py b/sdmx/tests/test_message.py index ae6bbac81..f8a01211d 100644 --- a/sdmx/tests/test_message.py +++ b/sdmx/tests/test_message.py @@ -81,6 +81,7 @@ def 
test_objects(self): sender: source: fr: Banque de données macro-économiques test: False + response: Categorisation (1): CAT_IPI-2010_IPI-2010-A21 CategoryScheme (1): CLASSEMENT_DATAFLOWS Codelist (7): CL_FREQ CL_NAF2_A21 CL_NATURE CL_UNIT CL_AREA CL_TIME_C... @@ -99,6 +100,7 @@ def test_objects(self): sender: source: fr: Banque de données macro-économiques test: False + response: DataflowDefinition (663): ACT-TRIM-ANC BPM6-CCAPITAL BPM6-CFINANCIER ... DataStructureDefinition (663): ACT-TRIM-ANC BPM6-CCAPITAL BPM6-CFINAN...""", ), @@ -113,6 +115,7 @@ def test_objects(self): source: """ """ test: False + response: DataSet (1) dataflow: observation_dimension: """, @@ -128,6 +131,7 @@ def test_objects(self): source: """ """ test: False + response: DataSet (2) dataflow: observation_dimension: []""", @@ -139,8 +143,14 @@ def test_objects(self): "pattern, expected", EXPECTED, ids=list(map(itemgetter(0), EXPECTED)) ) def test_message_repr(specimen, pattern, expected): + import requests + with specimen(pattern) as f: msg = sdmx.read_sdmx(f) + + # Attach a response object, as if the Message resulted from a requests query + msg.response = requests.Response() + if isinstance(expected, re.Pattern): assert expected.fullmatch(repr(msg)) else: From 60cc441f113a23eb480ca03fd2e18b13f73f8d31 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 10 Jan 2024 16:29:36 +0100 Subject: [PATCH 101/103] Test HierarchicalCodelist.__repr__() --- sdmx/tests/model/test_v21.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sdmx/tests/model/test_v21.py b/sdmx/tests/model/test_v21.py index 67192d72d..eb0e33170 100644 --- a/sdmx/tests/model/test_v21.py +++ b/sdmx/tests/model/test_v21.py @@ -5,6 +5,7 @@ import sdmx import sdmx.message +from sdmx.model import v21 as model from sdmx.model.v21 import ( AttributeDescriptor, AttributeValue, @@ -610,6 +611,10 @@ def msg(self, specimen): with specimen("BIS/hierarchicalcodelist-0.xml") as f: return sdmx.read_sdmx(f) + 
@pytest.fixture(scope="class") + def obj(self, msg) -> model.HierarchicalCodelist: + return msg.hierarchical_codelist["BIS:HCL_COUNTRY(1.0)"] + def test_hierarchy(self, msg: sdmx.message.StructureMessage) -> None: for key, hcl in msg.hierarchical_codelist.items(): assert 1 == len(hcl.hierarchy) @@ -641,3 +646,6 @@ def test_hierarchy(self, msg: sdmx.message.StructureMessage) -> None: c3 = c2.child[0] assert "6J" == c3.code assert c3.code.parent.urn.endswith("Codelist=BIS:CL_BIS_IF_REF_AREA(1.0)") + + def test_repr(self, obj: model.HierarchicalCodelist): + assert "" == repr(obj) From 2b515152e434158822c39a31c675f1875a92c095 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 10 Jan 2024 16:36:15 +0100 Subject: [PATCH 102/103] Restore coverage in .reader.xml.v21 --- sdmx/reader/xml/v21.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index b3eff71aa..ec1e34b55 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -739,7 +739,7 @@ def _message(reader: Reader, elem): elif isinstance(supplied_structure, model.MetadataStructureDefinition): add_mds_events(reader, supplied_structure) elif supplied_structure: - log.info("Use supplied dsd=… argument for non–structure-specific message") + log.info("Use supplied structure=… argument for non–structure-specific message") # Store values for other methods reader.push("SS without structure", ss_without_structure) @@ -840,7 +840,7 @@ def _header_structure(reader, elem): assert header_structure == structure elif header_su and not provided_structure: reader.push(structure) - elif structure is None: + elif structure is None: # pragma: no cover raise RuntimeError # Store on the data flow @@ -1804,19 +1804,19 @@ def _mds_start(reader, elem): # Get a reference to the MSD that structures the data set # Provided in the / - dsd = reader.get_single(id) - if not dsd: + msd = reader.get_single(id) + if not msd: # pragma: no cover 
# Fall back to a MSD provided as an argument to read_message() - dsd = reader.get_single(common.BaseMetadataStructureDefinition, subclass=True) + msd = reader.get_single(common.BaseMetadataStructureDefinition, subclass=True) - if not dsd: # pragma: no cover + if not msd: raise RuntimeError("No MSD when creating DataSet") log.debug( - f'Use provided {dsd!r} for structureRef="{id}" not defined in message' + f'Use provided {msd!r} for structureRef="{id}" not defined in message' ) - mds.structured_by = dsd + mds.structured_by = msd reader.push("MetadataSet", mds) @@ -1947,7 +1947,7 @@ def _hc(reader: Reader, elem): except KeyError: if cl.is_external_reference: code = cl.setdefault(id=code_id) - else: + else: # pragma: no cover raise # Create the HierarchicalCode @@ -2032,7 +2032,7 @@ def _ismap_end(reader: Reader, elem): if scheme.is_external_reference: # Externally-referenced ItemScheme → create the Item item = scheme.setdefault(id=id_) - else: + else: # pragma: no cover raise setattr(ia, name, item) From 577c7f76355fe06f0d36ca3f18ea9a9108810908 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Wed, 10 Jan 2024 18:20:58 +0100 Subject: [PATCH 103/103] Add #152 to docs, what's new --- doc/api/model-common-list.rst | 7 ++++ doc/api/model-v21-list.rst | 28 +++++++++++++++ doc/api/model-v30-list.rst | 13 +++++-- doc/conf.py | 2 -- doc/dev.rst | 8 +++-- doc/whatsnew.rst | 65 +++++++++++++++++++++++++++++++++-- sdmx/message.py | 4 ++- sdmx/model/v21.py | 8 +++-- sdmx/model/v30.py | 2 +- 9 files changed, 123 insertions(+), 14 deletions(-) diff --git a/doc/api/model-common-list.rst b/doc/api/model-common-list.rst index 7a0c2f5d9..5682534db 100644 --- a/doc/api/model-common-list.rst +++ b/doc/api/model-common-list.rst @@ -1,5 +1,6 @@ .. This file is auto-generated by doc/conf.py. 
+:obj:`~.common.ActionType` :obj:`~.common.Agency` :obj:`~.common.AgencyScheme` :obj:`~.common.AnnotableArtefact` @@ -12,12 +13,14 @@ :obj:`~.common.CategoryScheme` :obj:`~.common.Code` :obj:`~.common.Codelist` +:obj:`~.common.CodingFormat` :obj:`~.common.Component` :obj:`~.common.ComponentList` :obj:`~.common.Concept` :obj:`~.common.ConceptScheme` :obj:`~.common.ConstrainableArtefact` :obj:`~.common.ConstraintRole` +:obj:`~.common.ConstraintRoleType` :obj:`~.common.Contact` :obj:`~.common.CubeRegion` :obj:`~.common.CustomType` @@ -34,12 +37,15 @@ :obj:`~.common.DimensionDescriptor` :obj:`~.common.DimensionRelationship` :obj:`~.common.EndPeriod` +:obj:`~.common.ExtendedFacetValueType` :obj:`~.common.Facet` :obj:`~.common.FacetType` +:obj:`~.common.FacetValueType` :obj:`~.common.FromVTLSpaceKey` :obj:`~.common.GroupDimensionDescriptor` :obj:`~.common.GroupKey` :obj:`~.common.GroupRelationship` +:obj:`~.common.HierarchicalCode` :obj:`~.common.ISOConceptReference` :obj:`~.common.IdentifiableArtefact` :obj:`~.common.InternationalString` @@ -47,6 +53,7 @@ :obj:`~.common.ItemScheme` :obj:`~.common.Key` :obj:`~.common.KeyValue` +:obj:`~.common.Level` :obj:`~.common.MaintainableArtefact` :obj:`~.common.MetadataTargetRegion` :obj:`~.common.NamePersonalisation` diff --git a/doc/api/model-v21-list.rst b/doc/api/model-v21-list.rst index 4810a71bc..9d32e0985 100644 --- a/doc/api/model-v21-list.rst +++ b/doc/api/model-v21-list.rst @@ -2,27 +2,55 @@ :obj:`~.v21.AfterPeriod` :obj:`~.v21.BeforePeriod` +:obj:`~.v21.CodeMap` +:obj:`~.v21.CodelistMap` :obj:`~.v21.Constraint` :obj:`~.v21.ContentConstraint` :obj:`~.v21.DataKey` :obj:`~.v21.DataKeySet` +:obj:`~.v21.DataSetTarget` :obj:`~.v21.DataStructureDefinition` :obj:`~.v21.DataflowDefinition` +:obj:`~.v21.DimensionDescriptorValuesTarget` +:obj:`~.v21.EnumeratedAttributeValue` :obj:`~.v21.GenericDataSet` :obj:`~.v21.GenericTimeSeriesDataSet` +:obj:`~.v21.HierarchicalCodelist` +:obj:`~.v21.Hierarchy` 
+:obj:`~.v21.IdentifiableObjectTarget` +:obj:`~.v21.ItemAssociation` +:obj:`~.v21.ItemSchemeMap` :obj:`~.v21.MeasureDescriptor` :obj:`~.v21.MeasureDimension` :obj:`~.v21.MemberSelection` :obj:`~.v21.MemberValue` +:obj:`~.v21.MetadataReport` +:obj:`~.v21.MetadataSet` :obj:`~.v21.MetadataStructureDefinition` +:obj:`~.v21.MetadataTarget` :obj:`~.v21.MetadataflowDefinition` :obj:`~.v21.NoSpecifiedRelationship` +:obj:`~.v21.NonEnumeratedAttributeValue` :obj:`~.v21.Observation` +:obj:`~.v21.OtherNonEnumeratedAttributeValue` :obj:`~.v21.PrimaryMeasure` :obj:`~.v21.PrimaryMeasureRelationship` :obj:`~.v21.RangePeriod` +:obj:`~.v21.ReportPeriodTarget` +:obj:`~.v21.ReportStructure` +:obj:`~.v21.ReportedAttribute` +:obj:`~.v21.ReportingCategory` +:obj:`~.v21.ReportingTaxonomy` :obj:`~.v21.ReportingYearStartDay` :obj:`~.v21.SelectionValue` +:obj:`~.v21.StructureSet` :obj:`~.v21.StructureSpecificDataSet` :obj:`~.v21.StructureSpecificTimeSeriesDataSet` +:obj:`~.v21.TargetIdentifiableObject` +:obj:`~.v21.TargetObject` +:obj:`~.v21.TargetObjectKey` +:obj:`~.v21.TargetObjectValue` +:obj:`~.v21.TargetReportPeriod` +:obj:`~.v21.TextAttributeValue` :obj:`~.v21.TimeRangeValue` +:obj:`~.v21.XHTMLAttributeValue` diff --git a/doc/api/model-v30-list.rst b/doc/api/model-v30-list.rst index 53ee9f390..47cc613f9 100644 --- a/doc/api/model-v30-list.rst +++ b/doc/api/model-v30-list.rst @@ -2,8 +2,8 @@ :obj:`~.v30.AfterPeriod` :obj:`~.v30.BeforePeriod` +:obj:`~.v30.CodedMetadataAttributeValue` :obj:`~.v30.CodelistExtension` -:obj:`~.v30.CodingFormat` :obj:`~.v30.Constraint` :obj:`~.v30.DataConstraint` :obj:`~.v30.DataKey` @@ -17,25 +17,32 @@ :obj:`~.v30.GeoGridCodelist` :obj:`~.v30.GeoRefCode` :obj:`~.v30.GeographicCodelist` -:obj:`~.v30.HierarchicalCode` :obj:`~.v30.Hierarchy` :obj:`~.v30.HierarchyAssociation` -:obj:`~.v30.Level` +:obj:`~.v30.IdentifiableObjectSelection` :obj:`~.v30.Measure` :obj:`~.v30.MeasureDescriptor` :obj:`~.v30.MeasureRelationship` :obj:`~.v30.MemberSelection` 
:obj:`~.v30.MemberValue` +:obj:`~.v30.MetadataAttributeDescriptor` +:obj:`~.v30.MetadataAttributeValue` :obj:`~.v30.MetadataConstraint` :obj:`~.v30.MetadataProvider` :obj:`~.v30.MetadataProviderScheme` +:obj:`~.v30.MetadataSet` :obj:`~.v30.MetadataStructureDefinition` :obj:`~.v30.Metadataflow` :obj:`~.v30.Observation` :obj:`~.v30.ObservationRelationship` +:obj:`~.v30.OtherUncodedAttributeValue` :obj:`~.v30.RangePeriod` :obj:`~.v30.SelectionValue` :obj:`~.v30.StructureSpecificDataSet` +:obj:`~.v30.TargetIdentifiableObject` +:obj:`~.v30.TextAttributeValue` :obj:`~.v30.TimeRangeValue` +:obj:`~.v30.UncodedMetadataAttributeValue` :obj:`~.v30.ValueItem` :obj:`~.v30.ValueList` +:obj:`~.v30.XHTMLAttributeValue` diff --git a/doc/conf.py b/doc/conf.py index e3dccc525..f7dc54d58 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -87,13 +87,11 @@ def linkcode_resolve(domain, info): # If True, todo and todolist produce output, else they produce nothing todo_include_todos = True - # -- Options for IPython.sphinxext.ipython_directive ----------------------------------- # Specify if the embedded Sphinx shell should import Matplotlib and set the backend ipython_mplbackend = "" - # -- Dynamic configuration diff --git a/doc/dev.rst b/doc/dev.rst index f7a6e15f1..1858060be 100644 --- a/doc/dev.rst +++ b/doc/dev.rst @@ -137,6 +137,11 @@ Address any failures before releasing. Internal code reference ======================= +.. automodule:: sdmx.dictlike + :noindex: + :undoc-members: + :show-inheritance: + ``testing``: Testing utilities ------------------------------ @@ -147,13 +152,12 @@ Internal code reference ``util``: Utilities ------------------- + .. automodule:: sdmx.util :noindex: - :members: summarize_dictlike :undoc-members: :show-inheritance: - Inline TODOs ============ diff --git a/doc/whatsnew.rst b/doc/whatsnew.rst index b00c8e73c..098fa508f 100644 --- a/doc/whatsnew.rst +++ b/doc/whatsnew.rst @@ -3,8 +3,69 @@ What's new? *********** -.. Next release -.. 
============ +Next release +============ + +- Expand :mod:`.model` and :mod:`.reader.xml` support for metadata structures and metadata sets (§7 of the Information Model in both SDMX 2.1 and 3.0) (:issue:`73`, :pull:`152`). + This includes the additional classes: + + - :mod:`.model.common`: + :class:`.CodingFormat` + :class:`.ExtendedFacetValueType` + :class:`.HierarchicalCode` + :class:`.Level`. + - :mod:`.model.v21`: + :class:`.CodelistMap` + :class:`.CodeMap` + :class:`.DataSetTarget` + :class:`.DimensionDescriptorValuesTarget` + :class:`.EnumeratedAttributeValue` + :class:`.IdentifiableObjectTarget` + :class:`.ItemAssociation` + :class:`.ItemSchemeMap` + :class:`.MetadataReport` + :class:`.MetadataSet` + :class:`.MetadataTarget` + :class:`.NonEnumeratedAttributeValue` + :class:`.OtherNonEnumeratedAttributeValue` + :class:`.ReportedAttribute` + :class:`.ReportingCategory` + :class:`.ReportingTaxonomy` + :class:`.ReportPeriodTarget` + :class:`.ReportStructure` + :class:`.StructureSet` + :class:`.TargetIdentifiableObject` + :class:`.TargetObject` + :class:`.TargetObjectKey` + :class:`.TargetObjectValue` + :class:`.TargetReportPeriod` + :class:`.TextAttributeValue` + :class:`.XHTMLAttributeValue`. + - :mod:`.model.v30`: + :class:`.CodedMetadataAttributeValue` + :class:`.IdentifiableObjectSelection` + :class:`.MetadataAttributeDescriptor` + :class:`.MetadataAttributeValue` + :class:`.Metadataflow` + :class:`.MetadataSet` + :class:`.MetadataStructureDefinition` + :class:`.OtherUncodedAttributeValue` + :class:`.TargetIdentifiableObject` + :class:`.TextAttributeValue` + :class:`.UncodedMetadataAttributeValue` + :class:`.XHTMLAttributeValue`. +- New collections on StructureMessage: + :attr:`.hierarchical_codelist`, + :attr:`.hierarchy`, + :attr:`.metadatastructure`. +- New class :class:`.MetadataMessage`. +- Improve :class:`.Structure`: + + - New attribute :attr:`~.Structure.grouping` per the information model. 
+ - New convenience method :meth:`~.Structure.replace_grouping`. +- :mod:`.reader.xml` parses messages available from 'actualconstraint', 'allowedconstraint', 'contentconstraint', 'hierarchicalcodelist', 'metadatastructure', 'structure', and 'structureset' SDMX 2.1 REST API endpoints for all known data sources that support these. + + - Expand explicit marking of particular data sources that do not support the above endpoints. v2.12.1 (2023-12-20) ==================== diff --git a/sdmx/message.py b/sdmx/message.py index fde7756f6..062f12599 100644 --- a/sdmx/message.py +++ b/sdmx/message.py @@ -360,7 +360,7 @@ def __repr__(self): @dataclass class DataMessage(Message): - """Data Message. + """SDMX Data Message. .. note:: A DataMessage may contain zero or more :class:`.DataSet`, so :attr:`data` is a list. To retrieve the first (and possibly only) @@ -440,6 +440,8 @@ def compare(self, other, strict=True): @dataclass class MetadataMessage(DataMessage): + """SDMX Metadata Message.""" + @property def structure_type(self) -> Type[common.Structure]: return { diff --git a/sdmx/model/v21.py b/sdmx/model/v21.py index 7b34427b7..48018f787 100644 --- a/sdmx/model/v21.py +++ b/sdmx/model/v21.py @@ -442,9 +442,11 @@ class EnumeratedAttributeValue(ReportedAttribute): value: str - #: .. note:: The SDMX 2.1 IM (2011-08) gives this as `valueFor`, but this name - #: duplicates :attr:`ReporterAttribute.value_for`. :mod:`sdmx` uses `value_of` - #: for consistency with :attr:`.v30.CodedMetadataAttributeValue.value_of`. + #: .. note:: + #: + #: The SDMX 2.1 IM (2011-08) gives this as `valueFor`, but this name duplicates + #: :attr:`.ReportedAttribute.value_for`. :mod:`sdmx` uses `value_of` for + #: consistency with :attr:`.v30.CodedMetadataAttributeValue.value_of`.
value_of: common.Code diff --git a/sdmx/model/v30.py b/sdmx/model/v30.py index c3f317c14..871cf68b7 100644 --- a/sdmx/model/v30.py +++ b/sdmx/model/v30.py @@ -438,7 +438,7 @@ class MetadataAttributeValue: class CodedMetadataAttributeValue(MetadataAttributeValue): """SDMX 3.0 CodedMetadataAttributeValue. - Analogous to :class:`.v21.EnumeratedAttributeValue. + Analogous to :class:`.v21.EnumeratedAttributeValue`. """ value_of: common.Code