diff --git a/.gitignore b/.gitignore index bcb64ea3d..b4355d3ab 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ __pycache__ .coverage* .mypy_cache .pytest_cache +.ruff_cache build coverage.xml dist diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3f04ac048..3a6f462d3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.6.1 + rev: v1.8.0 hooks: - id: mypy additional_dependencies: @@ -15,7 +15,7 @@ repos: - types-requests args: [] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.2 + rev: v0.1.9 hooks: - id: ruff - id: ruff-format diff --git a/doc/api/model-common-list.rst b/doc/api/model-common-list.rst index 7a0c2f5d9..5682534db 100644 --- a/doc/api/model-common-list.rst +++ b/doc/api/model-common-list.rst @@ -1,5 +1,6 @@ .. This file is auto-generated by doc/conf.py. +:obj:`~.common.ActionType` :obj:`~.common.Agency` :obj:`~.common.AgencyScheme` :obj:`~.common.AnnotableArtefact` @@ -12,12 +13,14 @@ :obj:`~.common.CategoryScheme` :obj:`~.common.Code` :obj:`~.common.Codelist` +:obj:`~.common.CodingFormat` :obj:`~.common.Component` :obj:`~.common.ComponentList` :obj:`~.common.Concept` :obj:`~.common.ConceptScheme` :obj:`~.common.ConstrainableArtefact` :obj:`~.common.ConstraintRole` +:obj:`~.common.ConstraintRoleType` :obj:`~.common.Contact` :obj:`~.common.CubeRegion` :obj:`~.common.CustomType` @@ -34,12 +37,15 @@ :obj:`~.common.DimensionDescriptor` :obj:`~.common.DimensionRelationship` :obj:`~.common.EndPeriod` +:obj:`~.common.ExtendedFacetValueType` :obj:`~.common.Facet` :obj:`~.common.FacetType` +:obj:`~.common.FacetValueType` :obj:`~.common.FromVTLSpaceKey` :obj:`~.common.GroupDimensionDescriptor` :obj:`~.common.GroupKey` :obj:`~.common.GroupRelationship` +:obj:`~.common.HierarchicalCode` :obj:`~.common.ISOConceptReference` :obj:`~.common.IdentifiableArtefact` :obj:`~.common.InternationalString` @@ -47,6 +53,7 @@ 
:obj:`~.common.ItemScheme` :obj:`~.common.Key` :obj:`~.common.KeyValue` +:obj:`~.common.Level` :obj:`~.common.MaintainableArtefact` :obj:`~.common.MetadataTargetRegion` :obj:`~.common.NamePersonalisation` diff --git a/doc/api/model-v21-list.rst b/doc/api/model-v21-list.rst index 4810a71bc..9d32e0985 100644 --- a/doc/api/model-v21-list.rst +++ b/doc/api/model-v21-list.rst @@ -2,27 +2,55 @@ :obj:`~.v21.AfterPeriod` :obj:`~.v21.BeforePeriod` +:obj:`~.v21.CodeMap` +:obj:`~.v21.CodelistMap` :obj:`~.v21.Constraint` :obj:`~.v21.ContentConstraint` :obj:`~.v21.DataKey` :obj:`~.v21.DataKeySet` +:obj:`~.v21.DataSetTarget` :obj:`~.v21.DataStructureDefinition` :obj:`~.v21.DataflowDefinition` +:obj:`~.v21.DimensionDescriptorValuesTarget` +:obj:`~.v21.EnumeratedAttributeValue` :obj:`~.v21.GenericDataSet` :obj:`~.v21.GenericTimeSeriesDataSet` +:obj:`~.v21.HierarchicalCodelist` +:obj:`~.v21.Hierarchy` +:obj:`~.v21.IdentifiableObjectTarget` +:obj:`~.v21.ItemAssociation` +:obj:`~.v21.ItemSchemeMap` :obj:`~.v21.MeasureDescriptor` :obj:`~.v21.MeasureDimension` :obj:`~.v21.MemberSelection` :obj:`~.v21.MemberValue` +:obj:`~.v21.MetadataReport` +:obj:`~.v21.MetadataSet` :obj:`~.v21.MetadataStructureDefinition` +:obj:`~.v21.MetadataTarget` :obj:`~.v21.MetadataflowDefinition` :obj:`~.v21.NoSpecifiedRelationship` +:obj:`~.v21.NonEnumeratedAttributeValue` :obj:`~.v21.Observation` +:obj:`~.v21.OtherNonEnumeratedAttributeValue` :obj:`~.v21.PrimaryMeasure` :obj:`~.v21.PrimaryMeasureRelationship` :obj:`~.v21.RangePeriod` +:obj:`~.v21.ReportPeriodTarget` +:obj:`~.v21.ReportStructure` +:obj:`~.v21.ReportedAttribute` +:obj:`~.v21.ReportingCategory` +:obj:`~.v21.ReportingTaxonomy` :obj:`~.v21.ReportingYearStartDay` :obj:`~.v21.SelectionValue` +:obj:`~.v21.StructureSet` :obj:`~.v21.StructureSpecificDataSet` :obj:`~.v21.StructureSpecificTimeSeriesDataSet` +:obj:`~.v21.TargetIdentifiableObject` +:obj:`~.v21.TargetObject` +:obj:`~.v21.TargetObjectKey` +:obj:`~.v21.TargetObjectValue` 
+:obj:`~.v21.TargetReportPeriod` +:obj:`~.v21.TextAttributeValue` :obj:`~.v21.TimeRangeValue` +:obj:`~.v21.XHTMLAttributeValue` diff --git a/doc/api/model-v30-list.rst b/doc/api/model-v30-list.rst index 53ee9f390..47cc613f9 100644 --- a/doc/api/model-v30-list.rst +++ b/doc/api/model-v30-list.rst @@ -2,8 +2,8 @@ :obj:`~.v30.AfterPeriod` :obj:`~.v30.BeforePeriod` +:obj:`~.v30.CodedMetadataAttributeValue` :obj:`~.v30.CodelistExtension` -:obj:`~.v30.CodingFormat` :obj:`~.v30.Constraint` :obj:`~.v30.DataConstraint` :obj:`~.v30.DataKey` @@ -17,25 +17,32 @@ :obj:`~.v30.GeoGridCodelist` :obj:`~.v30.GeoRefCode` :obj:`~.v30.GeographicCodelist` -:obj:`~.v30.HierarchicalCode` :obj:`~.v30.Hierarchy` :obj:`~.v30.HierarchyAssociation` -:obj:`~.v30.Level` +:obj:`~.v30.IdentifiableObjectSelection` :obj:`~.v30.Measure` :obj:`~.v30.MeasureDescriptor` :obj:`~.v30.MeasureRelationship` :obj:`~.v30.MemberSelection` :obj:`~.v30.MemberValue` +:obj:`~.v30.MetadataAttributeDescriptor` +:obj:`~.v30.MetadataAttributeValue` :obj:`~.v30.MetadataConstraint` :obj:`~.v30.MetadataProvider` :obj:`~.v30.MetadataProviderScheme` +:obj:`~.v30.MetadataSet` :obj:`~.v30.MetadataStructureDefinition` :obj:`~.v30.Metadataflow` :obj:`~.v30.Observation` :obj:`~.v30.ObservationRelationship` +:obj:`~.v30.OtherUncodedAttributeValue` :obj:`~.v30.RangePeriod` :obj:`~.v30.SelectionValue` :obj:`~.v30.StructureSpecificDataSet` +:obj:`~.v30.TargetIdentifiableObject` +:obj:`~.v30.TextAttributeValue` :obj:`~.v30.TimeRangeValue` +:obj:`~.v30.UncodedMetadataAttributeValue` :obj:`~.v30.ValueItem` :obj:`~.v30.ValueList` +:obj:`~.v30.XHTMLAttributeValue` diff --git a/doc/conf.py b/doc/conf.py index e3dccc525..f7dc54d58 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -87,13 +87,11 @@ def linkcode_resolve(domain, info): # If True, todo and todolist produce output, else they produce nothing todo_include_todos = True - # -- Options for IPython.sphinxext.ipython_directive ----------------------------------- # Specify if the embedded 
Sphinx shell should import Matplotlib and set the backend ipython_mplbackend = "" - # -- Dynamic configuration diff --git a/doc/dev.rst b/doc/dev.rst index f7a6e15f1..1858060be 100644 --- a/doc/dev.rst +++ b/doc/dev.rst @@ -137,6 +137,11 @@ Address any failures before releasing. Internal code reference ======================= +.. automodule:: sdmx.dictlike + :noindex: + :undoc-members: + :show-inheritance: + ``testing``: Testing utilities ------------------------------ @@ -147,13 +152,12 @@ Internal code reference ``util``: Utilities ------------------- + .. automodule:: sdmx.util :noindex: - :members: summarize_dictlike :undoc-members: :show-inheritance: - Inline TODOs ============ diff --git a/doc/whatsnew.rst b/doc/whatsnew.rst index b00c8e73c..098fa508f 100644 --- a/doc/whatsnew.rst +++ b/doc/whatsnew.rst @@ -3,8 +3,69 @@ What's new? *********** -.. Next release -.. ============ +Next release +============ + +- Expand :mod:`.model` and :mod:`.reader.xml` support for metadata structures and metadata sets (§7 of the Information Model in both SDMX 2.1 and 3.0) (:issue:`73`, :pull:`152`). + This includes the additional classes: + + - :mod:`.model.common`: + :class:`.CodingFormat` + :class:`.ExtendedFacetValueType` + :class:`.HierarchicalCode` + :class:`.Level`. 
+ - :mod:`.model.v21`: + :class:`.CodelistMap` + :class:`.CodeMap` + :class:`.DataSetTarget` + :class:`.DimensionDescriptorValuesTarget` + :class:`.EnumeratedAttributeValue` + :class:`.IdentifiableObjectTarget` + :class:`.ItemAssociation` + :class:`.ItemSchemeMap` + :class:`.MetadataReport` + :class:`.MetadataSet` + :class:`.MetadataTarget` + :class:`.NonEnumeratedAttributeValue` + :class:`.OtherNonEnumeratedAttributeValue` + :class:`.ReportedAttribute` + :class:`.ReportingCategory` + :class:`.ReportingTaxonomy` + :class:`.ReportPeriodTarget` + :class:`.ReportStructure` + :class:`.StructureSet` + :class:`.TargetIdentifiableObject` + :class:`.TargetObject` + :class:`.TargetObjectKey` + :class:`.TargetObjectValue` + :class:`.TargetReportPeriod` + :class:`.TextAttributeValue` + :class:`.XHTMLAttributeValue`. + - :mod:`.model.v30`: + :class:`.CodedMetadataAttributeValue` + :class:`.IdentifiableObjectSelection` + :class:`.MetadataAttributeDescriptor` + :class:`.MetadataAttributeValue` + :class:`.Metadataflow` + :class:`.MetadataSet` + :class:`.MetadataStructureDefinition` + :class:`.OtherUncodedAttributeValue` + :class:`.TargetIdentifiableObject` + :class:`.TextAttributeValue` + :class:`.UncodedMetadataAttributeValue` + :class:`.XHTMLAttributeValue`. +- New collections on StructureMessage: + :attr:`.hierarchical_codelist`, + :attr:`.hierarchy`, + :attr:`.metadatastructure`. +- New class :class:`.MetadataMessage`. +- Improve :class:`.Structure`: + + - New attribute :attr:`~.Structure.grouping` per the information model. + - New convenience method :meth:`~.Structure.replace_grouping`. +- :mod:`.reader.xml` parses messages available from 'actualconstraint', 'allowedconstraint', 'contentconstraint', 'hierarchicalcodelist', 'metadatastructure', 'structure', and 'structureset' SDMX 2.1 REST API endpoints for all known data sources that support these. + + - Expand explicit marking of particular data sources that do not support the above endpoints. 
v2.12.1 (2023-12-20) ==================== diff --git a/pyproject.toml b/pyproject.toml index 5f2ee008f..6c1900fae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,6 +63,8 @@ omit = [ exclude_also = [ # Don't complain about abstract methods, they aren't run "@(abc\\.)?abstractmethod", + # Imports only used by type checkers + "if TYPE_CHECKING:", ] [tool.mypy] diff --git a/sdmx/format/xml/common.py b/sdmx/format/xml/common.py index 070d8ca9c..143625644 100644 --- a/sdmx/format/xml/common.py +++ b/sdmx/format/xml/common.py @@ -25,6 +25,8 @@ "DataConsumerScheme", "DataProvider", "DataProviderScheme", + "HierarchicalCode", + "Level", "NamePersonalisation", "NamePersonalisationScheme", "Ruleset", @@ -39,17 +41,14 @@ ("model.Agency", "str:Agency"), # Order matters ("model.Agency", "mes:Receiver"), ("model.Agency", "mes:Sender"), + ("model.AttributeDescriptor", "str:AttributeList"), ("model.Concept", "str:ConceptIdentity"), ("model.Codelist", "str:Enumeration"), # This could possibly be ItemScheme ("model.Dimension", "str:Dimension"), # Order matters ("model.Dimension", "str:DimensionReference"), ("model.Dimension", "str:GroupDimension"), - ("model.StructureUsage", "com:StructureUsage"), - ("model.AttributeDescriptor", "str:AttributeList"), ("model.DataAttribute", "str:Attribute"), ("model.DataStructureDefinition", "str:DataStructure"), - ("model.DataStructureDefinition", "com:Structure"), - ("model.DataStructureDefinition", "str:Structure"), ("model.DimensionDescriptor", "str:DimensionList"), ("model.GroupDimensionDescriptor", "str:Group"), ("model.GroupDimensionDescriptor", "str:AttachmentGroup"), @@ -58,10 +57,14 @@ ("model.MeasureDescriptor", "str:MeasureList"), ("model.MetadataStructureDefinition", "str:MetadataStructure"), ("model.SeriesKey", "gen:SeriesKey"), + ("model.Structure", "com:Structure"), + ("model.Structure", "str:Structure"), ("model.StructureUsage", "com:StructureUsage"), ("model.VTLMappingScheme", "str:VtlMappingScheme"), # Message classes 
("message.DataMessage", "mes:StructureSpecificData"), + ("message.MetadataMessage", "mes:GenericMetadata"), + ("message.MetadataMessage", "mes:StructureSpecificMetadata"), ("message.ErrorMessage", "mes:Error"), ("message.StructureMessage", "mes:Structure"), ] @@ -72,6 +75,7 @@ "xsi": "http://www.w3.org/2001/XMLSchema-instance", # To be formatted "com": "{}/common", + "md": "{}/metadata/generic", "data": "{}/data/structurespecific", "str": "{}/structure", "mes": "{}/message", @@ -123,15 +127,16 @@ def qname(self, ns_or_name, name=None) -> QName: else: if name is None: match = re.fullmatch( - r"(\{(?P.*)\}|(?P.*):)(?P.*)", ns_or_name + r"(\{(?P.*)\}|(?P.*):)?(?P.*)", ns_or_name ) assert match name = match.group("name") - ns_key = match.group("ns_key") - if ns_key: + if ns_key := match.group("ns_key"): ns = self.NS[ns_key] + elif ns := match.group("ns_full"): + pass else: - ns = match.group("ns_full") + ns = None else: ns = self.NS[ns_or_name] diff --git a/sdmx/format/xml/v21.py b/sdmx/format/xml/v21.py index 4fe90a6bf..1edf74d4d 100644 --- a/sdmx/format/xml/v21.py +++ b/sdmx/format/xml/v21.py @@ -12,11 +12,36 @@ ("message.DataMessage", "mes:StructureSpecificTimeSeriesData"), ("model.NoSpecifiedRelationship", "str:None"), ("model.DataflowDefinition", "str:Dataflow"), + ("model.DimensionDescriptorValuesTarget", "str:KeyDescriptorValuesTarget"), ("model.MetadataflowDefinition", "str:Metadataflow"), + ("model.MetadataSet", "mes:MetadataSet"), + ("model.ReportedAttribute", "md:ReportedAttribute"), + ("model.TargetIdentifiableObject", ":ObjectReference"), + ("model.TargetIdentifiableObject", "md:ObjectReference"), + ("model.TargetObjectKey", ":Target"), + ("model.TargetObjectKey", "md:Target"), + ("model.TargetReportPeriod", "ReportPeriod"), + ("model.TargetReportPeriod", "md:ReportPeriod"), + ("model.MetadataReport", ":Report"), + ("model.MetadataReport", "md:Report"), ] + [ (f"model.{name}", f"str:{name}") - for name in "ContentConstraint MeasureDimension 
PrimaryMeasure".split() + for name in """ + CodelistMap + CodeMap + ContentConstraint + HierarchicalCodelist + Hierarchy + IdentifiableObjectTarget + MeasureDimension + MetadataAttribute + MetadataTarget + PrimaryMeasure + ReportPeriodTarget + ReportStructure + StructureSet + """.split() ], ) diff --git a/sdmx/format/xml/v30.py b/sdmx/format/xml/v30.py index 52ad3d327..c3c982594 100644 --- a/sdmx/format/xml/v30.py +++ b/sdmx/format/xml/v30.py @@ -10,7 +10,9 @@ ("model.DataflowRelationship", "str:None"), ("model.ObservationRelationship", "str:Observation"), ("model.Dataflow", "str:Dataflow"), + ("model.MetadataAttributeDescriptor", "str:MetadataAttributeList"), ("model.Metadataflow", "str:Metadataflow"), + ("model.MetadataSet", "mes:MetadataSet"), ] + [ (f"model.{name}", f"str:{name}") @@ -23,7 +25,9 @@ GeographicCodelist GeoGridCode GeoGridCodelist + Hierarchy Measure + MetadataAttribute MetadataConstraint ValueItem ValueList diff --git a/sdmx/message.py b/sdmx/message.py index 92d2b2658..062f12599 100644 --- a/sdmx/message.py +++ b/sdmx/message.py @@ -10,31 +10,42 @@ from dataclasses import dataclass, field, fields from datetime import datetime from operator import attrgetter -from typing import Any, List, Optional, Text, Union, get_args +from typing import TYPE_CHECKING, List, Optional, Text, Type, Union, get_args from sdmx import model from sdmx.dictlike import DictLike, DictLikeDescriptor, summarize_dictlike from sdmx.format import Version -from sdmx.model import v21, v30 +from sdmx.model import common, v21, v30 from sdmx.model.internationalstring import ( InternationalString, InternationalStringDescriptor, ) from sdmx.util import compare, direct_fields +if TYPE_CHECKING: + import requests + log = logging.getLogger(__name__) def _summarize(obj, include: Optional[List[str]] = None): """Helper method for __repr__ on Header and Message (sub)classes.""" + import requests + include = include or list(map(attrgetter("name"), fields(obj))) for name in include: attr = 
getattr(obj, name) if attr is None: continue elif isinstance(attr, datetime): - attr = attr.isoformat() - yield f"{name}: {repr(attr)}" + s_attr = repr(attr.isoformat()) + elif isinstance(attr, requests.Response): + # Don't use repr(), which displays the entire response body + s_attr = str(attr) + else: + s_attr = repr(attr) + + yield f"{name}: {s_attr}" @dataclass @@ -137,7 +148,7 @@ class Message: footer: Optional[Footer] = None #: :class:`requests.Response` instance for the response to the HTTP request that #: returned the Message. This is not part of the SDMX standard. - response: Optional[Any] = None + response: Optional["requests.Response"] = None def __str__(self): return repr(self) @@ -182,18 +193,31 @@ class StructureMessage(Message): ] = DictLikeDescriptor() #: Collection of :class:`.Codelist`. codelist: DictLikeDescriptor[str, model.Codelist] = DictLikeDescriptor() + #: Collection of :class:`.HierarchicalCodelist`. + hierarchical_codelist: DictLikeDescriptor[ + str, v21.HierarchicalCodelist + ] = DictLikeDescriptor() + #: Collection of :class:`.v30.Hierarchy`. + hierarchy: DictLikeDescriptor[str, v30.Hierarchy] = DictLikeDescriptor() #: Collection of :class:`.ConceptScheme`. concept_scheme: DictLikeDescriptor[str, model.ConceptScheme] = DictLikeDescriptor() #: Collection of :class:`.ContentConstraint`. constraint: DictLikeDescriptor[str, model.BaseConstraint] = DictLikeDescriptor() #: Collection of :class:`Dataflow(Definition) <.BaseDataflow>`. dataflow: DictLikeDescriptor[str, model.BaseDataflow] = DictLikeDescriptor() - #: Collection of :class:`MetaDataflow(Definition) <.BaseMetaDataflow>`. + #: Collection of :class:`MetadataStructureDefinition + #: <.BaseMetadataStructureDefinition>`. + metadatastructure: DictLikeDescriptor[ + str, model.BaseMetadataStructureDefinition + ] = DictLikeDescriptor() + #: Collection of :class:`Metadataflow(Definition) <.BaseMetadataflow>`. 
metadataflow: DictLikeDescriptor[str, model.BaseMetadataflow] = DictLikeDescriptor() #: Collection of :class:`DataStructureDefinition <.BaseDataStructureDefinition>`. structure: DictLikeDescriptor[ str, model.BaseDataStructureDefinition ] = DictLikeDescriptor() + #: Collection of :class:`.StructureSet`. + structureset: DictLikeDescriptor[str, v21.StructureSet] = DictLikeDescriptor() #: Collection of :class:`.OrganisationScheme`. organisation_scheme: DictLikeDescriptor[ str, model.OrganisationScheme @@ -336,7 +360,7 @@ def __repr__(self): @dataclass class DataMessage(Message): - """Data Message. + """SDMX Data Message. .. note:: A DataMessage may contain zero or more :class:`.DataSet`, so :attr:`data` is a list. To retrieve the first (and possibly only) @@ -371,6 +395,14 @@ def structure(self): """DataStructureDefinition used in the :attr:`dataflow`.""" return self.dataflow.structure + @property + def structure_type(self) -> Type[common.Structure]: + """:class:`.Structure` subtype describing the contained (meta)data.""" + return { + Version["2.1"]: v21.DataStructureDefinition, + Version["3.0.0"]: v30.DataStructureDefinition, + }[self.version] + def __repr__(self): """String representation.""" lines = [super().__repr__()] @@ -404,3 +436,15 @@ def compare(self, other, strict=True): and len(self.data) == len(other.data) and all(ds[0].compare(ds[1], strict) for ds in zip(self.data, other.data)) ) + + +@dataclass +class MetadataMessage(DataMessage): + """SDMX Metadata Message.""" + + @property + def structure_type(self) -> Type[common.Structure]: + return { + Version["2.1"]: v21.MetadataStructureDefinition, + Version["3.0.0"]: v30.MetadataStructureDefinition, + }[self.version] diff --git a/sdmx/model/common.py b/sdmx/model/common.py index 3447a3f47..a0a02f449 100644 --- a/sdmx/model/common.py +++ b/sdmx/model/common.py @@ -21,7 +21,7 @@ from abc import ABC, abstractmethod from collections import ChainMap from copy import copy -from dataclasses import InitVar, dataclass, 
field +from dataclasses import InitVar, dataclass, field, fields from datetime import date, datetime, timedelta from enum import Enum from functools import lru_cache @@ -45,6 +45,7 @@ TypeVar, Union, get_args, + get_origin, ) from sdmx.dictlike import DictLikeDescriptor @@ -68,6 +69,10 @@ "NameableArtefact", "VersionableArtefact", "MaintainableArtefact", + "ActionType", + "ConstraintRoleType", + "FacetValueType", + "ExtendedFacetValueType", "Item", "ItemScheme", "FacetType", @@ -106,6 +111,9 @@ "Key", "GroupKey", "SeriesKey", + "CodingFormat", + "Level", + "HierarchicalCode", "ConstraintRole", "StartPeriod", "EndPeriod", @@ -502,6 +510,23 @@ def __contains__(self, name): "geospatialInformation", ) + +#: SDMX ExtendedFaceValueType. +#: +#: This enumeration is identical to :class:`.FacetValueType` except for one additional +#: member, "Xhtml". This member is used only in metadata. +ExtendedFacetValueType = Enum( + "ExtendedFacetValueType", + """string bigInteger integer long short decimal float double boolean uri count + inclusiveValueRange alpha alphaNumeric numeric exclusiveValueRange incremental + observationalTimePeriod standardTimePeriod basicTimePeriod gregorianTimePeriod + gregorianYear gregorianMonth gregorianYearMonth gregorianDay reportingTimePeriod + reportingYear reportingSemester reportingTrimester reportingQuarter reportingMonth + reportingWeek reportingDay dateTime timesRange month monthDay day time duration + keyValues identifiableReference dataSetReference Xhtml""", +) + + UsageStatus = Enum("UsageStatus", "mandatory conditional") @@ -1090,8 +1115,47 @@ class DataProviderScheme(OrganisationScheme[DataProvider]): @dataclass(repr=False) class Structure(MaintainableArtefact): - #: - grouping: Optional[ComponentList] = None + @property + def grouping(self) -> Sequence[ComponentList]: + """A collection of all the ComponentLists associated with a subclass.""" + result: List[ComponentList] = [] + for f in fields(self): + types = get_args(f.type) or 
(f.type,) + try: + if any(issubclass(t, ComponentList) for t in types): + result.append(getattr(self, f.name)) + except TypeError: + pass + return result + + def replace_grouping(self, cl: ComponentList) -> None: + """Replace existing component list with `cl`.""" + field = None + for f in fields(self): + is_dictlike = get_origin(f.type) is DictLikeDescriptor + if f.type == type(cl) or (is_dictlike and get_args(f.type)[1] is type(cl)): + field = f + break + + if not field: + raise TypeError(f"No grouping of type {type(cl)} on {type(self)}") + + if is_dictlike: + getattr(self, field.name).setdefault(cl.id, cl) + else: + setattr(self, field.name, cl) + + def compare(self, other: "Structure", strict: bool = True) -> bool: + # DictLike of ComponentList will not have an "id" attribute + def _key(item) -> str: + return getattr(item, "id", str(type(item))) + + return all( + s.compare(o, strict) + for s, o in zip( + sorted(self.grouping, key=_key), sorted(other.grouping, key=_key) + ) + ) class StructureUsage(MaintainableArtefact): @@ -1250,6 +1314,7 @@ class BaseDataStructureDefinition(Structure, ConstrainableArtefact): str, GroupDimensionDescriptor ] = DictLikeDescriptor() + # Specific types to be used in concrete subclasses MemberValue: ClassVar[Type["BaseMemberValue"]] MemberSelection: ClassVar[Type["BaseMemberSelection"]] ConstraintType: ClassVar[Type[BaseConstraint]] @@ -1488,28 +1553,21 @@ def dim(id): # noqa: F811 return key - def compare(self, other, strict=True): - """Return :obj:`True` if `self` is the same as `other`. - - Two DataStructureDefinitions are the same if each of :attr:`attributes`, - :attr:`dimensions`, :attr:`measures`, and :attr:`group_dimensions` compares - equal. - - Parameters - ---------- - strict : bool, optional - Passed to :meth:`.ComponentList.compare`. 
- """ - return all( - getattr(self, attr).compare(getattr(other, attr), strict) - for attr in ("attributes", "dimensions", "measures", "group_dimensions") - ) - +@dataclass(repr=False) class BaseDataflow(StructureUsage, ConstrainableArtefact): """Common features of SDMX 2.1 DataflowDefinition and 3.0 Dataflow.""" - structure: BaseDataStructureDefinition + structure: BaseDataStructureDefinition = field( + default_factory=BaseDataStructureDefinition + ) + + def __post_init__(self): + super().__post_init__() + + # Factory default `structure` inherits is_external_reference from the data flow + if self.structure.is_external_reference is None: + self.structure.is_external_reference = self.is_external_reference def iter_keys( self, constraint: Optional[BaseConstraint] = None, dims: List[str] = [] @@ -2046,6 +2104,25 @@ def compare(self, other, strict=True): # §7.3: Metadata Structure Definition +class AttributeComponent(Component): + """SDMX 3.0 AttributeComponent. + + .. note:: This intermediate, abstract class is not present in the SDMX 2.1 IM. 
+ """ + + +@dataclass +class MetadataAttribute(AttributeComponent): + """SDMX MetadataAttribute.""" + + is_presentational: Optional[bool] = None + max_occurs: Optional[int] = None + min_occurs: Optional[int] = None + + parent: Optional["MetadataAttribute"] = None + child: List["MetadataAttribute"] = field(default_factory=list) + + class BaseMetadataStructureDefinition(Structure, ConstrainableArtefact): """ABC for SDMX 2.1 and 3.0 MetadataStructureDefinition.""" @@ -2054,6 +2131,76 @@ class BaseMetadataflow(StructureUsage, ConstrainableArtefact): """ABC for SDMX 2.1 MetadataflowDefinition and SDMX 3.0 Metadataflow.""" +# §7.4 MetadataSet + + +@dataclass +class BaseTextAttributeValue: + """ABC for SDMX 2.1 and 3.0 TextAttributeValue.""" + + text: InternationalStringDescriptor = InternationalStringDescriptor() + + +@dataclass +class BaseXHTMLAttributeValue: + """ABC for SDMX 2.1 and 3.0 XHTMLAttributeValue.""" + + value: str + + +@dataclass +class BaseMetadataSet: + """ABC for SDMX 2.1 and 3.0 MetadataSet.""" + + action: Optional[ActionType] = None + + reporting_begin: Optional[date] = None + reporting_end: Optional[date] = None + + publication_period: Optional[date] = None + publication_year: Optional[date] = None + + +# SDMX 2.1 §8: Hierarchical Code List +# SDMX 3.0 §8: Hierarchy + + +class CodingFormat: + """SDMX CodingFormat.""" + + coding_format: Facet + + +@dataclass +class Level(NameableArtefact): + """SDMX Level.""" + + parent: Optional[Union["Level", Any]] = None # NB second element is "Hierarchy" + child: Optional["Level"] = None + + code_format: CodingFormat = field(default_factory=CodingFormat) + + +@dataclass +class HierarchicalCode(IdentifiableArtefact): + """SDMX HierarchicalCode.""" + + #: Date from which the construct is valid. + valid_from: Optional[str] = None + #: Date from which the construct is superseded. + valid_to: Optional[str] = None + + #: The Code that is used at the specific point in the hierarchy. 
+ code: Optional[Code] = None + + level: Optional[Level] = None + + parent: Optional[ + Union["HierarchicalCode", Any] + ] = None # NB second element is "Hierarchy" + child: List["HierarchicalCode"] = field(default_factory=list) + + # SDMX 2.1 §10.2: Constraint inheritance # SDMX 3.0 §12: Constraints @@ -2275,6 +2422,7 @@ class RESTDatasource(QueryDatasource): @dataclass +@MaintainableArtefact._preserve("hash") class ProvisionAgreement(MaintainableArtefact, ConstrainableArtefact): #: structure_usage: Optional[StructureUsage] = None @@ -2412,7 +2560,14 @@ class BaseContentConstraint: _PACKAGE_CLASS: Dict[str, set] = { "base": {"Agency", "AgencyScheme", "DataProvider", "DataProviderScheme"}, "categoryscheme": {"Category", "Categorisation", "CategoryScheme"}, - "codelist": {"Code", "Codelist"}, + "codelist": { + "Code", + "Codelist", + "HierarchicalCode", + "HierarchicalCodelist", # SDMX 2.1 + "Hierarchy", + "Level", + }, "conceptscheme": {"Concept", "ConceptScheme"}, "datastructure": { "DataflowDefinition", # SDMX 2.1 @@ -2420,6 +2575,7 @@ class BaseContentConstraint: "DataStructureDefinition", "StructureUsage", }, + "mapping": {"CodelistMap", "StructureSet"}, "metadatastructure": { "MetadataflowDefinition", # SDMX 2.1 "Metadataflow", # SDMX 3.0 diff --git a/sdmx/model/v21.py b/sdmx/model/v21.py index 7c372e4c6..48018f787 100644 --- a/sdmx/model/v21.py +++ b/sdmx/model/v21.py @@ -4,12 +4,24 @@ # TODO for complete implementation of the IM, enforce TimeKeyValue (instead of KeyValue) # for {Generic,StructureSpecific} TimeSeriesDataSet. from dataclasses import dataclass, field -from typing import Generator, List, Optional, Set, Union +from typing import ( + ClassVar, + Dict, + Generator, + Generic, + List, + Optional, + Set, + Type, + TypeVar, + Union, +) from sdmx.dictlike import DictLikeDescriptor from . 
import common from .common import ( + IT, AttributeRelationship, Component, ComponentList, @@ -48,8 +60,36 @@ "GenericDataSet", "GenericTimeSeriesDataSet", "StructureSpecificTimeSeriesDataSet", - "MetadataflowDefinition", + "ReportingCategory", + "ReportingTaxonomy", + "TargetObject", + "DataSetTarget", + "DimensionDescriptorValuesTarget", + "IdentifiableObjectTarget", + "ReportPeriodTarget", + "MetadataTarget", + "ReportStructure", "MetadataStructureDefinition", + "MetadataflowDefinition", + "TargetObjectValue", + "TargetReportPeriod", + "TargetIdentifiableObject", + "TargetObjectKey", + "ReportedAttribute", + "EnumeratedAttributeValue", + "NonEnumeratedAttributeValue", + "OtherNonEnumeratedAttributeValue", + "TextAttributeValue", + "XHTMLAttributeValue", + "MetadataReport", + "MetadataSet", + "Hierarchy", + "HierarchicalCodelist", + "ItemAssociation", + "CodeMap", + "ItemSchemeMap", + "CodelistMap", + "StructureSet", ] log = logging.getLogger(__name__) @@ -283,22 +323,267 @@ class StructureSpecificTimeSeriesDataSet(DataSet): # §7.3 Metadata Structure Definition +class ReportingCategory(common.Item): + """SDMX 2.1 ReportingCategory.""" + + +class ReportingTaxonomy(common.ItemScheme): + """SDMX 2.1 ReportingTaxonomy.""" + + +class TargetObject(common.Component): + """SDMX 2.1 TargetObject.""" + + +class DataSetTarget(TargetObject): + """SDMX 2.1 DataSetTarget.""" + + +class DimensionDescriptorValuesTarget(TargetObject): + """SDMX 2.1 DimensionDescriptorValuesTarget.""" + + +class IdentifiableObjectTarget(TargetObject): + """SDMX 2.1 IdentifiableObjectTarget.""" + + +class ReportPeriodTarget(TargetObject): + """SDMX 2.1 ReportPeriodTarget.""" + + +class MetadataTarget(ComponentList): + """SDMX 2.1 MetadataTarget.""" + + _Component = TargetObject + + +@dataclass +class ReportStructure(ComponentList): + """SDMX 2.1 ReportStructure.""" + + _Component = common.MetadataAttribute + + report_for: List[MetadataTarget] = field(default_factory=list) + + +@dataclass 
+@common.MaintainableArtefact._preserve("hash") class MetadataStructureDefinition(common.BaseMetadataStructureDefinition): """SDMX 2.1 MetadataStructureDefinition.""" + report_structure: DictLikeDescriptor[str, ReportStructure] = DictLikeDescriptor() + + #: Association to 1 or more :class:`.MetadataTarget` + target: DictLikeDescriptor[str, MetadataTarget] = DictLikeDescriptor() + +@dataclass class MetadataflowDefinition(common.BaseMetadataflow): """SDMX 2.1 MetadataflowDefinition.""" + # NB narrows the type of common.StructureUsage.structure + structure: Optional[MetadataStructureDefinition] = None + + +# §7.4: Metadata Set + + +@dataclass +class TargetObjectValue: + """SDMX 2.1 TargetObjectValue.""" + + value_for: TargetObject + + +@dataclass +class TargetReportPeriod(TargetObjectValue): + """SDMX 2.1 TargetReportPeriod.""" + + report_period: str + + +@dataclass +class TargetIdentifiableObject(TargetObjectValue): + """SDMX 2.1 TargetIdentifiableObject.""" + + obj: IdentifiableArtefact + + +@dataclass +class TargetObjectKey: + """SDMX 2.1 TargetObjectKey.""" + + key_values: DictLikeDescriptor[str, TargetObjectValue] = DictLikeDescriptor() + + +@dataclass +class ReportedAttribute: + """SDMX 2.1 ReportedAttribute. + + Analogous to :class:`.v30.MetadataAttributeValue`. + """ + + value_for: common.MetadataAttribute + parent: Optional["ReportedAttribute"] = None + child: List["ReportedAttribute"] = field(default_factory=list) + + def __getitem__(self, index: int) -> "ReportedAttribute": + return self.child[index] + + def __len__(self) -> int: + return len(self.child) + + +class EnumeratedAttributeValue(ReportedAttribute): + """SDMX 2.1 EnumeratedAttributeValue. + + Analogous to :class:`.v30.CodedMetadataAttributeValue`. + """ + + value: str + + #: .. note:: + #: + #: The SDMX 2.1 IM (2011-08) gives this as `valueFor`, but this name duplicates + #: :attr:`.ReportedAttribute.value_for`. 
:mod:`sdmx` uses `value_of` for + #: consistency with :attr:`.v30.CodedMetadataAttributeValue.value_of`. + value_of: common.Code + + +class NonEnumeratedAttributeValue(ReportedAttribute): + """SDMX 2.1 NonEnumeratedAttributeValue.""" + + +class OtherNonEnumeratedAttributeValue(NonEnumeratedAttributeValue): + """SDMX 2.1 OtherNonEnumeratedAttributeValue.""" + + value: str + + +class TextAttributeValue(NonEnumeratedAttributeValue, common.BaseTextAttributeValue): + """SDMX 2.1 TextAttributeValue.""" + + +@dataclass +class XHTMLAttributeValue(NonEnumeratedAttributeValue, common.BaseXHTMLAttributeValue): + """SDMX 2.1 XHTMLAttributeValue.""" + + value: str + + +@dataclass +class MetadataReport: + """SDMX 2.1 MetadataReport.""" + + metadata: List[ReportedAttribute] = field(default_factory=list) + target: Optional[MetadataTarget] = None + attaches_to: Optional[TargetObjectKey] = None + + +@dataclass +class MetadataSet(NameableArtefact, common.BaseMetadataSet): + """SDMX 2.1 MetadataSet. + + .. note:: Contrast :class:`.v30.MetadataSet`, which is a + :class:`.MaintainableArtefact`. + """ + + described_by: Optional[MetadataflowDefinition] = None + # described_by: Optional[ReportStructure] = None + structured_by: Optional[MetadataStructureDefinition] = None + + #: Analogous to :attr:`.v30.MetadataSet.provided_by`. + published_by: Optional[common.DataProvider] = None + + report: List[MetadataReport] = field(default_factory=list) + + +# §8 Hierarchical Code List + + +@dataclass +class Hierarchy(NameableArtefact): + """SDMX 2.1 Hierarchy.""" + + has_formal_levels: bool = False + + #: Hierarchical codes in the hierarchy. 
+ codes: Dict[str, common.HierarchicalCode] = field(default_factory=dict) + + level: Optional[common.Level] = None + + +@dataclass +class HierarchicalCodelist(common.MaintainableArtefact): + """SDMX 2.1 HierarchicalCodelist.""" + + hierarchy: List[Hierarchy] = field(default_factory=list) + + def __repr__(self) -> str: + tmp = super(NameableArtefact, self).__repr__()[:-1] + return f"{tmp}: {len(self.hierarchy)} hierarchies>" + + +# §9: Structure Set and Mappings + + +@dataclass +class ItemAssociation(common.AnnotableArtefact, Generic[IT]): + """SDMX 2.1 ItemAssociation.""" + + _Item: ClassVar[Type[common.Item]] = common.Item + + source: Optional[IT] = None + target: Optional[IT] = None + + +class CodeMap(ItemAssociation[common.Code]): + """SDMX 2.1 CodeMap.""" + + _Item = common.Code + + +IAT = TypeVar("IAT", bound="ItemAssociation") +IST = TypeVar("IST", bound="common.ItemScheme") + + +@dataclass +class ItemSchemeMap(NameableArtefact, Generic[IST, IAT]): + """SDMX 2.1 ItemSchemeMap.""" + + _ItemAssociation: ClassVar[Type[ItemAssociation]] = ItemAssociation + + source: Optional[IST] = None + target: Optional[IST] = None + + item_association: List[IAT] = field(default_factory=list) + + +class CodelistMap(ItemSchemeMap[common.Codelist, CodeMap]): + """SDMX 2.1 CodelistMap.""" + + _ItemAssociation = CodeMap + + +@dataclass +class StructureSet(common.MaintainableArtefact): + """SDMX 2.1 StructureSet.""" + + item_scheme_map: List[ItemSchemeMap] = field(default_factory=list) + CF = common.ClassFinder( __name__, name_map={ "Dataflow": "DataflowDefinition", + "DataStructure": "DataStructureDefinition", + "MetadataStructure": "MetadataStructureDefinition", "Metadataflow": "MetadataflowDefinition", }, parent_map={ + common.HierarchicalCode: Hierarchy, PrimaryMeasure: MeasureDescriptor, + MetadataTarget: MetadataStructureDefinition, }, ) get_class = CF.get_class diff --git a/sdmx/model/v30.py b/sdmx/model/v30.py index a32a16f1b..871cf68b7 100644 --- a/sdmx/model/v30.py +++ 
b/sdmx/model/v30.py @@ -1,8 +1,9 @@ """SDMX 3.0 Information Model.""" from dataclasses import dataclass, field +from datetime import date from enum import Enum -from typing import Any, ClassVar, List, Optional, Set +from typing import Any, ClassVar, Dict, List, Optional, Set from . import common from .common import ( @@ -13,7 +14,6 @@ ConstrainableArtefact, ConstraintRole, ConstraintRoleType, - Facet, IdentifiableArtefact, MaintainableArtefact, NameableArtefact, @@ -44,11 +44,18 @@ "Dataflow", "Observation", "StructureSpecificDataSet", + "MetadataAttributeDescriptor", + "IdentifiableObjectSelection", "MetadataStructureDefinition", "Metadataflow", - "CodingFormat", - "Level", - "HierarchicalCode", + "MetadataAttributeValue", + "CodedMetadataAttributeValue", + "UncodedMetadataAttributeValue", + "OtherUncodedAttributeValue", + "TextAttributeValue", + "XHTMLAttributeValue", + "TargetIdentifiableObject", + "MetadataSet", "Hierarchy", "HierarchyAssociation", "SelectionValue", @@ -379,58 +386,134 @@ class StructureSpecificDataSet(DataSet): # §7.3 Metadata Structure Definition +class MetadataAttributeDescriptor(common.ComponentList): + """SDMX 3.0 MetadataAttributeDescriptor.""" + + _Component = common.MetadataAttribute + + +class IdentifiableObjectSelection: + """SDMX 3.0 IdentifiableObjectSelection.""" + + +@dataclass +@MaintainableArtefact._preserve("hash") class MetadataStructureDefinition(common.BaseMetadataStructureDefinition): """SDMX 3.0 MetadataStructureDefinition.""" + #: A :class:`MetadataAttributeDescriptor` that describes the attributes of the + #: metadata structure. + #: + #: .. note:: The SDMX 3.0.0 IM (version 1.0 / 2021-10) does not give a name for this + #: association. :mod:`sdmx` uses `attributes` for consistency with + #: :class:`.DataStructureDefinition`. 
+ attributes: MetadataAttributeDescriptor = field( + default_factory=MetadataAttributeDescriptor + ) + class Metadataflow(common.BaseMetadataflow): """SDMX 3.0 MetadataflowDefinition.""" + structure: MetadataStructureDefinition -# §8: Hierarchy + +# §7.4: Metadata Set -class CodingFormat: - """SDMX 3.0 CodingFormat.""" +class MetadataAttributeValue: + """SDMX 3.0 MetadataAttributeValue. - coding_format: Facet + Analogous to :class:`.v21.ReportedAttribute`. + """ + # NB the IM specifies this is a subclass of common.AttributeValue, but the + # implementation in .common has both Coded- and UncodedAttributeValue, which + # offends mypy. + + parent: Optional["MetadataAttributeValue"] = None + child: List["MetadataAttributeValue"] = field(default_factory=list) + + +class CodedMetadataAttributeValue(MetadataAttributeValue): + """SDMX 3.0 CodedMetadataAttributeValue. + + Analogous to :class:`.v21.EnumeratedAttributeValue`. + """ + + value_of: common.Code -@dataclass -class Level(NameableArtefact): - child: Optional["Level"] = None - parent: Optional["Level"] = None - code_format: CodingFormat = field(default_factory=CodingFormat) +class UncodedMetadataAttributeValue(MetadataAttributeValue): + """SDMX 3.0 UncodedMetadataAttributeValue.""" + + pass + + +class OtherUncodedAttributeValue(UncodedMetadataAttributeValue): + """SDMX 3.0 OtherUncodedAttributeValue.""" + + value: str + start_time: date + + +class TextAttributeValue(UncodedMetadataAttributeValue, common.BaseTextAttributeValue): + """SDMX 3.0 TextAttributeValue.""" + + +class XHTMLAttributeValue( + UncodedMetadataAttributeValue, common.BaseXHTMLAttributeValue +): + """SDMX 3.0 XHTMLAttributeValue.""" + + +class TargetIdentifiableObject: + """SDMX 3.0 TargetIdentifiableObject.""" @dataclass -class HierarchicalCode(IdentifiableArtefact): - #: Date from which the construct is valid. +class MetadataSet(MaintainableArtefact, common.BaseMetadataSet): + """SDMX 3.0 MetadataSet. + + .. 
note:: Contrast :class:`.v21.MetadataSet`, which is a :class:`.NameableArtefact`. + """ + + # NB Would prefer to type as datetime.date, but VersionableArtefact currently uses + # str valid_from: Optional[str] = None - #: Date from which the construct is superseded. + # NB Would prefer to type as datetime.date, but VersionableArtefact currently uses + # str valid_to: Optional[str] = None + set_id: Optional[str] = None + + described_by: Optional[Metadataflow] = None + + # described_by: Optional[MetadataProvisionAgreement] = None + + structured_by: Optional[MetadataAttributeDescriptor] = None - child: List["HierarchicalCode"] = field(default_factory=list) - parent: List["HierarchicalCode"] = field(default_factory=list) + #: Analogous to :attr:`.v21.MetadataSet.published_by`. + provided_by: Optional[MetadataProvider] = None - #: The Code that is used at the specific point in the hierarchy. - code: Optional[Code] = None + attaches_to: List[TargetIdentifiableObject] = field(default_factory=list) - level: Optional[Level] = None + metadata: List[MetadataAttributeValue] = field(default_factory=list) + + +# §8: Hierarchy @dataclass class Hierarchy(MaintainableArtefact): """SDMX 3.0 Hierarchy.""" - has_format_levels: bool = False + has_formal_levels: bool = False #: The top :class:`Level` in the hierarchy. - level: Optional[Level] = None + level: Optional[common.Level] = None #: The top-level :class:`HierarchicalCodes ` in the hierarchy. 
- codes: List[HierarchicalCode] = field(default_factory=list) + codes: Dict[str, common.HierarchicalCode] = field(default_factory=dict) @dataclass @@ -445,7 +528,13 @@ class HierarchyAssociation(MaintainableArtefact): linked_hierarchy: Optional[Hierarchy] = None -CF = common.ClassFinder(__name__, parent_map={Measure: MeasureDescriptor}) +CF = common.ClassFinder( + __name__, + parent_map={ + Measure: MeasureDescriptor, + common.MetadataAttribute: MetadataAttributeDescriptor, + }, +) get_class = CF.get_class parent_class = CF.parent_class __dir__ = CF.dir diff --git a/sdmx/reader/xml/v21.py b/sdmx/reader/xml/v21.py index 27f1aef8c..ec1e34b55 100644 --- a/sdmx/reader/xml/v21.py +++ b/sdmx/reader/xml/v21.py @@ -36,7 +36,7 @@ from sdmx import message from sdmx.exceptions import XMLParseError # noqa: F401 from sdmx.format import Version, list_media_types -from sdmx.model import common +from sdmx.model import common, v21 from sdmx.model import v21 as model from sdmx.reader.base import BaseReader @@ -231,6 +231,7 @@ def detect(cls, content): def read_message( self, source, + structure: Optional[common.Structure] = None, dsd: Optional[common.BaseDataStructureDefinition] = None, _events=None, ) -> message.Message: @@ -242,8 +243,10 @@ def read_message( # Elements to ignore when parsing finishes self.ignore = set() - # If calling code provided a DSD, add it to a stack, and let it be ignored when - # parsing finishes + # If calling code provided a {Metad,D}ataStructureDefinition, add it to a stack, + # and let it be ignored when parsing finishes + self.push(structure) + self.ignore.add(id(structure)) self.push(dsd) self.ignore.add(id(dsd)) @@ -266,6 +269,8 @@ def read_message( # Retrieve the parsing function for this element & event func = self.parser[element.tag, event] except KeyError: # pragma: no cover + if QName(element.tag).namespace == "http://www.w3.org/1999/xhtml": + continue # Don't know what to do for this (element, event) raise NotImplementedError(element.tag, 
event) from None @@ -294,7 +299,7 @@ def read_message( # parsing errors # Remove some internal items - self.pop_single("SS without DSD") + self.pop_single("SS without structure") self.pop_single("DataSetClass") # Count only non-ignored items @@ -336,6 +341,46 @@ def decorator(func): return decorator + @classmethod + def possible_reference(cls, cls_hint: Optional[type] = None, unstash: bool = False): + """Decorator for a function where the `elem` parsed may be a Reference. + + Before calling the decorated function, attempt to parse the `elem` as a + :class:`.Reference`. If successful, return the reference instead of calling the + function. If `elem` does not contain a reference, call the decorated function. + + Parameters + ---------- + cls_hint : + Passed to :class:`.Reference`. + unstash : bool, optional + If :data:`True`, call :meth:`.unstash` after successfully resolving a + reference. + """ + + def decorator(func): + def wrapped(reader: "Reader", elem): + try: + # Identify a reference + result = reader.Reference( + reader, + elem, + cls_hint=cls_hint or reader.class_for_tag(elem.tag), + ) + except NotReference: + # Call the wrapped function + result = func(reader, elem) + else: + # Successful; unstash if configured + if unstash: + reader.unstash() + + return result + + return wrapped + + return decorator + # Stack handling def _clean(self): # pragma: no cover @@ -488,7 +533,7 @@ def pop_resolved_ref(self, cls_or_name: Union[Type, str]): """Pop a reference to `cls_or_name` and resolve it.""" return self.resolve(self.pop_single(cls_or_name)) - def reference(self, elem, cls_hint=None): + def reference(self, elem, cls_hint=None) -> Reference: return self.Reference(self, elem, cls_hint=cls_hint) def resolve(self, ref): @@ -638,6 +683,7 @@ def maintainable(self, cls, elem, **kwargs): # Shorthand start = Reader.start end = Reader.end +possible_reference = Reader.possible_reference # Tags to skip entirely start( @@ -646,6 +692,7 @@ def maintainable(self, cls, elem, 
**kwargs): "gen:ObsDimension gen:ObsValue gen:Value " # Tags that are bare containers for other XML elements """ + :AttributeSet md:AttributeSet str:Categorisations str:CategorySchemes str:Codelists str:Concepts str:ConstraintAttachment str:Constraints str:CustomTypes str:Dataflows str:DataStructureComponents str:DataStructures str:FromVtlSuperSpace @@ -664,8 +711,11 @@ def maintainable(self, cls, elem, **kwargs): @start( - "mes:Error mes:GenericData mes:GenericTimeSeriesData mes:StructureSpecificData " - "mes:StructureSpecificTimeSeriesData" + """ + mes:Error mes:GenericData mes:GenericMetadata mes:GenericTimeSeriesData + mes:StructureSpecificData mes:StructureSpecificMetadata + mes:StructureSpecificTimeSeriesData + """ ) @start("mes:Structure", only=False) def _message(reader: Reader, elem): @@ -675,23 +725,25 @@ def _message(reader: Reader, elem): if getattr(elem.getparent(), "tag", None) == reader.qname("mes", "Header"): return - ss_without_dsd = False + ss_without_structure = False - # With 'dsd' argument, the message should be structure-specific - if ( - "StructureSpecific" in elem.tag - and reader.get_single(common.BaseDataStructureDefinition, subclass=True) is None - ): - log.warning(f"xml.Reader got no dsd=… argument for {QName(elem).localname}") - ss_without_dsd = True - elif "StructureSpecific" not in elem.tag and reader.get_single( - common.BaseDataStructureDefinition - ): - log.info("Use supplied dsd=… argument for non–structure-specific message") + # Retrieve any {Metad,D}ataStructure definition given to Reader.read_message() + supplied_structure = reader.get_single(common.Structure, subclass=True) + + # Handle + qname = QName(elem) + if "StructureSpecific" in elem.tag: + if supplied_structure is None: + log.warning(f"xml.Reader got no structure=… argument for {qname.localname}") + ss_without_structure = True + elif isinstance(supplied_structure, model.MetadataStructureDefinition): + add_mds_events(reader, supplied_structure) + elif 
supplied_structure: + log.info("Use supplied structure=… argument for non–structure-specific message") # Store values for other methods - reader.push("SS without DSD", ss_without_dsd) - if "Data" in elem.tag: + reader.push("SS without structure", ss_without_structure) + if elem.tag.endswith("Data"): reader.push("DataSetClass", model.get_class(f"{QName(elem).localname}Set")) # Handle namespaces mapped on `elem` but not part of the standard set @@ -745,18 +797,19 @@ def _header_org(reader, elem): @end("mes:Structure", only=False) def _header_structure(reader, elem): - """ within of a DataMessage.""" + """ within of a {Metad,D}ataMessage.""" # The root node of a structure message is handled by _message(), above. if elem.getparent() is None: return - msg = reader.get_single(message.DataMessage) + msg = reader.get_single(message.DataMessage, subclass=True) + assert msg is not None - # Retrieve a DSD supplied to the parser, e.g. for a structure specific message - provided_dsd = reader.get_single(common.BaseDataStructureDefinition, subclass=True) + # Retrieve a structure supplied to the reader, e.g. for a structure specific message + provided_structure = reader.get_single(common.Structure, subclass=True) - # Resolve the child to a DSD, maybe is_external_reference=True - header_dsd = reader.pop_resolved_ref("Structure") + # Resolve the child to an object, maybe is_external_reference=True + header_structure = reader.pop_resolved_ref("Structure") # The header may give either a StructureUsage, or a specific reference to a subclass # like BaseDataflow. 
Resolve the child, if any, and remove it @@ -770,34 +823,34 @@ def _header_structure(reader, elem): # DSD to use: the provided one; the one referenced by ; or a # candidate constructed using the information contained in `header_su` (if any) - dsd = provided_dsd or ( + structure = provided_structure or ( reader.maintainable( - reader.model.DataStructureDefinition, + msg.structure_type, None, id=header_su.id, maintainer=header_su.maintainer, version=header_su.version, # NB this may not always be the case ) if header_su - else header_dsd + else header_structure ) - if header_dsd and header_su: + if header_structure and header_su: # Ensure the constructed candidate and the one given directly are equivalent - assert header_dsd == dsd - elif header_su and not provided_dsd: - reader.push(dsd) - elif dsd is None: + assert header_structure == structure + elif header_su and not provided_structure: + reader.push(structure) + elif structure is None: # pragma: no cover raise RuntimeError # Store on the data flow - msg.dataflow.structure = dsd + msg.dataflow.structure = structure # Store under the structure ID, so it can be looked up by that ID - reader.push(elem.attrib["structureID"], dsd) + reader.push(elem.attrib["structureID"], structure) - # Store as an object that won't cause a parsing error if it is left over - reader.ignore.add(id(dsd)) + # Store as objects that won't cause a parsing error if it is left over + reader.ignore.update({id(structure), id(header_structure)}) try: # Information about the 'dimension at observation level' @@ -809,12 +862,12 @@ def _header_structure(reader, elem): if dim_at_obs == "AllDimensions": # Use a singleton object dim = model.AllDimensions - elif provided_dsd: + elif provided_structure: # Use existing dimension from the provided DSD - dim = dsd.dimensions.get(dim_at_obs) + dim = structure.dimensions.get(dim_at_obs) else: # Force creation of the 'dimension at observation' level - dim = dsd.dimensions.getdefault( + dim = 
structure.dimensions.getdefault( dim_at_obs, cls=( model.TimeDimension @@ -876,11 +929,11 @@ def _structures(reader, elem): @end( """ - com:AnnotationTitle com:AnnotationType com:AnnotationURL com:None com:URN - com:Value mes:DataSetAction mes:DataSetID mes:Email mes:ID mes:Test mes:Timezone - str:DataType str:Email str:Expression str:NullValue str:OperatorDefinition - str:PersonalisedName str:Result str:RulesetDefinition str:Telephone str:URI - str:VtlDefaultName str:VtlScalarType + com:AnnotationTitle com:AnnotationType com:AnnotationURL com:None com:URN com:Value + mes:DataSetAction :ReportPeriod md:ReportPeriod mes:DataSetID mes:Email mes:ID + mes:Test mes:Timezone str:CodelistAliasRef str:DataType str:Email str:Expression + str:NullValue str:OperatorDefinition str:PersonalisedName str:Result + str:RulesetDefinition str:Telephone str:URI str:VtlDefaultName str:VtlScalarType """ ) def _text(reader, elem): @@ -888,6 +941,12 @@ def _text(reader, elem): reader.push(elem, elem.text or NoText) +@start("com:StructuredText") +def _st(reader, elem): + """Contained XHTML.""" + reader.push(elem, etree.tostring(elem[0], pretty_print=True)) + + @end("mes:Extracted mes:Prepared mes:ReportingBegin mes:ReportingEnd") def _datetime(reader, elem): text, n = re.subn(r"(.*\.)(\d{6})\d+(\+.*)", r"\1\2\3", elem.text) @@ -898,8 +957,10 @@ def _datetime(reader, elem): @end( - "com:AnnotationText com:Name com:Description com:Text mes:Source mes:Department " - "mes:Role str:Department str:Role" + """ + com:AnnotationText com:Name com:Description com:Text mes:Source mes:Department + mes:Role str:Department str:Role + """ ) def _localization(reader, elem): reader.push( @@ -910,16 +971,28 @@ def _localization(reader, elem): @end( """ - com:Structure com:StructureUsage str:AttachmentGroup str:ConceptIdentity - str:ConceptRole str:DimensionReference str:Parent str:Source str:Structure - str:StructureUsage str:Target str:Enumeration + com:Structure com:StructureUsage :ObjectReference 
md:ObjectReference + str:AttachmentGroup str:CodeID str:ConceptIdentity str:ConceptRole + str:DimensionReference str:Enumeration str:Parent str:Source str:Structure + str:StructureUsage str:Target """ ) def _ref(reader: Reader, elem): - cls_hint = None - if QName(elem).localname in ("Parent", "Target"): + cls_hint = reader.peek("ItemAssociation class") or None + + if not cls_hint and QName(elem).localname in ("CodeID", "Parent", "Target"): # Use the *grand*-parent of the or for a class hint cls_hint = reader.class_for_tag(elem.getparent().tag) + elif not cls_hint and QName(elem).localname == "Structure": + # /: use message property for a class hint + msg = reader.get_single(message.DataMessage, subclass=True) + if msg: + cls_hint = cast(Type[message.DataMessage], type(msg))( + version=reader.xml_version + ).structure_type + elif QName(elem.getparent()).localname == "Dataflow": + # In a StructureMessage + cls_hint = reader.model.DataStructureDefinition reader.push(QName(elem).localname, reader.reference(elem, cls_hint)) @@ -973,17 +1046,10 @@ def _item_start(reader, elem): """, only=False, ) +# is a reference, e.g. in +# Restore "Name" and "Description" that may have been stashed by _item_start +@possible_reference(unstash=True) def _item_end(reader: Reader, elem): - try: - # may be a reference, e.g. in - item = reader.reference(elem, cls_hint=reader.class_for_tag(elem.tag)) - except NotReference: - pass - else: - # Restore "Name" and "Description" that may have been stashed by _item_start - reader.unstash() - return item - cls = reader.class_for_tag(elem.tag) item = reader.nameable(cls, elem) @@ -1019,13 +1085,8 @@ def _item_end(reader: Reader, elem): str:VtlMappingScheme """ ) +@possible_reference() # in def _itemscheme(reader: Reader, elem): - try: - # may be a reference, e.g. 
in - return reader.reference(elem, cls_hint=reader.class_for_tag(elem.tag)) - except NotReference: - pass - cls: Type[common.ItemScheme] = reader.class_for_tag(elem.tag) try: @@ -1066,7 +1127,12 @@ def _facet(reader, elem): # in XML, first letter is uppercase; in the spec and Python enum, lowercase. SDMX-ML # default is "String". tt = args.pop("text_type", "String") - fvt = model.FacetValueType[f"{tt[0].lower()}{tt[1:]}"] + try: + fvt = model.FacetValueType[f"{tt[0].lower()}{tt[1:]}"] + except KeyError: + # ExtendedFacetValueType instead. Convert case of the value: in XML, the string + # is "XHTML", upper case; in the spec and Python enum, "Xhtml", title case. + fvt = model.ExtendedFacetValueType[f"{tt[0]}{tt[1:].lower()}"] # NB Erratum: "isMultiLingual" appears in XSD schemas ("The isMultiLingual attribute # indicates for a text format of type 'string', whether the value should allow @@ -1102,18 +1168,21 @@ def _concept(reader, elem): # §3.3: Basic Inheritance +COMPONENT = """ + str:Attribute str:Dimension str:GroupDimension str:IdentifiableObjectTarget + str:KeyDescriptorValuesTarget str:MeasureDimension str:MetadataAttribute + str:PrimaryMeasure str:ReportPeriodTarget str:TimeDimension + """ -@end( - "str:Attribute str:Dimension str:GroupDimension str:MeasureDimension " - "str:PrimaryMeasure str:TimeDimension" -) -def _component(reader: Reader, elem): - try: - # May be a reference - return reader.reference(elem) - except NotReference: - pass +@start(COMPONENT, only=False) +def _component_start(reader: Reader, elem): + reader.stash(reader.class_for_tag(elem.tag)) + + +@end(COMPONENT, only=False) +@possible_reference(unstash=True) +def _component_end(reader: Reader, elem): # Object class: {,Measure,Time}Dimension or DataAttribute cls = reader.class_for_tag(elem.tag) @@ -1141,6 +1210,12 @@ def _component(reader: Reader, elem): assert len(ar) == 1, ar args["related_to"] = ar[0] + # MetadataAttribute.child only + if children := reader.pop_all(cls): + args["child"] = 
children + + reader.unstash() + # SDMX 2.1 spec §3A, part III, p.140: “The id attribute holds an explicit # identification of the component. If this identifier is not supplied, then it is # assumed to be the same as the identifier of the concept referenced from the @@ -1154,35 +1229,43 @@ def _component(reader: Reader, elem): return reader.identifiable(cls, elem, **args) -@end("str:AttributeList str:DimensionList str:Group str:MeasureList") +@end( + """ + str:AttributeList str:DimensionList str:Group str:MetadataTarget str:MeasureList + str:ReportStructure + """ +) +@possible_reference(cls_hint=model.GroupDimensionDescriptor) # def _cl(reader: Reader, elem): - try: - # may be a reference - return reader.reference(elem, cls_hint=model.GroupDimensionDescriptor) - except NotReference: - pass - - # Retrieve the DSD - dsd = reader.peek("current DSD") + # Retrieve the DSD (or MSD) + dsd: common.Structure = reader.peek("current DSD") assert dsd is not None - # Retrieve the components - args = dict(components=reader.pop_all(model.Component, subclass=True)) - # Determine the class - localname = QName(elem).localname - if localname == "Group": - cls: Type = model.GroupDimensionDescriptor + cls = reader.class_for_tag(elem.tag) + + args = dict( + # Retrieve the components + components=reader.pop_all(model.Component, subclass=True), + # SDMX-ML spec for, e.g. DimensionList: "The id attribute is provided in this + # case for completeness. However, its value is fixed to 'DimensionDescriptor'." 
+ id=elem.attrib.get("id", cls.__name__), + ) + if cls is common.GroupDimensionDescriptor: + assert isinstance(dsd, common.BaseDataStructureDefinition) # Replace components with references args["components"] = [ dsd.dimensions.get(ref.target_id) for ref in reader.pop_all("DimensionReference") ] + elif cls is v21.ReportStructure: + assert isinstance(dsd, v21.MetadataStructureDefinition) + # Assemble MetadataTarget references for the `report_for` field + args["report_for"] = list() + for target_ref in reader.pop_all(reader.Reference): + args["report_for"].append(dsd.target[target_ref.id]) else: - # SDMX-ML spec for, e.g. DimensionList: "The id attribute is provided in this - # case for completeness. However, its value is fixed to 'DimensionDescriptor'." - cls = reader.class_for_tag(elem.tag) args["id"] = elem.attrib.get("id", cls.__name__) cl = reader.identifiable(cls, elem, **args) @@ -1196,16 +1279,7 @@ def _cl(reader: Reader, elem): # Assign to the DSD eagerly (instead of in _dsd_end()) for reference by next # ComponentList e.g. 
so that AttributeRelationship can reference the # DimensionDescriptor - attr = { - common.DimensionDescriptor: "dimensions", - common.AttributeDescriptor: "attributes", - reader.model.MeasureDescriptor: "measures", - common.GroupDimensionDescriptor: "group_dimensions", - }[cl.__class__] - if attr == "group_dimensions": - getattr(dsd, attr)[cl.id] = cl - else: - setattr(dsd, attr, cl) + dsd.replace_grouping(cl) # §4.5: Category Scheme @@ -1294,31 +1368,48 @@ def _tr(reader, elem): def _ms_component(reader, elem, kind): """Identify the Component for a ValueSelection.""" - try: - # Navigate from the current ContentConstraint to a ConstrainableArtefact - cc_content = reader.stack[reader.Reference] - assert len(cc_content) == 1, (cc_content, reader.stack, elem.attrib) - obj = reader.resolve(next(iter(cc_content.values()))) - - if isinstance(obj, model.DataflowDefinition): - # The constrained DFD has a corresponding DSD, which has a Dimension- or - # AttributeDescriptor - cl = getattr(obj.structure, kind[0]) - elif isinstance(obj, model.DataStructureDefinition): - # The DSD is constrained directly - cl = getattr(obj, kind[0]) - else: - log.warning(f"Not implemented: constraints attached to {type(obj)}") - cl = None + # Navigate from the current ContentConstraint to a ConstrainableArtefact + cc_content = reader.stack[reader.Reference] + if len(cc_content) > 1: + log.info( + f"Resolve reference to <{kind[1].__name__} {elem.attrib['id']}> using first" + f" of {len(cc_content)} constrained objects" + ) + obj = reader.resolve(next(iter(cc_content.values()))) + + if isinstance(obj, model.DataflowDefinition): + # The constrained DFD has a corresponding DSD, which has a Dimension- or + # AttributeDescriptor + dsd = obj.structure + elif isinstance(obj, model.DataStructureDefinition): + # The DSD is constrained directly + dsd = obj + else: + log.warning(f"Not implemented: constraints attached to {type(obj)}") + dsd = None - # Get the Component - return cl, 
cl.get(elem.attrib["id"]) + try: + # Get the component list + cl = getattr(dsd, kind[0]) except AttributeError: # Failed because the ContentConstraint is attached to something, e.g. # DataProvider, that does not provide an association to a DSD. Try to get a # Component from the current scope with matching ID. return None, reader.get_single(kind[1], id=elem.attrib["id"], subclass=True) + # Get the Component + try: + c = cl.get(elem.attrib["id"]) + except KeyError: + if dsd.is_external_reference: + # No component with the given ID exists, but the DSD is an external + # reference → create the component automatically + c = cl.getdefault(elem.attrib["id"]) + else: + raise + + return cl, c + def _ms_agency_id(elem): """Return the MemberSelection → CubeRegion → ContentConstraint → agencyID.""" @@ -1359,7 +1450,7 @@ def _ms(reader, elem): # Convert to SelectionValue mvs = reader.pop_all("Value") - trv = reader.pop_all(model.TimeRangeValue) + trv = reader.pop_all(model.TimeRangeValue, subclass=True) if mvs: arg["values"] = list(map(lambda v: model.MemberValue(value=v), mvs)) elif trv: @@ -1459,44 +1550,35 @@ def _ar(reader, elem): return common.GroupRelationship(**args) -@start("str:DataStructure", only=False) -def _dsd_start(reader: Reader, elem): - try: - # may be a reference, e.g. in - return reader.reference(elem) - except NotReference: - pass - - # Get any external reference created earlier, or instantiate a new object. 
- dsd = reader.maintainable(reader.model.DataStructureDefinition, elem) +@start("str:DataStructure str:MetadataStructure", only=False) +@possible_reference() # in +def _structure_start(reader: Reader, elem): + # Get any external reference created earlier, or instantiate a new object + cls = reader.class_for_tag(elem.tag) + obj = reader.maintainable(cls, elem) - if dsd not in reader.stack[reader.model.DataStructureDefinition]: + if obj not in reader.stack[cls]: # A new object was created - reader.push(dsd) + reader.push(obj) # Store a separate reference to the current DSD - reader.push("current DSD", dsd) + reader.push("current DSD", obj) -@end("str:DataStructure", only=False) -def _dsd_end(reader, elem): - dsd = reader.pop_single("current DSD") +@end("str:DataStructure str:MetadataStructure", only=False) +def _structure_end(reader, elem): + obj = reader.pop_single("current DSD") - if dsd: + if obj: # Collect annotations, name, and description - dsd.annotations = list(reader.pop_all(model.Annotation)) - add_localizations(dsd.name, reader.pop_all("Name")) - add_localizations(dsd.description, reader.pop_all("Description")) + obj.annotations = list(reader.pop_all(model.Annotation)) + add_localizations(obj.name, reader.pop_all("Name")) + add_localizations(obj.description, reader.pop_all("Description")) @end("str:Dataflow str:Metadataflow") +@possible_reference() # in def _dfd(reader: Reader, elem): - try: - # may be a reference, e.g. 
in - return reader.reference(elem) - except NotReference: - pass - structure = reader.pop_resolved_ref("Structure") if structure is None: log.warning( @@ -1550,7 +1632,7 @@ def _series_ss(reader, elem): ds.add_obs( reader.pop_all(model.Observation), ds.structured_by.make_key( - model.SeriesKey, elem.attrib, extend=reader.peek("SS without DSD") + model.SeriesKey, elem.attrib, extend=reader.peek("SS without structure") ), ) @@ -1574,7 +1656,7 @@ def _group_ss(reader, elem): group_id = attrib.pop(reader.qname("xsi", "type"), None) gk = ds.structured_by.make_key( - model.GroupKey, attrib, extend=reader.peek("SS without DSD") + model.GroupKey, attrib, extend=reader.peek("SS without structure") ) if group_id: @@ -1586,7 +1668,7 @@ def _group_ss(reader, elem): try: gk.described_by = ds.structured_by.group_dimensions[group_id] except KeyError: - if not reader.peek("SS without DSD"): + if not reader.peek("SS without structure"): raise ds.group[gk] = [] @@ -1620,7 +1702,7 @@ def _obs(reader, elem): @end(":Obs") def _obs_ss(reader, elem): # True if the user failed to provide a DSD to use in parsing structure-specific data - extend = reader.peek("SS without DSD") + extend = reader.peek("SS without structure") # Retrieve the PrimaryMeasure from the DSD for the current data set dsd = reader.get_single("DataSet").structured_by @@ -1707,23 +1789,288 @@ def _ds_end(reader, elem): # §7.3: Metadata Structure Definition +# §7.4: Metadata Set -@end("str:MetadataTarget") -def _mdt(reader: Reader, elem): # pragma: no cover - raise NotImplementedError +@start("mes:MetadataSet", only=False) +def _mds_start(reader, elem): + # Create an instance of a MetadataSet + mds = reader.class_for_tag(elem.tag)() + + # Retrieve the (message-local) ID referencing a data structure definition + id = elem.attrib.get("structureRef", None) or elem.attrib.get( + reader.qname("metadata:structureRef"), None + ) + + # Get a reference to the MSD that structures the data set + # Provided in the / + msd = 
reader.get_single(id) + if not msd: # pragma: no cover + # Fall back to a MSD provided as an argument to read_message() + msd = reader.get_single(common.BaseMetadataStructureDefinition, subclass=True) + + if not msd: + raise RuntimeError("No MSD when creating DataSet") + + log.debug( + f'Use provided {msd!r} for structureRef="{id}" not defined in message' + ) + + mds.structured_by = msd + + reader.push("MetadataSet", mds) + + +@end("mes:MetadataSet", only=False) +def _mds_end(reader, elem): + mds = reader.pop_single("MetadataSet") + + # Collect the contained MetadataReports + mds.report.extend(reader.pop_all(v21.MetadataReport)) + + # Add the data set to the message + reader.get_single(message.MetadataMessage).data.append(mds) -@end("str:MetadataStructure") -def _msd(reader: Reader, elem): # pragma: no cover - cls = reader.class_for_tag(elem) - log.warning(f"Not parsed: {elem.tag} -> {cls}") - return NotImplemented + +@end(":Report md:Report") +def _md_report(reader: Reader, elem): + cls = reader.class_for_tag(elem.tag) + + obj = cls( + attaches_to=reader.pop_single(model.TargetObjectKey), + metadata=reader.pop_all(model.ReportedAttribute, subclass=True), + ) + return obj + + +@end(":Target md:Target") +def _tov(reader: Reader, elem): + cls = reader.class_for_tag(elem.tag) + + obj = cls( + key_values={ + v.value_for: v for v in reader.pop_all(v21.TargetObjectValue, subclass=True) + } + ) + return obj + + +@end(":ReferenceValue md:ReferenceValue") +def _rv(reader: Reader, elem): + cls = reader.class_for_tag(elem[0].tag) + + mds = reader.get_single(common.BaseMetadataStructureDefinition, subclass=True) + + # TODO resolve the TargetObject + del mds + + if QName(elem).namespace is None: + # Structure-specific: the TargetObject ID is stored in the "xsi:type" attribute + # as the last part of a value like "esms:CATEGORY_TARGET.ReportPeriodTarget" + args = dict(value_for=elem.attrib[reader.qname("xsi", "type")].split(".")[-1]) + else: + args = 
dict(value_for=elem.attrib["id"]) + + if cls is v21.TargetReportPeriod: + args["report_period"] = reader.pop_single("ReportPeriod") + elif cls is model.TargetIdentifiableObject: + args["obj"] = reader.pop_single("ObjectReference") + + obj = cls(**args) + + return obj + + +def add_mds_events(reader: Reader, mds: model.MetadataStructureDefinition): + """Add parser events for structure-specific metadata.""" + + # TODO these persist after reading a particular message; avoid this + def _add_events_for_ma(ma: model.MetadataAttribute): + reader.start(f":{ma.id}", only=False)(_ra_start) + reader.end(f":{ma.id}", only=False)(_ra_end) + for child in ma.child: + _add_events_for_ma(child) + + for rs in mds.report_structure.values(): + for ma in rs.components: + _add_events_for_ma(ma) + + +@start("md:ReportedAttribute", only=False) +def _ra_start(reader: Reader, elem): + # Avoid collecting previous/sibling ReportedAttribute as children of this one + reader.stash(model.ReportedAttribute) + + +@end("md:ReportedAttribute", only=False) +def _ra_end(reader: Reader, elem): + cls = reader.class_for_tag(elem.tag) + if cls is None: + cls = reader.class_for_tag("md:ReportedAttribute") + value_for = elem.tag + else: + value_for = elem.attrib["id"] + + # Pop all child elements + args = dict(child=reader.pop_all(cls, subclass=True), value_for=value_for) + + xhtml = reader.pop_single("StructuredText") + if xhtml: + cls = v21.XHTMLAttributeValue + args["value"] = xhtml + + obj = cls(**args) + + reader.unstash() + return obj + + +# §8: Hierarchical Code List + + +@end("str:HierarchicalCode") +def _hc(reader: Reader, elem): + cls = reader.class_for_tag(elem.tag) + + code = reader.resolve(reader.pop_single(reader.Reference)) + + if code is None: + # Retrieve and resolve the reference to the Codelist + cl_alias = reader.pop_single("CodelistAliasRef") + cl_ref = reader.peek("CodelistAlias")[cl_alias] + cl = reader.resolve(cl_ref) + + # Manually resolve the CodeID + code_id = 
reader.pop_single("CodeID").id + try: + code = cl[code_id] + except KeyError: + if cl.is_external_reference: + code = cl.setdefault(id=code_id) + else: # pragma: no cover + raise + + # Create the HierarchicalCode + obj = reader.identifiable(cls, elem, code=code) + + # Count children represented as XML sub-elements of the parent + n_child = sum(e.tag == elem.tag for e in elem) + # Collect this many children and append them to `obj` + obj.child.extend(reversed([reader.pop_single(cls) for i in range(n_child)])) + + return obj + + +@end("str:Level") +def _l(reader: Reader, elem): + cls = reader.class_for_tag(elem.tag) + + return reader.nameable(cls, elem, child=reader.pop_single(cls)) + + +@end("str:Hierarchy") +def _h(reader: Reader, elem): + cls = reader.class_for_tag(elem.tag) + return reader.nameable( + cls, + elem, + has_formal_levels=eval(elem.attrib["leveled"].title()), + codes={c.id: c for c in reader.pop_all(model.HierarchicalCode)}, + level=reader.pop_single(common.Level), + ) + + +@end("str:IncludedCodelist") +def _icl(reader: Reader, elem): + obj = reader.reference(elem, common.Codelist) + + if reader.peek("CodelistAlias") is None: + reader.push("CodelistAlias", dict()) + reader.peek("CodelistAlias")[elem.attrib["alias"]] = obj + + return None + + +@end("str:HierarchicalCodelist") +def _hcl(reader: Reader, elem): + cls = reader.class_for_tag(elem.tag) + reader.pop_all("CodelistAlias") + return reader.maintainable(cls, elem, hierarchy=reader.pop_all(model.Hierarchy)) + + +# §9: Structure Set and Mappings + + +@start("str:CodelistMap", only=False) +def _ismap_start(reader: Reader, elem): + cls: Type[model.ItemSchemeMap] = reader.class_for_tag(elem.tag) + # Push class for reference while parsing sub-elements + reader.push("ItemAssociation class", cls._ItemAssociation._Item) + + +@end("str:CodelistMap", only=False) +def _ismap_end(reader: Reader, elem): + cls: Type[model.ItemSchemeMap] = reader.class_for_tag(elem.tag) + + # Remove class from stacks + 
reader.pop_single("ItemAssociation class") + + # Retrieve the source and target ItemSchemes + source: model.ItemScheme = reader.pop_resolved_ref("Source") + target: model.ItemScheme = reader.pop_resolved_ref("Target") + + # Iterate over the ItemAssociation instances + ia_all = list() + for ia in reader.pop_all(cls._ItemAssociation): + for name, scheme in ("source", source), ("target", target): + # ia.source is a Reference; retrieve its ID + id_ = getattr(ia, name).id + try: + # Use the ID to look up an Item in the ItemScheme + item = scheme[id_] + except KeyError: + if scheme.is_external_reference: + # Externally-referenced ItemScheme → create the Item + item = scheme.setdefault(id=id_) + else: # pragma: no cover + raise + setattr(ia, name, item) + + ia_all.append(ia) + + return reader.nameable( + cls, elem, source=source, target=target, item_association=ia_all + ) + + +@end("str:CodeMap") +def _item_map(reader: Reader, elem): + cls: Type[model.ItemAssociation] = reader.class_for_tag(elem.tag) + + # Store Source and Target as Reference instances + return reader.annotable( + cls, + elem, + source=reader.pop_single("Source"), + target=reader.pop_single("Target"), + ) + + +@end("str:StructureSet") +def _ss(reader: Reader, elem): + return reader.maintainable( + model.StructureSet, + elem, + # Collect all ItemSchemeMaps + item_scheme_map=reader.pop_all(model.ItemSchemeMap, subclass=True), + ) # §11: Data Provisioning @end("str:ProvisionAgreement") +@possible_reference() # in def _pa(reader, elem): return reader.maintainable( model.ProvisionAgreement, diff --git a/sdmx/reader/xml/v30.py b/sdmx/reader/xml/v30.py index 15a1441af..889952899 100644 --- a/sdmx/reader/xml/v30.py +++ b/sdmx/reader/xml/v30.py @@ -58,9 +58,9 @@ class Reader(v21.Reader): start( """ str:AgencySchemes str:ConceptSchemes str:CustomTypeSchemes str:DataConstraints - str:GeographicCodelists str:GeoGridCodelists str:NamePersonalisationSchemes - str:RulesetSchemes str:TransformationSchemes 
str:UserDefinedOperatorSchemes - str:ValueLists str:VtlMappingSchemes + str:GeographicCodelists str:GeoGridCodelists str:Hierarchies + str:NamePersonalisationSchemes str:RulesetSchemes str:TransformationSchemes + str:UserDefinedOperatorSchemes str:ValueLists str:VtlMappingSchemes """ )(None) @@ -71,7 +71,9 @@ class Reader(v21.Reader): v21._item_start ) end("str:GeoFeatureSetCode str:GeoGridCode str:ValueItem", only=False)(v21._item_end) -end("str:Measure")(v21._component) +start("str:Measure str:MetadataAttribute", only=False)(v21._component_start) +end("str:Measure str:MetadataAttribute", only=False)(v21._component_end) +end("str:MetadataAttributeList")(v21._cl) end("str:DataConstraint")(v21._cc) end("str:KeyValue")(v21._ms) end("str:Observation")(v21._ar_kind) @@ -183,3 +185,18 @@ def _complex(reader: Reader, elem): reader.stack["Attributes"][-1][da.id] = model.AttributeValue( value=reader.pop_all("ComplexValue"), value_for=da ) + + +# §8: Hierarchy + + +@end("str:Hierarchy") +def _h(reader: Reader, elem): + cls = reader.class_for_tag(elem.tag) + return reader.maintainable( + cls, + elem, + has_formal_levels=eval(elem.attrib["hasFormalLevels"].title()), + codes={c.id: c for c in reader.pop_all(model.HierarchicalCode)}, + level=reader.pop_single(common.Level), + ) diff --git a/sdmx/rest.py b/sdmx/rest.py index 62440e6d7..660f9b62c 100644 --- a/sdmx/rest.py +++ b/sdmx/rest.py @@ -48,30 +48,30 @@ class Resource(str, Enum): ``codelist`` :class:`.Codelist` ``conceptscheme`` :class:`.ConceptScheme` ``contentconstraint`` :class:`.ContentConstraint` + ``customtypescheme`` :class:`.CustomTypeScheme`. ``data`` :class:`.DataSet` ``dataflow`` :class:`Dataflow(Definition) <.BaseDataflow>` ``dataconsumerscheme`` :class:`.DataConsumerScheme` ``dataproviderscheme`` :class:`.DataProviderScheme` ``datastructure`` :class:`DataStructureDefinition <.BaseDataStructureDefinition>` + ``hierarchicalcodelist`` :class:`.v21.HierarchicalCodelist`. 
+ ``metadata`` :class:`MetadataSet <.BaseMetadataSet>`. ``metadataflow`` :class:`Metadataflow(Definition) <.Metadataflow>` ``metadatastructure`` :class:`MetadataStructureDefinition <.BaseMetadataStructureDefinition>` + ``namepersonalisationscheme`` :class:`.NamePersonalisationScheme`. ``organisationscheme`` :class:`.OrganisationScheme` ``provisionagreement`` :class:`.ProvisionAgreement` + ``rulesetscheme`` :class:`.RulesetScheme`. ``structure`` Mixed. + ``structureset`` :class:`.StructureSet`. + ``transformationscheme`` :class:`.TransformationScheme`. + ``userdefinedoperatorscheme`` :class:`.UserdefinedoperatorScheme`. + ``vtlmappingscheme`` :class:`.VTLMappingScheme`. ----------------------------- ------------------------------------------------------ - ``customtypescheme`` Not implemented. - ``hierarchicalcodelist`` Not implemented. - ``metadata`` Not implemented. - ``namepersonalisationscheme`` Not implemented. ``organisationunitscheme`` Not implemented. ``process`` Not implemented. ``reportingtaxonomy`` Not implemented. - ``rulesetscheme`` Not implemented. ``schema`` Not implemented. - ``structureset`` Not implemented. - ``transformationscheme`` Not implemented. - ``userdefinedoperatorscheme`` Not implemented. - ``vtlmappingscheme`` Not implemented. ============================= ====================================================== """ # noqa: E501 @@ -130,7 +130,7 @@ def describe(cls): @dataclass class URL: - """Utility class to build SDMX REST URLs. + """Utility class to build SDMX 2.1 REST web service URLs. See also -------- diff --git a/sdmx/source/__init__.py b/sdmx/source/__init__.py index ac2ee0798..50048579f 100644 --- a/sdmx/source/__init__.py +++ b/sdmx/source/__init__.py @@ -21,10 +21,10 @@ #: endpoints that are described in the standards but are not implemented by any source #: currently in :file:`sources.json`; these all return 404. 
SDMX_ML_SUPPORTS = { - Resource.data: True, Resource.attachementconstraint: False, Resource.customtypescheme: False, - Resource.metadata: False, + Resource.data: True, + Resource.metadata: True, Resource.namepersonalisationscheme: False, Resource.organisationunitscheme: False, Resource.process: False, diff --git a/sdmx/sources.json b/sdmx/sources.json index 1eb2188e9..7fbf83823 100644 --- a/sdmx/sources.json +++ b/sdmx/sources.json @@ -69,7 +69,17 @@ { "id": "COMP", "name": "European Commission Directorate General for Competition", - "url": "https://webgate.ec.europa.eu/comp/redisstat/api/dissemination/sdmx/2.1" + "url": "https://webgate.ec.europa.eu/comp/redisstat/api/dissemination/sdmx/2.1", + "supports": { + "actualconstraint": false, + "allowedconstraint": false, + "contentconstraint": false, + "hierarchicalcodelist": false, + "metadataflow": false, + "metadatastructure": false, + "structure": false, + "structureset": false + } }, { "id": "ECB", @@ -90,7 +100,17 @@ { "id": "EMPL", "name": "European Commission Directorate General for Employment, Social Affairs, and Inclusion", - "url": "https://webgate.ec.europa.eu/empl/redisstat/api/dissemination/sdmx/2.1" + "url": "https://webgate.ec.europa.eu/empl/redisstat/api/dissemination/sdmx/2.1", + "supports": { + "actualconstraint": false, + "allowedconstraint": false, + "contentconstraint": false, + "hierarchicalcodelist": false, + "metadataflow": false, + "metadatastructure": false, + "structure": false, + "structureset": false + } }, { "id": "ESTAT", @@ -115,12 +135,32 @@ { "id": "ESTAT_COMEXT", "name": "Eurostat (Comext and Prodcom datasets)", - "url": "https://ec.europa.eu/eurostat/api/comext/dissemination/sdmx/2.1" + "url": "https://ec.europa.eu/eurostat/api/comext/dissemination/sdmx/2.1", + "supports": { + "actualconstraint": false, + "allowedconstraint": false, + "contentconstraint": false, + "hierarchicalcodelist": false, + "metadataflow": false, + "metadatastructure": false, + "structure": false, + 
"structureset": false + } }, { "id": "GROW", "name": "European Commission Directorate General for Internal Market, Industry, Entrepreneurship and SMEs", - "url": "https://webgate.ec.europa.eu/grow/redisstat/api/dissemination/sdmx/2.1" + "url": "https://webgate.ec.europa.eu/grow/redisstat/api/dissemination/sdmx/2.1", + "supports": { + "actualconstraint": false, + "allowedconstraint": false, + "contentconstraint": false, + "hierarchicalcodelist": false, + "metadataflow": false, + "metadatastructure": false, + "structure": false, + "structureset": false + } }, { "id": "ILO", @@ -148,7 +188,6 @@ "name": "International Monetary Fund", "supports": { "actualconstraint": false, - "allowedconstraint": false, "hierarchicalcodelist": false, "metadataflow": false, "metadatastructure": false, @@ -288,8 +327,8 @@ "name": "SDMX Global Registry", "supports": { "actualconstraint": false, - "allowedconstraint": false, - "metadataflow": false + "metadataflow": false, + "structureset": false } }, { @@ -299,7 +338,6 @@ "supports": { "dataconsumerscheme": false, "dataproviderscheme": false, - "hierarchicalcodelist": false, "metadataflow": false, "provisionagreement": false, "structureset": false @@ -361,6 +399,7 @@ "metadataflow": false, "metadatastructure": false, "provisionagreement": false, + "structure": false, "structureset": false, "preview": true } diff --git a/sdmx/testing/__init__.py b/sdmx/testing/__init__.py index e60eab714..c70b72ba2 100644 --- a/sdmx/testing/__init__.py +++ b/sdmx/testing/__init__.py @@ -259,22 +259,40 @@ def __init__(self, base_path): self.specimens.extend( (base_path.joinpath(*parts), "xml", "structure") for parts in [ + ("BIS", "actualconstraint-0.xml"), + ("BIS", "hierarchicalcodelist-0.xml"), ("ECB", "orgscheme.xml"), + ("ECB", "structureset-0.xml"), ("ESTAT", "apro_mk_cola-structure.xml"), ("ESTAT", "GOV_10Q_GGNFA.xml"), + ("ESTAT", "HCL_WSTATUS_SCL_BNSPART.xml"), + ("ESTAT", "HCL_WSTATUS_SCL_WSTATUSPR.xml"), + ("IAEG-SDGs", 
"metadatastructure-0.xml"), ("IMF", "1PI-structure.xml"), ("IMF", "CL_AREA-structure.xml"), # Manually reduced subset of the response for this DSD. Test for # containing both and ("IMF", "ECOFIN_DSD-structure.xml"), + ("IMF", "hierarchicalcodelist-0.xml"), + ("IMF", "structureset-0.xml"), ("INSEE", "CNA-2010-CONSO-SI-A17-structure.xml"), ("INSEE", "dataflow.xml"), ("INSEE", "IPI-2010-A21-structure.xml"), ("ISTAT", "22_289-structure.xml"), ("ISTAT", "47_850-structure.xml"), + ("ISTAT", "actualconstraint-0.xml"), + ("ISTAT", "metadataflow-0.xml"), + ("ISTAT", "metadatastructure-0.xml"), + ("OECD", "actualconstraint-0.xml"), + ("OECD", "metadatastructure-0.xml"), ("UNICEF", "GLOBAL_DATAFLOW-structure.xml"), ("UNSD", "codelist_partial.xml"), + ("SDMX", "HCL_TEST_AREA.xml"), ("SGR", "common-structure.xml"), + ("SGR", "hierarchicalcodelist-0.xml"), + ("SGR", "metadatastructure-0.xml"), + ("SPC", "actualconstraint-0.xml"), + ("SPC", "metadatastructure-0.xml"), ("TEST", "gh-142.xml"), ("TEST", "gh-149.xml"), ] diff --git a/sdmx/tests/model/test_common.py b/sdmx/tests/model/test_common.py index f5210fa64..e369a3ea6 100644 --- a/sdmx/tests/model/test_common.py +++ b/sdmx/tests/model/test_common.py @@ -3,6 +3,8 @@ import pytest +import sdmx.model as model +from sdmx.model import v21 from sdmx.model.common import ( Agency, AnnotableArtefact, @@ -15,7 +17,6 @@ NameableArtefact, Representation, ) -from sdmx.model.v21 import AttributeDescriptor, DataStructureDefinition class TestAnnotation: @@ -107,7 +108,7 @@ def test_general(self): assert hash(ia) == hash("foo") # Subclass is hashable - ad = AttributeDescriptor() + ad = model.AttributeDescriptor() assert hash(ad) == id(ad) def test_hash_subclass(self): @@ -131,7 +132,7 @@ def test_sort(self): "'DataStructureDefinition'" ), ): - sorted([DataStructureDefinition(id="c")] + items) + sorted([v21.DataStructureDefinition(id="c")] + items) class TestNameableArtefact: @@ -347,3 +348,25 @@ def test_init(self): c2.name = "foo" assert 
repr(c1) != repr(c2) + + +class TestStructure: + @pytest.fixture + def obj(self): + # Use BaseDataStructureDefinition as a concrete/less abstract subclass + return model.BaseDataStructureDefinition() + + def test_grouping(self, obj) -> None: + result = obj.grouping + # Class has an AttributeDescriptor, DimensionDescriptor, and dict-like of + # GroupDimensionDescriptor + assert 3 == len(result) + + def test_replace_grouping(self, obj) -> None: + class Foo(model.ComponentList): + pass + + # Cannot replace with an instance of Foo, because this does not correspond to + # the type of any member of the class + with pytest.raises(TypeError, match="No grouping of type"): + obj.replace_grouping(Foo()) diff --git a/sdmx/tests/model/test_v21.py b/sdmx/tests/model/test_v21.py index e673211aa..eb0e33170 100644 --- a/sdmx/tests/model/test_v21.py +++ b/sdmx/tests/model/test_v21.py @@ -3,10 +3,14 @@ import pytest +import sdmx +import sdmx.message +from sdmx.model import v21 as model from sdmx.model.v21 import ( AttributeDescriptor, AttributeValue, Code, + Codelist, Component, ComponentList, ComponentValue, @@ -578,3 +582,70 @@ def test_init(self): ds1 = DataSet(action="information") assert ds0.action == ds1.action + + +class TestMetadataSet: + @pytest.fixture(scope="class") + def msg(self, specimen) -> sdmx.message.MetadataMessage: + with specimen("esms_generic.xml") as f: + return sdmx.read_sdmx(f) + + def test_report_hierarchy(self, msg: sdmx.message.MetadataMessage) -> None: + # Access message → metadata set → report + r = msg.data[0].report[0] + + # Number of top-level ReportedAttribute + assert 3 == len(r.metadata) + # Number of ReportedAttribute in tree branches + assert 4 == len(r.metadata[0]) + assert 0 == len(r.metadata[0][0]) + assert 4 == len(r.metadata[0][2]) + assert 0 == len(r.metadata[0][2][0]) + assert 3 == len(r.metadata[1]) + assert 1 == len(r.metadata[2]) + + +class TestHierarchicalCodelist: + @pytest.fixture(scope="class") + def msg(self, specimen): + with 
specimen("BIS/hierarchicalcodelist-0.xml") as f: + return sdmx.read_sdmx(f) + + @pytest.fixture(scope="class") + def obj(self, msg) -> model.HierarchicalCodelist: + return msg.hierarchical_codelist["BIS:HCL_COUNTRY(1.0)"] + + def test_hierarchy(self, msg: sdmx.message.StructureMessage) -> None: + for key, hcl in msg.hierarchical_codelist.items(): + assert 1 == len(hcl.hierarchy) + # print(f"{hcl = }") + + hcl = msg.hierarchical_codelist["BIS:HCL_COUNTRY(1.0)"] + + # Access a Hierarchy + h = hcl.hierarchy[0] + assert "HIERARCHY_COUNTRY" == h.id + assert False is h.has_formal_levels + assert 2 == len(h.codes) + + c1 = h.codes["1"] + c2 = h.codes["2"] + + assert 4 == len(c1.child) + + assert 56 == len(c2.child) + # HierarchicalCode has a `code` attribute + assert isinstance(c2.code, Code) + assert "OC" == c2.code + + # This Code is contained within a code list + assert isinstance(c2.code.parent, Codelist) + assert c2.code.parent.urn.endswith("Codelist=BIS:CL_WEBSTATS_CODES(1.0)") + + # The code has a child associated with a different code list + c3 = c2.child[0] + assert "6J" == c3.code + assert c3.code.parent.urn.endswith("Codelist=BIS:CL_BIS_IF_REF_AREA(1.0)") + + def test_repr(self, obj: model.HierarchicalCodelist): + assert "" == repr(obj) diff --git a/sdmx/tests/reader/test_reader_xml.py b/sdmx/tests/reader/test_reader_xml.py index 5528d50ab..3f546bb71 100644 --- a/sdmx/tests/reader/test_reader_xml.py +++ b/sdmx/tests/reader/test_reader_xml.py @@ -5,7 +5,50 @@ @pytest.mark.parametrize_specimens("path", format="xml") -def test_read_xml(path): +def test_read_xml(path) -> None: """XML specimens can be read.""" + if "esms_structured" in path.name: + pytest.xfail("Not implemented") + result = sdmx.read_sdmx(path) assert isinstance(result, Message) + + +@pytest.mark.parametrize( + "message_path, structure_path, structure_id", + ( + # Structure-specific data; same as test_reader_xml_v21.test_read_ss_xml but + # without additional assertions + ("M.USD.EUR.SP00.A.xml", 
"ECB_EXR/1/structure.xml", "ECB_EXR1"), + # Structure-specific metadata + ("esms_structured.xml", "v21/xml/demography/esms.xml", "ESMS_SIMPLE"), + ), +) +def test_read_xml_ss(specimen, message_path, structure_path, structure_id) -> None: + """Structure-specific (meta)data specimens can be read *using* their structures. + + Note that :func:`.test_read_xml` tests some of the same {Metad,D}ataMessage + specimens, but *without* supplying the [MD]SD; in those cases, the [MD]SD contents + are inferred while parsing. This test, in contrast, parses with the [MD]SD + available. + """ + # Read the structure message and retrieve the structure object + with specimen(structure_path) as f: + sm = sdmx.read_sdmx(f) + + # Structure may be for data or metadata + for name in "structure", "metadatastructure": + try: + s = getattr(sm, name)[structure_id] + except KeyError: + pass + else: + break + + assert s + + # The (meta)data message can be read using its associated structure + with specimen(message_path) as f: + result = sdmx.read_sdmx(f, dsd=s) + + assert isinstance(result, Message) diff --git a/sdmx/tests/test_message.py b/sdmx/tests/test_message.py index ae6bbac81..f8a01211d 100644 --- a/sdmx/tests/test_message.py +++ b/sdmx/tests/test_message.py @@ -81,6 +81,7 @@ def test_objects(self): sender: source: fr: Banque de données macro-économiques test: False + response: Categorisation (1): CAT_IPI-2010_IPI-2010-A21 CategoryScheme (1): CLASSEMENT_DATAFLOWS Codelist (7): CL_FREQ CL_NAF2_A21 CL_NATURE CL_UNIT CL_AREA CL_TIME_C... @@ -99,6 +100,7 @@ def test_objects(self): sender: source: fr: Banque de données macro-économiques test: False + response: DataflowDefinition (663): ACT-TRIM-ANC BPM6-CCAPITAL BPM6-CFINANCIER ... 
DataStructureDefinition (663): ACT-TRIM-ANC BPM6-CCAPITAL BPM6-CFINAN...""", ), @@ -113,6 +115,7 @@ def test_objects(self): source: """ """ test: False + response: DataSet (1) dataflow: observation_dimension: """, @@ -128,6 +131,7 @@ def test_objects(self): source: """ """ test: False + response: DataSet (2) dataflow: observation_dimension: []""", @@ -139,8 +143,14 @@ def test_objects(self): "pattern, expected", EXPECTED, ids=list(map(itemgetter(0), EXPECTED)) ) def test_message_repr(specimen, pattern, expected): + import requests + with specimen(pattern) as f: msg = sdmx.read_sdmx(f) + + # Attach a response object, as if the Message resulted from a requests query + msg.response = requests.Response() + if isinstance(expected, re.Pattern): assert expected.fullmatch(repr(msg)) else: diff --git a/sdmx/tests/test_model.py b/sdmx/tests/test_model.py index 37e6e98eb..95d9dae1c 100644 --- a/sdmx/tests/test_model.py +++ b/sdmx/tests/test_model.py @@ -11,6 +11,10 @@ "NameableArtefact", "VersionableArtefact", "MaintainableArtefact", + "ActionType", + "ConstraintRoleType", + "FacetValueType", + "ExtendedFacetValueType", "Item", "ItemScheme", "FacetType", @@ -49,6 +53,10 @@ "Key", "GroupKey", "SeriesKey", + "CodingFormat", + "Level", + "Hierarchy", + "HierarchicalCode", "ConstraintRole", "ConstrainableArtefact", "SelectionValue", @@ -104,6 +112,9 @@ "Observation", "StructureSpecificDataSet", "MetadataStructureDefinition", + "TextAttributeValue", + "XHTMLAttributeValue", + "MetadataSet", ] V21_ONLY = [ @@ -117,7 +128,32 @@ "GenericDataSet", "GenericTimeSeriesDataSet", "StructureSpecificTimeSeriesDataSet", + "ReportingCategory", + "ReportingTaxonomy", + "TargetObject", + "DataSetTarget", + "DimensionDescriptorValuesTarget", + "IdentifiableObjectTarget", + "ReportPeriodTarget", + "MetadataTarget", + "ReportStructure", + "MetadataStructureDefinition", "MetadataflowDefinition", + "TargetObjectValue", + "TargetReportPeriod", + "TargetIdentifiableObject", + "TargetObjectKey", + 
"ReportedAttribute", + "EnumeratedAttributeValue", + "NonEnumeratedAttributeValue", + "OtherNonEnumeratedAttributeValue", + "MetadataReport", + "HierarchicalCodelist", + "ItemAssociation", + "CodeMap", + "ItemSchemeMap", + "CodelistMap", + "StructureSet", ] V30_ONLY = [ @@ -134,17 +170,21 @@ "MetadataProviderScheme", "Measure", "Dataflow", # Instead of DataflowDefinition - "CodingFormat", - "Level", - "HierarchicalCode", - "Hierarchy", "HierarchyAssociation", "DataflowRelationship", "MeasureRelationship", "ObservationRelationship", "DataConstraint", "MetadataConstraint", + "MetadataAttributeDescriptor", + "IdentifiableObjectSelection", + "MetadataStructureDefinition", "Metadataflow", # Instead of MetadataflowDefinition + "MetadataAttributeValue", + "CodedMetadataAttributeValue", + "UncodedMetadataAttributeValue", + "OtherUncodedAttributeValue", + "TargetIdentifiableObject", ] diff --git a/sdmx/tests/test_sources.py b/sdmx/tests/test_sources.py index 49ff2fd8f..8c6e29831 100644 --- a/sdmx/tests/test_sources.py +++ b/sdmx/tests/test_sources.py @@ -33,15 +33,7 @@ class DataSourceTest: source_id: str #: Failures affecting **all** data sources, internal to :mod:`sdmx`. - xfail_common = { - "actualconstraint": (XMLParseError, NI), # KeyError - "allowedconstraint": (XMLParseError, NI), # KeyError - "contentconstraint": (XMLParseError, NI), # KeyError - "hierarchicalcodelist": (XMLParseError, NI), # - "metadatastructure": (XMLParseError, NI), # not parsed - "structure": (XMLParseError, NI), # not parsed - "structureset": (XMLParseError, NI), # not implemented - } + xfail_common: Dict[str, Any] = {} #: Mapping of endpoint → Exception subclass. Tests of these endpoints are expected #: to fail with the given kind of exception. 
@@ -156,6 +148,10 @@ class TestBBK(DataSourceTest): class TestBIS(DataSourceTest): source_id = "BIS" + endpoint_args = { + "actualconstraint": dict(resource_id="CBP_D_24D"), + } + class TestECB(DataSourceTest): source_id = "ECB" @@ -363,6 +359,9 @@ class TestISTAT(DataSourceTest): "organisationscheme": HTTPError, # 400 "structure": NotImplementedError, # 501 } + endpoint_args = { + "actualconstraint": dict(resource_id="CONS_92_143"), + } @pytest.mark.network def test_gh_75(self, client): @@ -480,10 +479,11 @@ class TestNBB(DataSourceTest): class TestOECD(DataSourceTest): source_id = "OECD" endpoint_args = { + "actualconstraint": dict(resource_id="CR_A_DSD_DEBT_TRANS_COLL@DF_MICRO"), "data": dict( resource_id="DSD_MSTI@DF_MSTI", headers={"Accept-Encoding": "compress, gzip"}, - ) + ), } @@ -519,11 +519,12 @@ class TestSPC(DataSourceTest): "structure": NotImplementedError, # 501 } endpoint_args = { + "actualconstraint": dict(resource_id="CR_A_DF_ADBKI"), "data": dict( resource_id="DF_CPI", key="A.CK+FJ..", params=dict(startPeriod=2010, endPeriod=2015), - ) + ), } diff --git a/sdmx/writer/pandas.py b/sdmx/writer/pandas.py index d6d3bc018..08dac0b70 100644 --- a/sdmx/writer/pandas.py +++ b/sdmx/writer/pandas.py @@ -1,5 +1,5 @@ from itertools import chain -from typing import Any, Dict, Hashable, Set, Union, cast +from typing import Any, Dict, Hashable, Set, Union import numpy as np import pandas as pd @@ -196,15 +196,19 @@ def _cc(obj: model.ContentConstraint, **kwargs): def _cr(obj: model.CubeRegion, **kwargs): """Convert :class:`.CubeRegion`.""" result: DictLike[str, pd.Series] = DictLike() - for dim, memberselection in obj.member.items(): + for dim, ms in obj.member.items(): result[dim.id] = pd.Series( - # cast(): as of PR#30, only MemberValue is supported here - [cast(model.MemberValue, mv).value for mv in memberselection.values], - name=dim.id, + [writer.recurse(sv, **kwargs) for sv in ms.values], name=dim.id ) return result +@writer +def _rp(obj: model.RangePeriod, 
**kwargs): + """Convert :class:`.RangePeriod`.""" + return f"{obj.start.period}–{obj.end.period}" + + @writer def write_dataset( obj: model.DataSet,