From ffc9b7968f0fadce20b05d2faa1bcf000e7bb124 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Tue, 14 Jan 2025 11:49:57 +0000 Subject: [PATCH 01/11] Add mask to TiledDataset's array of datasets --- dkist/dataset/tiled_dataset.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dkist/dataset/tiled_dataset.py b/dkist/dataset/tiled_dataset.py index 29f69140..9c8427fd 100644 --- a/dkist/dataset/tiled_dataset.py +++ b/dkist/dataset/tiled_dataset.py @@ -82,8 +82,8 @@ def _from_components(cls, shape, file_managers, wcses, header_tables, inventory) return cls(datasets, inventory) - def __init__(self, dataset_array, inventory=None): - self._data = np.array(dataset_array, dtype=object) + def __init__(self, dataset_array, inventory=None, mask=False): + self._data = np.ma.masked_array(dataset_array, dtype=object, mask=mask) self._inventory = inventory or {} self._validate_component_datasets(self._data, inventory) @@ -105,7 +105,7 @@ def __getitem__(self, aslice): @staticmethod def _validate_component_datasets(datasets, inventory): - datasets = datasets.flat + datasets = datasets.compressed() inv_1 = datasets[0].meta["inventory"] if inv_1 and inv_1 is not inventory: raise ValueError("The inventory record of the first dataset does not match the one passed to TiledDataset") @@ -122,7 +122,7 @@ def flat(self): """ Represent this `.TiledDataset` as a 1D array. """ - return type(self)(self._data.flat, self.inventory) + return type(self)(self._data.compressed(), self.inventory) @property def inventory(self): @@ -137,7 +137,7 @@ def combined_headers(self): A single `astropy.table.Table` containing all the FITS headers for all files in this dataset. """ - return vstack([ds.meta["headers"] for ds in self._data.flat]) + return vstack([ds.meta["headers"] for ds in self._data.compressed()]) @property def shape(self): From 6a72839b95d8aefd2d0c4ca2b2450a3715dd1678 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Tue, 14 Jan 2025 11:51:14 +0000 Subject: [PATCH 02/11] Parametrize tiled dataset fixtures to test with a mask as well --- dkist/conftest.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/dkist/conftest.py b/dkist/conftest.py index d19c6ff7..7b88d795 100644 --- a/dkist/conftest.py +++ b/dkist/conftest.py @@ -299,16 +299,23 @@ def eit_dataset(): return f.tree["dataset"] -@pytest.fixture -def simple_tiled_dataset(dataset): +@pytest.fixture(params=[False, + [[False, False], + [True, False]]], + ids=["simple-nomask", "simple-masked"]) +def simple_tiled_dataset(dataset, request): datasets = [copy.deepcopy(dataset) for i in range(4)] for ds in datasets: ds.meta["inventory"] = dataset.meta["inventory"] dataset_array = np.array(datasets).reshape((2,2)) - return TiledDataset(dataset_array, dataset.meta["inventory"]) + return TiledDataset(dataset_array, dataset.meta["inventory"], mask=request.param) -@pytest.fixture +@pytest.fixture(params=[False, + [[False, True, False], + [True, False, True], + [False, True, False]]], + ids=["large-nomask", "large-masked"]) def large_tiled_dataset(tmp_path_factory): vbidir = tmp_path_factory.mktemp("data") with gzip.open(Path(rootdir) / "large_vbi.asdf.gz", mode="rb") as gfo: From 06aae9515bca25a00ed515e49448d4c7319fc137 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Tue, 14 Jan 2025 14:17:41 +0000 Subject: [PATCH 03/11] Make parametrized fixture within parameter work --- dkist/io/asdf/tests/test_dataset.py | 3 ++- pyproject.toml | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/dkist/io/asdf/tests/test_dataset.py b/dkist/io/asdf/tests/test_dataset.py index f475e6b6..b2ff4563 100644 --- a/dkist/io/asdf/tests/test_dataset.py +++ b/dkist/io/asdf/tests/test_dataset.py @@ -3,6 +3,7 @@ import numpy as np import pytest +from pytest_lazy_fixtures import lf import asdf import astropy.table @@ -68,7 +69,7 @@ def test_roundtrip_tiled_dataset(simple_tiled_dataset): @pytest.mark.parametrize("tagobj", [ "dataset", - "simple_tiled_dataset", + lf("simple_tiled_dataset"), ], indirect=True) def test_save_dataset_without_file_schema(tagobj, tmp_path): diff --git a/pyproject.toml b/pyproject.toml index 79a421da..4823c2ad 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,7 @@ tests = [ "pytest-filter-subpackage", "pytest-benchmark", "pytest-xdist", + "pytest-lazy-fixtures", "hypothesis", "tox", "pydot", From 4cffe7a2c856ce182a840fa2138ec2cceb19455e Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Mon, 20 Jan 2025 14:09:19 +0000 Subject: [PATCH 04/11] Add new yaml schema for masked TiledDataset --- dkist/io/asdf/converters/tiled_dataset.py | 9 ++++- .../schemas/tiled_dataset-1.1.0.yaml | 35 +++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 dkist/io/asdf/resources/schemas/tiled_dataset-1.1.0.yaml diff --git a/dkist/io/asdf/converters/tiled_dataset.py b/dkist/io/asdf/converters/tiled_dataset.py index 588754fa..19aa0d2f 100644 --- a/dkist/io/asdf/converters/tiled_dataset.py +++ b/dkist/io/asdf/converters/tiled_dataset.py @@ -5,16 +5,23 @@ class TiledDatasetConverter(Converter): tags = [ "tag:dkist.nso.edu:dkist/tiled_dataset-0.1.0", "asdf://dkist.nso.edu/tags/tiled_dataset-1.0.0", + "asdf://dkist.nso.edu/tags/tiled_dataset-1.1.0", ] types = ["dkist.dataset.tiled_dataset.TiledDataset"] def from_yaml_tree(cls, node, tag, ctx): from dkist.dataset.tiled_dataset import TiledDataset - return TiledDataset(node["datasets"], node["inventory"]) + try: + mask = node["mask"] + except KeyError: + mask = None + + return TiledDataset(node["datasets"], node["inventory"], mask) def to_yaml_tree(cls, tiled_dataset, tag, ctx): tree = {} tree["inventory"] = tiled_dataset._inventory tree["datasets"] = tiled_dataset._data.tolist() + tree["mask"] = tiled_dataset._data.mask.tolist() return tree diff --git a/dkist/io/asdf/resources/schemas/tiled_dataset-1.1.0.yaml b/dkist/io/asdf/resources/schemas/tiled_dataset-1.1.0.yaml new file mode 100644 index 00000000..adf212ea --- /dev/null +++ b/dkist/io/asdf/resources/schemas/tiled_dataset-1.1.0.yaml @@ -0,0 +1,35 @@ +%YAML 1.1 +--- +$schema: "http://stsci.edu/schemas/yaml-schema/draft-01" +id: "asdf://dkist.nso.edu/schemas/tiled_dataset-1.0.0" + +title: | + A DKIST Tiled Dataset object. +description: + The container for a set of Dataset objects. + +type: object +properties: + datasets: + description: A nested structure of Dataset objects + type: array + items: + type: array + items: + anyOf: + - tag: "asdf://dkist.nso.edu/tags/dataset-1.*" + - null: true + inventory: + description: A copy of the inventory record for this dataset. + type: object + mask: + description: A mask to indicate if invalid or missing Datasets should be ignored. + type: array + items: + type: array + items: + - type: boolean + +required: [datasets, inventory, mask] +additionalProperties: false +... From d2423134f281d3e3242304fd5ff06ee59b8e33a6 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Tue, 21 Jan 2025 09:59:49 +0000 Subject: [PATCH 05/11] Update dkist/io/asdf/converters/tiled_dataset.py Co-authored-by: Stuart Mumford --- dkist/io/asdf/converters/tiled_dataset.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/dkist/io/asdf/converters/tiled_dataset.py b/dkist/io/asdf/converters/tiled_dataset.py index 19aa0d2f..a76d5fe8 100644 --- a/dkist/io/asdf/converters/tiled_dataset.py +++ b/dkist/io/asdf/converters/tiled_dataset.py @@ -12,10 +12,7 @@ class TiledDatasetConverter(Converter): def from_yaml_tree(cls, node, tag, ctx): from dkist.dataset.tiled_dataset import TiledDataset - try: - mask = node["mask"] - except KeyError: - mask = None + mask = node.get("mask", None) return TiledDataset(node["datasets"], node["inventory"], mask) From e1a8037bb38ff9ed9e62dd80e8fe87862513118b Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Tue, 21 Jan 2025 12:06:08 +0000 Subject: [PATCH 06/11] Add new manifest --- dkist/io/asdf/resources/manifests/dkist-1.3.0.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 dkist/io/asdf/resources/manifests/dkist-1.3.0.yaml diff --git a/dkist/io/asdf/resources/manifests/dkist-1.3.0.yaml b/dkist/io/asdf/resources/manifests/dkist-1.3.0.yaml new file mode 100644 index 00000000..4107302c --- /dev/null +++ b/dkist/io/asdf/resources/manifests/dkist-1.3.0.yaml @@ -0,0 +1,14 @@ +%YAML 1.1 +--- +id: asdf://dkist.nso.edu/manifests/dkist-1.3.0 +extension_uri: asdf://dkist.nso.edu/dkist/extensions/dkist-1.3.0 +title: DKIST extension +description: ASDF schemas and tags for DKIST classes. + +tags: + - schema_uri: "asdf://dkist.nso.edu/schemas/file_manager-1.0.0" + tag_uri: "asdf://dkist.nso.edu/tags/file_manager-1.0.0" + - schema_uri: "asdf://dkist.nso.edu/schemas/dataset-1.1.0" + tag_uri: "asdf://dkist.nso.edu/tags/dataset-1.2.0" + - schema_uri: "asdf://dkist.nso.edu/schemas/tiled_dataset-1.1.0" + tag_uri: "asdf://dkist.nso.edu/tags/tiled_dataset-1.1.0" From c81ec9df3a1ef722f32c20a716316075b1dbb9bc Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Tue, 21 Jan 2025 12:08:08 +0000 Subject: [PATCH 07/11] Update schema id --- dkist/io/asdf/resources/schemas/tiled_dataset-1.1.0.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dkist/io/asdf/resources/schemas/tiled_dataset-1.1.0.yaml b/dkist/io/asdf/resources/schemas/tiled_dataset-1.1.0.yaml index adf212ea..3f09161e 100644 --- a/dkist/io/asdf/resources/schemas/tiled_dataset-1.1.0.yaml +++ b/dkist/io/asdf/resources/schemas/tiled_dataset-1.1.0.yaml @@ -1,7 +1,7 @@ %YAML 1.1 --- $schema: "http://stsci.edu/schemas/yaml-schema/draft-01" -id: "asdf://dkist.nso.edu/schemas/tiled_dataset-1.0.0" +id: "asdf://dkist.nso.edu/schemas/tiled_dataset-1.1.0" title: | A DKIST Tiled Dataset object. From 07a6b6687551cd8eb3643a9e125e6048678e342b Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Tue, 21 Jan 2025 14:37:18 +0000 Subject: [PATCH 08/11] Add manifest to entry points --- dkist/io/asdf/entry_points.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dkist/io/asdf/entry_points.py b/dkist/io/asdf/entry_points.py index 8d86cf0f..6994bfe3 100644 --- a/dkist/io/asdf/entry_points.py +++ b/dkist/io/asdf/entry_points.py @@ -39,6 +39,8 @@ def get_extensions(): dkist_converters = [FileManagerConverter(), DatasetConverter(), TiledDatasetConverter()] wcs_converters = [VaryingCelestialConverter(), CoupledCompoundConverter(), RavelConverter(), AsymmetricMappingConverter()] return [ + ManifestExtension.from_uri("asdf://dkist.nso.edu/manifests/dkist-1.3.0", + converters=dkist_converters), ManifestExtension.from_uri("asdf://dkist.nso.edu/manifests/dkist-1.2.0", converters=dkist_converters), ManifestExtension.from_uri("asdf://dkist.nso.edu/manifests/dkist-1.1.0", From 32131984c93bd22dfe96a650ecb697862a7fbbac Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Tue, 21 Jan 2025 16:06:04 +0000 Subject: [PATCH 09/11] This was breaking things but I'm not mad, you're mad --- dkist/io/asdf/tests/test_dataset.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dkist/io/asdf/tests/test_dataset.py b/dkist/io/asdf/tests/test_dataset.py index b2ff4563..4fdc04f2 100644 --- a/dkist/io/asdf/tests/test_dataset.py +++ b/dkist/io/asdf/tests/test_dataset.py @@ -71,7 +71,7 @@ def test_roundtrip_tiled_dataset(simple_tiled_dataset): "dataset", lf("simple_tiled_dataset"), ], - indirect=True) + indirect=False) def test_save_dataset_without_file_schema(tagobj, tmp_path): tree = {"dataset": tagobj} with asdf.AsdfFile(tree) as afile: @@ -97,9 +97,9 @@ def test_asdf_tags(dataset, tmp_path): @pytest.mark.parametrize("tagobj", [ "dataset", - "simple_tiled_dataset", + lf("simple_tiled_dataset"), ], - indirect=True) + indirect=False) def test_save_dataset_with_file_schema(tagobj, tmpdir): tree = {"dataset": tagobj} with importlib_resources.as_file(importlib_resources.files("dkist.io") / "level_1_dataset_schema.yaml") as schema_path: From 672c33670a710183b46f192e9dba333d619206fb Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Tue, 21 Jan 2025 16:24:27 +0000 Subject: [PATCH 10/11] Changelog --- changelog/487.feature.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog/487.feature.rst diff --git a/changelog/487.feature.rst b/changelog/487.feature.rst new file mode 100644 index 00000000..29fea312 --- /dev/null +++ b/changelog/487.feature.rst @@ -0,0 +1 @@ +Add support to TiledDataset for mosaic datasets with missing tiles or where tiles are irregularly arranged. From c8ef2242f1a821a4ed4daaff8dcc32ba1e48597e Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Thu, 23 Jan 2025 12:13:24 +0000 Subject: [PATCH 11/11] Save mask out to yaml as array instead of list --- dkist/io/asdf/converters/tiled_dataset.py | 2 +- dkist/io/asdf/resources/schemas/tiled_dataset-1.1.0.yaml | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/dkist/io/asdf/converters/tiled_dataset.py b/dkist/io/asdf/converters/tiled_dataset.py index a76d5fe8..a156fe66 100644 --- a/dkist/io/asdf/converters/tiled_dataset.py +++ b/dkist/io/asdf/converters/tiled_dataset.py @@ -20,5 +20,5 @@ def to_yaml_tree(cls, tiled_dataset, tag, ctx): tree = {} tree["inventory"] = tiled_dataset._inventory tree["datasets"] = tiled_dataset._data.tolist() - tree["mask"] = tiled_dataset._data.mask.tolist() + tree["mask"] = tiled_dataset._data.mask return tree diff --git a/dkist/io/asdf/resources/schemas/tiled_dataset-1.1.0.yaml b/dkist/io/asdf/resources/schemas/tiled_dataset-1.1.0.yaml index 3f09161e..b2084a4b 100644 --- a/dkist/io/asdf/resources/schemas/tiled_dataset-1.1.0.yaml +++ b/dkist/io/asdf/resources/schemas/tiled_dataset-1.1.0.yaml @@ -24,11 +24,7 @@ properties: type: object mask: description: A mask to indicate if invalid or missing Datasets should be ignored. - type: array - items: - type: array - items: - - type: boolean + datatype: bool8 required: [datasets, inventory, mask] additionalProperties: false