Skip to content

Commit

Permalink
Merge pull request #55 from HiPCTProject/sort-meta
Browse files Browse the repository at this point in the history
Make sure downsample group levels are sorted
  • Loading branch information
dstansby authored Sep 9, 2024
2 parents 4614277 + 122ebad commit 3dc2801
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 2 deletions.
5 changes: 5 additions & 0 deletions docs/guide.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,8 @@ Third-party multi-threading
``stack-to-chunk`` turns off third-party multi-threading in ``blosc`` when running.
This allows the ``n_processes`` argument to be respected when set to ``1``, and
prevents issues when ``stack_to_chunk`` uses a larger number of parallel processes.

Zarr group layout
-----------------
The zarr groups produced by ``stack-to-chunk`` contain zarr arrays labelled ``0``, ``1``, ``2``, ``3``... etc.
The array at ``0`` is the full-resolution dataset, and the array at level :math:`i` is downsampled by a factor of :math:`2^{i}` relative to it.
7 changes: 6 additions & 1 deletion src/stack_to_chunk/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from numcodecs.abc import Codec

from stack_to_chunk._array_helpers import _copy_slab, _downsample_block
from stack_to_chunk.ome_ngff import SPATIAL_UNIT
from stack_to_chunk.ome_ngff import SPATIAL_UNIT, DatasetDict


def memory_per_process(input_data: Array, *, chunk_size: int) -> int:
Expand Down Expand Up @@ -325,6 +325,11 @@ def _add_level_metadata(self, level: int = 0) -> None:
return

multiscales["datasets"].append(new_dataset)

def get_level(dataset_meta: DatasetDict) -> int:
    """Sort key: the downsample level, parsed from the dataset's ``path`` entry."""
    return int(dataset_meta["path"])

multiscales["datasets"] = sorted(multiscales["datasets"], key=get_level)
self._group.attrs["multiscales"] = [multiscales]


Expand Down
11 changes: 10 additions & 1 deletion src/stack_to_chunk/ome_ngff.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Info for working with OME-NGFF."""

from typing import Literal
from typing import Literal, TypedDict

SPATIAL_UNIT = Literal[
"angstrom",
Expand Down Expand Up @@ -30,3 +30,12 @@
"zeptometer",
"zettameter",
]


class DatasetDict(TypedDict):
"""
An OME-zarr dataset.
"""

coordinateTransformations: list[dict[str, str | list[int]]]
path: str
53 changes: 53 additions & 0 deletions src/stack_to_chunk/tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,3 +306,56 @@ def test_padding(tmp_path: Path) -> None:
group.add_downsample_level(1, n_processes=1)
arr_downsammpled = group[1]
np.testing.assert_equal(arr_downsammpled[:], [[[3]], [[12]]])


def test_metadata_sorting(tmp_path: Path) -> None:
    """Metadata levels added out of order end up sorted low-to-high in .zattrs."""
    # Check that metadata levels added in the wrong order (for some reason...)
    # are sorted from low to high.
    zarr_path = tmp_path / "group.ome.zarr"
    group = MultiScaleGroup(
        zarr_path,
        name="my_zarr_group",
        spatial_unit="centimeter",
        voxel_size=(3, 4, 5),
    )
    # Deliberately add level 1 before level 0 to exercise the sorting code path.
    group._add_level_metadata(1)  # noqa: SLF001
    group._add_level_metadata(0)  # noqa: SLF001
    # Expected: "datasets" lists path "0" before path "1" regardless of insert order.
    check_zattrs(
        zarr_path,
        {
            "multiscales": [
                {
                    "axes": [
                        {"name": "x", "type": "space", "unit": "centimeter"},
                        {"name": "y", "type": "space", "unit": "centimeter"},
                        {"name": "z", "type": "space", "unit": "centimeter"},
                    ],
                    "datasets": [
                        {
                            "coordinateTransformations": [
                                {"translation": [0.5, 0.5, 0.5], "type": "translation"},
                                {"scale": [3.0, 4.0, 5.0], "type": "scale"},
                            ],
                            "path": "0",
                        },
                        {
                            "coordinateTransformations": [
                                {"translation": [0.5, 0.5, 0.5], "type": "translation"},
                                {"scale": [6.0, 8.0, 10.0], "type": "scale"},
                            ],
                            "path": "1",
                        },
                    ],
                    "metadata": {
                        "description": "Downscaled using local mean in 2x2x2 blocks.",
                        "kwargs": {"block_size": 2, "func": "np.mean"},
                        "method": "skimage.measure.block_reduce",
                        "version": "0.24.0",
                    },
                    "name": "my_zarr_group",
                    "type": "local mean",
                    "version": "0.4",
                }
            ]
        },
    )

0 comments on commit 3dc2801

Please sign in to comment.