Skip to content

Commit

Permalink
Merge pull request #55 from HiPCTProject/sort-meta
Browse files Browse the repository at this point in the history
Make sure downsample group levels are sorted
  • Loading branch information
dstansby authored Sep 9, 2024
2 parents 4614277 + 122ebad commit 3dc2801
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 2 deletions.
5 changes: 5 additions & 0 deletions docs/guide.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,8 @@ Third-party multi-threading
``stack-to-chunk`` turns off third-party multi-threading in ``blosc`` when running.
This allows the ``n_processes`` argument to be respected when set to ``1``, and
prevents issues when ``stack_to_chunk`` uses a larger number of parallel processes.

Zarr group layout
-----------------
The zarr groups produced by ``stack-to-chunk`` contain zarr arrays labelled ``0``, ``1``, ``2``, ``3``... etc.
The array at ``0`` is the full-resolution dataset, and the array at level :math:`i` is downsampled by a factor of :math:`2^{i}` relative to it.
7 changes: 6 additions & 1 deletion src/stack_to_chunk/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from numcodecs.abc import Codec

from stack_to_chunk._array_helpers import _copy_slab, _downsample_block
from stack_to_chunk.ome_ngff import SPATIAL_UNIT
from stack_to_chunk.ome_ngff import SPATIAL_UNIT, DatasetDict


def memory_per_process(input_data: Array, *, chunk_size: int) -> int:
Expand Down Expand Up @@ -325,6 +325,11 @@ def _add_level_metadata(self, level: int = 0) -> None:
return

multiscales["datasets"].append(new_dataset)

def get_level(dataset_meta: DatasetDict) -> int:
    """Sort key: the downsample level, parsed from the dataset's ``path`` entry."""
    return int(dataset_meta["path"])

multiscales["datasets"] = sorted(multiscales["datasets"], key=get_level)
self._group.attrs["multiscales"] = [multiscales]


Expand Down
11 changes: 10 additions & 1 deletion src/stack_to_chunk/ome_ngff.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Info for working with OME-NGFF."""

from typing import Literal
from typing import Literal, TypedDict

SPATIAL_UNIT = Literal[
"angstrom",
Expand Down Expand Up @@ -30,3 +30,12 @@
"zeptometer",
"zettameter",
]


class DatasetDict(TypedDict):
"""
An OME-zarr dataset.
"""

coordinateTransformations: list[dict[str, str | list[int]]]
path: str
53 changes: 53 additions & 0 deletions src/stack_to_chunk/tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,3 +306,56 @@ def test_padding(tmp_path: Path) -> None:
group.add_downsample_level(1, n_processes=1)
arr_downsammpled = group[1]
np.testing.assert_equal(arr_downsammpled[:], [[[3]], [[12]]])


def test_metadata_sorting(tmp_path: Path) -> None:
    """Metadata levels added out of order end up sorted low-to-high in .zattrs."""
    # Check that metadata levels added in the wrong order (for some reason...)
    # are sorted from low to high.
    zarr_path = tmp_path / "group.ome.zarr"
    group = MultiScaleGroup(
        zarr_path,
        name="my_zarr_group",
        spatial_unit="centimeter",
        voxel_size=(3, 4, 5),
    )
    # Deliberately add level 1 before level 0 to exercise the sorting code path.
    group._add_level_metadata(1)  # noqa: SLF001
    group._add_level_metadata(0)  # noqa: SLF001
    # Expected: "datasets" lists path "0" before path "1" regardless of insert order.
    check_zattrs(
        zarr_path,
        {
            "multiscales": [
                {
                    "axes": [
                        {"name": "x", "type": "space", "unit": "centimeter"},
                        {"name": "y", "type": "space", "unit": "centimeter"},
                        {"name": "z", "type": "space", "unit": "centimeter"},
                    ],
                    "datasets": [
                        {
                            "coordinateTransformations": [
                                {"translation": [0.5, 0.5, 0.5], "type": "translation"},
                                {"scale": [3.0, 4.0, 5.0], "type": "scale"},
                            ],
                            "path": "0",
                        },
                        {
                            "coordinateTransformations": [
                                {"translation": [0.5, 0.5, 0.5], "type": "translation"},
                                {"scale": [6.0, 8.0, 10.0], "type": "scale"},
                            ],
                            "path": "1",
                        },
                    ],
                    "metadata": {
                        "description": "Downscaled using local mean in 2x2x2 blocks.",
                        "kwargs": {"block_size": 2, "func": "np.mean"},
                        "method": "skimage.measure.block_reduce",
                        "version": "0.24.0",
                    },
                    "name": "my_zarr_group",
                    "type": "local mean",
                    "version": "0.4",
                }
            ]
        },
    )

0 comments on commit 3dc2801

Please sign in to comment.