From c9b42b2bc6268cf0c736dc0430c2075c02d0fb5e Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Thu, 26 Oct 2023 19:07:26 -0700 Subject: [PATCH 1/3] Allow writing to zarr with differently ordered dims --- doc/whats-new.rst | 3 +++ xarray/backends/common.py | 5 +++-- xarray/backends/zarr.py | 29 +++++++++++++++++------------ xarray/tests/test_backends.py | 14 ++++++++++++++ 4 files changed, 37 insertions(+), 14 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c88f685b0ba..af9316fa4e9 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,6 +22,9 @@ v2023.10.2 (unreleased) New Features ~~~~~~~~~~~~ +- Writing to an existing zarr file with differently ordered, but identically + named, dimensions is supported. + By `Maximilian Roos `_. Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 5b8f9a6840f..6f0896435f0 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -4,7 +4,7 @@ import os import time import traceback -from collections.abc import Iterable +from collections.abc import Hashable, Iterable, Mapping from glob import glob from typing import TYPE_CHECKING, Any, ClassVar @@ -15,6 +15,7 @@ from xarray.core.parallelcompat import get_chunked_array_type from xarray.core.pycompat import is_chunked_array from xarray.core.utils import FrozenDict, NdimSizeLenMixin, is_remote_uri +from xarray.core.variable import Variable if TYPE_CHECKING: from io import BufferedIOBase @@ -271,7 +272,7 @@ def sync(self, compute=True, chunkmanager_store_kwargs=None): class AbstractWritableDataStore(AbstractDataStore): __slots__ = () - def encode(self, variables, attributes): + def encode(self, variables: Mapping[Hashable, Variable], attributes: Mapping): """ Encode the variables and attributes in this store diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index d6ad15f4f87..d07db54c6d2 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -320,14 +320,19 @@ def encode_zarr_variable(var, needs_copy=True, name=None): return var -def _validate_existing_dims(var_name, new_var, existing_var, region, append_dim): +def _validate_existing_dims( + var_name, new_var, existing_var, region, append_dim +) -> Variable: if new_var.dims != existing_var.dims: - raise ValueError( - f"variable {var_name!r} already exists with different " - f"dimension names {existing_var.dims} != " - f"{new_var.dims}, but changing variable " - f"dimensions is not supported by to_zarr()." - ) + if set(new_var.dims) == set(existing_var.dims): + new_var = new_var.transpose(*existing_var.dims) + else: + raise ValueError( + f"variable {var_name!r} already exists with different " + f"dimension names {existing_var.dims} != " + f"{new_var.dims}, but changing variable " + f"dimensions is not supported by to_zarr()." + ) existing_sizes = {} for dim, size in existing_var.sizes.items(): @@ -347,6 +352,8 @@ def _validate_existing_dims(var_name, new_var, existing_var, region, append_dim) f"explicitly appending, but append_dim={append_dim!r}." ) + return new_var + def _put_attrs(zarr_obj, attrs): """Raise a more informative error message for invalid attrs.""" @@ -614,12 +621,10 @@ def store( variables_encoded.update(vars_with_encoding) for var_name in existing_variable_names: - new_var = variables_encoded[var_name] - existing_var = existing_vars[var_name] - _validate_existing_dims( + variables_encoded[var_name] = _validate_existing_dims( var_name, - new_var, - existing_var, + variables_encoded[var_name], + existing_vars[var_name], self._write_region, self._append_dim, ) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 59e9f655b2e..a508400da73 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2382,6 +2382,7 @@ def test_append_with_new_variable(self) -> None: xr.open_dataset(store_target, engine="zarr", **self.version_kwargs), ) + @requires_dask @requires_dask def test_to_zarr_compute_false_roundtrip(self) -> None: from dask.delayed import Delayed @@ -2439,6 +2440,19 @@ def test_to_zarr_append_compute_false_roundtrip(self) -> None: with self.open(store) as actual: assert_identical(xr.concat([ds, ds_to_append], dim="time"), actual) + def test_to_zarr_append_with_transposed_dims_works(self) -> None: + original = create_test_data().chunk() + + with self.create_zarr_target() as store: + self.save(original, store) + + to_append = original.transpose(*reversed(list(original.dims))) + + self.save(to_append, store, mode="a") + + with self.open(store) as actual: + assert_identical(original, actual) + @pytest.mark.parametrize("chunk", [False, True]) def test_save_emptydim(self, chunk) -> None: if chunk and not has_dask: From 9dcf9e3bf068eb2efcc6fd15aec5385b7078846c Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Thu, 26 Oct 2023 23:49:00 -0700 Subject: [PATCH 2/3] --- xarray/tests/test_backends.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index a508400da73..180bcb61c6c 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2382,7 +2382,6 @@ def test_append_with_new_variable(self) -> None: xr.open_dataset(store_target, engine="zarr", **self.version_kwargs), ) - @requires_dask @requires_dask def test_to_zarr_compute_false_roundtrip(self) -> None: from dask.delayed import Delayed From eef89b11faa890dc61114a7ace13826b06fe060c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 27 Oct 2023 06:49:32 +0000 Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index af9316fa4e9..099412a065c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -23,7 +23,7 @@ New Features ~~~~~~~~~~~~ - Writing to an existing zarr file with differently ordered, but identically - named, dimensions is supported. + named, dimensions is supported. By `Maximilian Roos `_. Breaking changes