diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c88f685b0ba..099412a065c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,6 +22,9 @@ v2023.10.2 (unreleased) New Features ~~~~~~~~~~~~ +- Writing to an existing zarr file with differently ordered, but identically + named, dimensions is supported. + By `Maximilian Roos `_. Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 5b8f9a6840f..6f0896435f0 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -4,7 +4,7 @@ import os import time import traceback -from collections.abc import Iterable +from collections.abc import Hashable, Iterable, Mapping from glob import glob from typing import TYPE_CHECKING, Any, ClassVar @@ -15,6 +15,7 @@ from xarray.core.parallelcompat import get_chunked_array_type from xarray.core.pycompat import is_chunked_array from xarray.core.utils import FrozenDict, NdimSizeLenMixin, is_remote_uri +from xarray.core.variable import Variable if TYPE_CHECKING: from io import BufferedIOBase @@ -271,7 +272,7 @@ def sync(self, compute=True, chunkmanager_store_kwargs=None): class AbstractWritableDataStore(AbstractDataStore): __slots__ = () - def encode(self, variables, attributes): + def encode(self, variables: Mapping[Hashable, Variable], attributes: Mapping): """ Encode the variables and attributes in this store diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index d6ad15f4f87..d07db54c6d2 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -320,14 +320,19 @@ def encode_zarr_variable(var, needs_copy=True, name=None): return var -def _validate_existing_dims(var_name, new_var, existing_var, region, append_dim): +def _validate_existing_dims( + var_name, new_var, existing_var, region, append_dim +) -> Variable: if new_var.dims != existing_var.dims: - raise ValueError( - f"variable {var_name!r} already exists with different " - f"dimension names {existing_var.dims} != " - f"{new_var.dims}, but changing variable " - f"dimensions is not supported by to_zarr()." - ) + if set(new_var.dims) == set(existing_var.dims): + new_var = new_var.transpose(*existing_var.dims) + else: + raise ValueError( + f"variable {var_name!r} already exists with different " + f"dimension names {existing_var.dims} != " + f"{new_var.dims}, but changing variable " + f"dimensions is not supported by to_zarr()." + ) existing_sizes = {} for dim, size in existing_var.sizes.items(): @@ -347,6 +352,8 @@ def _validate_existing_dims(var_name, new_var, existing_var, region, append_dim) f"explicitly appending, but append_dim={append_dim!r}." ) + return new_var + def _put_attrs(zarr_obj, attrs): """Raise a more informative error message for invalid attrs.""" @@ -614,12 +621,10 @@ def store( variables_encoded.update(vars_with_encoding) for var_name in existing_variable_names: - new_var = variables_encoded[var_name] - existing_var = existing_vars[var_name] - _validate_existing_dims( + variables_encoded[var_name] = _validate_existing_dims( var_name, - new_var, - existing_var, + variables_encoded[var_name], + existing_vars[var_name], self._write_region, self._append_dim, ) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 59e9f655b2e..180bcb61c6c 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2439,6 +2439,19 @@ def test_to_zarr_append_compute_false_roundtrip(self) -> None: with self.open(store) as actual: assert_identical(xr.concat([ds, ds_to_append], dim="time"), actual) + def test_to_zarr_append_with_transposed_dims_works(self) -> None: + original = create_test_data().chunk() + + with self.create_zarr_target() as store: + self.save(original, store) + + to_append = original.transpose(*reversed(list(original.dims))) + + self.save(to_append, store, mode="a") + + with self.open(store) as actual: + assert_identical(original, actual) + @pytest.mark.parametrize("chunk", [False, True]) def test_save_emptydim(self, chunk) -> None: if chunk and not has_dask: