From 19708fa82c57b1275e613b77e45abcbe54369287 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 1 Dec 2023 13:23:58 -0500 Subject: [PATCH 01/21] raise FutureWarning --- xarray/core/dataset.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index c65bbd6b849..a7e9805904f 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -777,13 +777,20 @@ def dims(self) -> Frozen[Hashable, int]: Note that type of this object differs from `DataArray.dims`. See `Dataset.sizes` and `DataArray.sizes` for consistently named - properties. + properties. This property will be changed to return a type more consistent with + `DataArray.dims` in the future, i.e. a set of dimension names. See Also -------- Dataset.sizes DataArray.dims """ + warnings.warn( + "The return type of `Dataset.dims` will be changed to return a set of dimension names in future, " + "in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, " + "please use `Dataset.sizes`.", + FutureWarning, + ) return Frozen(self._dims) @property From 9747fe772d6ea59932190438dad91b9150567296 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 1 Dec 2023 13:24:59 -0500 Subject: [PATCH 02/21] change some internal instances of ds.dims -> ds.sizes --- xarray/core/dataset.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index a7e9805904f..b578eadeb63 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -806,7 +806,7 @@ def sizes(self) -> Frozen[Hashable, int]: -------- DataArray.sizes """ - return self.dims + return Frozen(self._dims) @property def dtypes(self) -> Frozen[Hashable, np.dtype]: @@ -1417,7 +1417,7 @@ def _copy_listed(self, names: Iterable[Hashable]) -> Self: variables[name] = self._variables[name] except KeyError: ref_name, var_name, var = _get_virtual_variable( - self._variables, name, self.dims + self._variables, name, self.sizes ) variables[var_name] = var if ref_name in self._coord_names or ref_name in self.dims: @@ -1454,7 +1454,7 @@ def _construct_dataarray(self, name: Hashable) -> DataArray: try: variable = self._variables[name] except KeyError: - _, name, variable = _get_virtual_variable(self._variables, name, self.dims) + _, name, variable = _get_virtual_variable(self._variables, name, self.sizes) needed_dims = set(variable.dims) @@ -1481,7 +1481,7 @@ def _item_sources(self) -> Iterable[Mapping[Hashable, Any]]: yield HybridMappingProxy(keys=self._coord_names, mapping=self.coords) # virtual coordinates - yield HybridMappingProxy(keys=self.dims, mapping=self) + yield HybridMappingProxy(keys=self.sizes, mapping=self) def __contains__(self, key: object) -> bool: """The 'in' operator will return true or false depending on whether @@ -2558,7 +2558,7 @@ def info(self, buf: IO | None = None) -> None: lines = [] lines.append("xarray.Dataset {") lines.append("dimensions:") - for name, size in self.dims.items(): + for name, size in self.sizes(): lines.append(f"\t{name} = {size} ;") lines.append("\nvariables:") for name, da in self.variables.items(): @@ -2686,10 +2686,10 @@ def chunk( else: chunks_mapping = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") - bad_dims = chunks_mapping.keys() - self.dims.keys() + bad_dims = chunks_mapping.keys() - self.sizes.keys() if bad_dims: raise ValueError( - f"chunks keys {tuple(bad_dims)} not found in data dimensions {tuple(self.dims)}" + f"chunks keys {tuple(bad_dims)} not found in data dimensions {tuple(self.sizes.keys())}" ) chunkmanager = guess_chunkmanager(chunked_array_type) @@ -5157,7 +5157,7 @@ def _get_stack_index( if dim in self._variables: var = self._variables[dim] else: - _, _, var = _get_virtual_variable(self._variables, dim, self.dims) + _, _, var = _get_virtual_variable(self._variables, dim, self.sizes) # dummy index (only `stack_coords` will be used to construct the multi-index) stack_index = PandasIndex([0], dim) stack_coords = {dim: var} From 0362233710073708247d33d8373258a2559cb1dd Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 1 Dec 2023 13:27:36 -0500 Subject: [PATCH 03/21] improve clarity of which unexpected errors were raised --- xarray/tests/__init__.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index f7f8f823d78..704b84a9944 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -222,11 +222,18 @@ def source_ndarray(array): return base +def format_record(record) -> str: + """Format warning record like `FutureWarning('Function will be deprecated...')`""" + return f"{str(record.category)[8:-2]}('{record.message}'))" + + @contextmanager def assert_no_warnings(): with warnings.catch_warnings(record=True) as record: yield record - assert len(record) == 0, "got unexpected warning(s)" + assert ( + len(record) == 0 + ), f"Got {len(record)} unexpected warning(s): {[format_record(r) for r in record]}" # Internal versions of xarray's test functions that validate additional From 99d7e16f14f9dfc5665bfd138faaad5c95e4bba4 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 1 Dec 2023 13:35:54 -0500 Subject: [PATCH 04/21] whatsnew --- doc/whats-new.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 817ea2c8235..605701664cf 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -51,6 +51,12 @@ Deprecations currently ``PendingDeprecationWarning``, which are silenced by default. We'll convert these to ``DeprecationWarning`` in a future release. By `Maximilian Roos `_. +- Raise a ``FutureWarning`` warning that the type of :py:meth:`Dataset.dims` will be changed + from a mapping of dimension names to lengths to a set of dimension names. + This is to increase consistency with :py:meth:`DataArray.dims`. + To access a mapping of dimension names to lengths please use :py:meth:`Dataset.sizes`. + (:issue:`8496`, :pull:`8500`) + By `Tom Nicholas `_. Bug fixes ~~~~~~~~~ From 3e05777b9cbf211086559e7b73e6d0ad19ef347d Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 1 Dec 2023 14:33:51 -0500 Subject: [PATCH 05/21] return a class which warns if treated like a Mapping --- xarray/core/dataset.py | 9 ++------- xarray/core/utils.py | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index b578eadeb63..cca63afeca5 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -105,6 +105,7 @@ from xarray.core.utils import ( Default, Frozen, + FrozenMappingWarningOnValuesAccess, HybridMappingProxy, OrderedSet, _default, @@ -785,13 +786,7 @@ def dims(self) -> Frozen[Hashable, int]: Dataset.sizes DataArray.dims """ - warnings.warn( - "The return type of `Dataset.dims` will be changed to return a set of dimension names in future, " - "in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, " - "please use `Dataset.sizes`.", - FutureWarning, - ) - return Frozen(self._dims) + return FrozenMappingWarningOnValuesAccess(self._dims) @property def sizes(self) -> Frozen[Hashable, int]: diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 9ba4a43f6d9..d49f922b59f 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -50,12 +50,15 @@ Collection, Container, Hashable, + ItemsView, Iterable, Iterator, + KeysView, Mapping, MutableMapping, MutableSet, Sequence, + ValuesView, ) from enum import Enum from typing import ( @@ -473,6 +476,42 @@ def FrozenDict(*args, **kwargs) -> Frozen: return Frozen(dict(*args, **kwargs)) +class FrozenMappingWarningOnValuesAccess(Frozen[K, V]): + """ + Class which behaves like a Mapping but warns if the values are accessed. + + Temporary object to aid in deprecation cycle of `Dataset.dims` (see GH issue #8496). + `Dataset.dims` is being changed from returning a mapping of dimension names to lengths to just + returning a frozen set of dimension names (to increase consistency with `DataArray.dims`). + This class retains backwards compatibility but raises a warning only if the return value + of ds.dims is used like a dictionary. + """ + + def _warn(self) -> None: + warnings.warn( + "The return type of `Dataset.dims` will be changed to return a set of dimension names in future, " + "in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, " + "please use `Dataset.sizes`.", + FutureWarning, + ) + + def __getitem__(self, key: K) -> V: + self._warn() + return super().__getitem__(key) + + def keys(self) -> KeysView[K]: + self._warn() + return super().keys() + + def items(self) -> ItemsView[K, V]: + self._warn() + return super().items() + + def values(self) -> ValuesView[V]: + self._warn() + return super().values() + + class HybridMappingProxy(Mapping[K, V]): """Implements the Mapping interface. Uses the wrapped mapping for item lookup and a separate wrapped keys collection for iteration. From 5a22dca0a43da78599b23d13815c1d82f6686f01 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 1 Dec 2023 14:48:54 -0500 Subject: [PATCH 06/21] fix failing tests --- xarray/core/common.py | 2 +- xarray/core/dataset.py | 16 ++++++++-------- xarray/core/utils.py | 3 ++- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index cebd8f2a95b..9956113d541 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1169,7 +1169,7 @@ def _dataset_indexer(dim: Hashable) -> DataArray: cond_wdim = cond.drop_vars( var for var in cond if dim not in cond[var].dims ) - keepany = cond_wdim.any(dim=(d for d in cond.dims.keys() if d != dim)) + keepany = cond_wdim.any(dim=(d for d in cond.dims if d != dim)) return keepany.to_dataarray().any("variable") _get_indexer = ( diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index cca63afeca5..a6e4da4a184 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1427,7 +1427,7 @@ def _copy_listed(self, names: Iterable[Hashable]) -> Self: for v in variables.values(): needed_dims.update(v.dims) - dims = {k: self.dims[k] for k in needed_dims} + dims = {k: self.sizes[k] for k in needed_dims} # preserves ordering of coordinates for k in self._variables: @@ -2553,7 +2553,7 @@ def info(self, buf: IO | None = None) -> None: lines = [] lines.append("xarray.Dataset {") lines.append("dimensions:") - for name, size in self.sizes(): + for name, size in self.sizes.items(): lines.append(f"\t{name} = {size} ;") lines.append("\nvariables:") for name, da in self.variables.items(): @@ -4160,7 +4160,7 @@ def _rename_vars( return variables, coord_names def _rename_dims(self, name_dict: Mapping[Any, Hashable]) -> dict[Hashable, int]: - return {name_dict.get(k, k): v for k, v in self.dims.items()} + return {name_dict.get(k, k): v for k, v in self.sizes.items()} def _rename_indexes( self, name_dict: Mapping[Any, Hashable], dims_dict: Mapping[Any, Hashable] @@ -5179,7 +5179,7 @@ def _stack_once( if any(d in var.dims for d in dims): add_dims = [d for d in dims if d not in var.dims] vdims = list(var.dims) + add_dims - shape = [self.dims[d] for d in vdims] + shape = [self.sizes[d] for d in vdims] exp_var = var.set_dims(vdims, shape) stacked_var = exp_var.stack(**{new_dim: dims}) new_variables[name] = stacked_var @@ -6321,7 +6321,7 @@ def dropna( if subset is None: subset = iter(self.data_vars) - count = np.zeros(self.dims[dim], dtype=np.int64) + count = np.zeros(self.sizes[dim], dtype=np.int64) size = np.int_(0) # for type checking for k in subset: @@ -6329,7 +6329,7 @@ def dropna( if dim in array.dims: dims = [d for d in array.dims if d != dim] count += np.asarray(array.count(dims)) - size += math.prod([self.dims[d] for d in dims]) + size += math.prod([self.sizes[d] for d in dims]) if thresh is not None: mask = count >= thresh @@ -7106,7 +7106,7 @@ def _normalize_dim_order( f"Dataset: {list(self.dims)}" ) - ordered_dims = {k: self.dims[k] for k in dim_order} + ordered_dims = {k: self.sizes[k] for k in dim_order} return ordered_dims @@ -7366,7 +7366,7 @@ def to_dask_dataframe( var = self.variables[name] except KeyError: # dimension without a matching coordinate - size = self.dims[name] + size = self.sizes[name] data = da.arange(size, chunks=size, dtype=np.int64) var = Variable((name,), data) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index d49f922b59f..c0537ad5cba 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -484,7 +484,8 @@ class FrozenMappingWarningOnValuesAccess(Frozen[K, V]): `Dataset.dims` is being changed from returning a mapping of dimension names to lengths to just returning a frozen set of dimension names (to increase consistency with `DataArray.dims`). This class retains backwards compatibility but raises a warning only if the return value - of ds.dims is used like a dictionary. + of ds.dims is used like a dictionary (i.e. it doesn't raise a warning if used in a way that + would also be valid for a FrozenSet, e.g. iteration). """ def _warn(self) -> None: From 72eb62d2c753801add64e529c2104a13bd351f84 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 1 Dec 2023 15:07:58 -0500 Subject: [PATCH 07/21] avoid some warnings in the docs --- doc/gallery/plot_cartopy_facetgrid.py | 2 +- doc/user-guide/interpolation.rst | 4 ++-- doc/user-guide/terminology.rst | 9 ++++----- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/doc/gallery/plot_cartopy_facetgrid.py b/doc/gallery/plot_cartopy_facetgrid.py index d8f5e73ee56..a4ab23c42a6 100644 --- a/doc/gallery/plot_cartopy_facetgrid.py +++ b/doc/gallery/plot_cartopy_facetgrid.py @@ -30,7 +30,7 @@ transform=ccrs.PlateCarree(), # the data's projection col="time", col_wrap=1, # multiplot settings - aspect=ds.dims["lon"] / ds.dims["lat"], # for a sensible figsize + aspect=ds.sizes["lon"] / ds.sizes["lat"], # for a sensible figsize subplot_kws={"projection": map_proj}, # the plot's projection ) diff --git a/doc/user-guide/interpolation.rst b/doc/user-guide/interpolation.rst index 7b40962e826..311e1bf0129 100644 --- a/doc/user-guide/interpolation.rst +++ b/doc/user-guide/interpolation.rst @@ -292,8 +292,8 @@ Let's see how :py:meth:`~xarray.DataArray.interp` works on real data. axes[0].set_title("Raw data") # Interpolated data - new_lon = np.linspace(ds.lon[0], ds.lon[-1], ds.dims["lon"] * 4) - new_lat = np.linspace(ds.lat[0], ds.lat[-1], ds.dims["lat"] * 4) + new_lon = np.linspace(ds.lon[0], ds.lon[-1], ds.sizes["lon"] * 4) + new_lat = np.linspace(ds.lat[0], ds.lat[-1], ds.sizes["lat"] * 4) dsi = ds.interp(lat=new_lat, lon=new_lon) dsi.air.plot(ax=axes[1]) @savefig interpolation_sample3.png width=8in diff --git a/doc/user-guide/terminology.rst b/doc/user-guide/terminology.rst index ce7e55546a4..55937310827 100644 --- a/doc/user-guide/terminology.rst +++ b/doc/user-guide/terminology.rst @@ -47,9 +47,9 @@ complete examples, please consult the relevant documentation.* all but one of these degrees of freedom is fixed. We can think of each dimension axis as having a name, for example the "x dimension". In xarray, a ``DataArray`` object's *dimensions* are its named dimension - axes, and the name of the ``i``-th dimension is ``arr.dims[i]``. If an - array is created without dimension names, the default dimension names are - ``dim_0``, ``dim_1``, and so forth. + axes ``da.dims``, and the name of the ``i``-th dimension is ``da.dims[i]``. + If an array is created without specifying dimension names, the default dimension + names will be ``dim_0``, ``dim_1``, and so forth. Coordinate An array that labels a dimension or set of dimensions of another @@ -61,8 +61,7 @@ complete examples, please consult the relevant documentation.* ``arr.coords[x]``. A ``DataArray`` can have more coordinates than dimensions because a single dimension can be labeled by multiple coordinate arrays. However, only one coordinate array can be a assigned - as a particular dimension's dimension coordinate array. As a - consequence, ``len(arr.dims) <= len(arr.coords)`` in general. + as a particular dimension's dimension coordinate array. Dimension coordinate A one-dimensional coordinate array assigned to ``arr`` with both a name From e911155de4719c2189d2afc8272085243da7b2ae Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 1 Dec 2023 15:11:48 -0500 Subject: [PATCH 08/21] silence warning caused by #8491 --- xarray/tests/test_variable.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 0bea3f63673..a2ae1e61cf2 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1705,6 +1705,7 @@ def test_broadcasting_math(self): v * w[0], Variable(["a", "b", "c", "d"], np.einsum("ab,cd->abcd", x, y[0])) ) + @pytest.mark.filterwarnings("ignore:Duplicate dimension names") def test_broadcasting_failures(self): a = Variable(["x"], np.arange(10)) b = Variable(["x"], np.arange(5)) From 2c9cb7de0b83c1d713f97b589ab0c539406d9130 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 1 Dec 2023 15:19:47 -0500 Subject: [PATCH 09/21] fix another warning --- xarray/core/concat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 8c558b38848..c7a7c178bd8 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -315,7 +315,7 @@ def _calc_concat_over(datasets, dim, dim_names, data_vars: T_DataVars, coords, c if dim in ds: ds = ds.set_coords(dim) concat_over.update(k for k, v in ds.variables.items() if dim in v.dims) - concat_dim_lengths.append(ds.dims.get(dim, 1)) + concat_dim_lengths.append(ds.sizes.get(dim, 1)) def process_subset_opt(opt, subset): if isinstance(opt, str): @@ -431,7 +431,7 @@ def _parse_datasets( variables_order: dict[Hashable, Variable] = {} # variables in order of appearance for ds in datasets: - dims_sizes.update(ds.dims) + dims_sizes.update(ds.sizes) all_coord_names.update(ds.coords) data_vars.update(ds.data_vars) variables_order.update(ds.variables) From d862f891f0114667c89e963840212df40e71b22f Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 1 Dec 2023 15:51:48 -0500 Subject: [PATCH 10/21] typing of .get --- xarray/core/utils.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index c0537ad5cba..61372513b2a 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -488,6 +488,8 @@ class FrozenMappingWarningOnValuesAccess(Frozen[K, V]): would also be valid for a FrozenSet, e.g. iteration). """ + __slots__ = ("mapping",) + def _warn(self) -> None: warnings.warn( "The return type of `Dataset.dims` will be changed to return a set of dimension names in future, " @@ -500,6 +502,18 @@ def __getitem__(self, key: K) -> V: self._warn() return super().__getitem__(key) + @overload + def get(self, key: K, /) -> V | None: + ... + + @overload + def get(self, key: K, /, default: V | T) -> V | T: + ... + + def get(self, key: K, default: T | None = None) -> V | T | None: + self._warn() + return super().get(key, default) + def keys(self) -> KeysView[K]: self._warn() return super().keys() From 225414cf8fec2edb168781a88d3c1555ee7a7835 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 1 Dec 2023 16:19:56 -0500 Subject: [PATCH 11/21] fix various uses of ds.dims in tests --- xarray/tests/test_computation.py | 2 +- xarray/tests/test_dataset.py | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 396507652c6..0d9b7c88ae1 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1378,7 +1378,7 @@ def func(da): expected = extract(ds) actual = extract(ds.chunk()) - assert actual.dims == {"lon_new": 3, "lat_new": 6} + assert actual.sizes == {"lon_new": 3, "lat_new": 6} assert_identical(expected.chunk(), actual) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index a53d81e36af..fe4c7f84570 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -694,10 +694,11 @@ def test_properties(self) -> None: # These exact types aren't public API, but this makes sure we don't # change them inadvertently: assert isinstance(ds.dims, utils.Frozen) + # TODO change after deprecation cycle in GH #8500 is complete assert isinstance(ds.dims.mapping, dict) assert type(ds.dims.mapping) is dict # noqa: E721 - assert ds.dims == {"dim1": 8, "dim2": 9, "dim3": 10, "time": 20} - assert ds.sizes == ds.dims + assert ds.dims == ds.sizes + assert ds.sizes == {"dim1": 8, "dim2": 9, "dim3": 10, "time": 20} # dtypes assert isinstance(ds.dtypes, utils.Frozen) @@ -804,7 +805,7 @@ def test_modify_inplace(self) -> None: b = Dataset() b["x"] = ("x", vec, attributes) assert_identical(a["x"], b["x"]) - assert a.dims == b.dims + assert a.sizes == b.sizes # this should work a["x"] = ("x", vec[:5]) a["z"] = ("x", np.arange(5)) @@ -865,7 +866,7 @@ def test_coords_properties(self) -> None: assert expected == actual # dims - assert coords.dims == {"x": 2, "y": 3} + assert coords.sizes == {"x": 2, "y": 3} # dtypes assert coords.dtypes == { @@ -1215,9 +1216,9 @@ def test_isel(self) -> None: assert list(data.dims) == list(ret.dims) for d in data.dims: if d in slicers: - assert ret.dims[d] == np.arange(data.dims[d])[slicers[d]].size + assert ret.sizes[d] == np.arange(data.sizes[d])[slicers[d]].size else: - assert data.dims[d] == ret.dims[d] + assert data.sizes[d] == ret.sizes[d] # Verify that the data is what we expect for v in data.variables: assert data[v].dims == ret[v].dims @@ -4971,7 +4972,7 @@ def test_pickle(self) -> None: roundtripped = pickle.loads(pickle.dumps(data)) assert_identical(data, roundtripped) # regression test for #167: - assert data.dims == roundtripped.dims + assert data.sizes == roundtripped.sizes def test_lazy_load(self) -> None: store = InaccessibleVariableDataStore() From 3391c832762857593593a20f6f564dff885601e2 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 1 Dec 2023 16:22:55 -0500 Subject: [PATCH 12/21] fix some warnings --- xarray/core/formatting.py | 2 +- xarray/core/utils.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index ea0e6275fb6..60cf2daf045 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -739,7 +739,7 @@ def dataset_repr(ds): def diff_dim_summary(a, b): - if a.dims != b.dims: + if a.sizes != b.sizes: return f"Differing dimensions:\n ({dim_summary(a)}) != ({dim_summary(b)})" else: return "" diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 61372513b2a..66af2bcad76 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -497,6 +497,7 @@ def _warn(self) -> None: "please use `Dataset.sizes`.", FutureWarning, ) + raise NotImplementedError() def __getitem__(self, key: K) -> V: self._warn() From 93f8caa6abcfbc66caa93a98f5685fd5ba4f2216 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 1 Dec 2023 16:25:26 -0500 Subject: [PATCH 13/21] add test that FutureWarnings are correctly raised --- xarray/tests/test_dataset.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index fe4c7f84570..bb27091f5ee 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -750,6 +750,27 @@ def test_properties(self) -> None: == 16 ) + def test_warn_ds_dims_deprecation(self) -> None: + # TODO remove after deprecation cycle in GH #8500 is complete + ds = create_test_data() + + with pytest.warns(FutureWarning, match="return type"): + ds.dims["dim1"] + + with pytest.warns(FutureWarning, match="return type"): + ds.dims.keys() + + with pytest.warns(FutureWarning, match="return type"): + ds.dims.values() + + with pytest.warns(FutureWarning, match="return type"): + ds.dims.items() + + with assert_no_warnings(): + len(ds.dims) + ds.dims.__iter__() + "dim1" in ds.dims + def test_asarray(self) -> None: ds = Dataset({"x": 0}) with pytest.raises(TypeError, match=r"cannot directly convert"): From c0aa49195c9f1dc9a99544641eb4677cc16317d5 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 1 Dec 2023 16:32:44 -0500 Subject: [PATCH 14/21] more fixes to avoid warnings --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index a6e4da4a184..5a260fa6f1e 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -7439,7 +7439,7 @@ def to_dict( d: dict = { "coords": {}, "attrs": decode_numpy_dict_values(self.attrs), - "dims": dict(self.dims), + "dims": dict(self.sizes), "data_vars": {}, } for k in self.coords: From e4d45ac1bf39bc12293d53302ac2136c15652828 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 1 Dec 2023 16:33:52 -0500 Subject: [PATCH 15/21] update tests to avoid warnings --- xarray/tests/test_concat.py | 4 ++-- xarray/tests/test_coordinates.py | 3 ++- xarray/tests/test_dataset.py | 12 ++++++------ xarray/tests/test_missing.py | 2 +- xarray/tests/test_rolling.py | 12 ++++++------ 5 files changed, 17 insertions(+), 16 deletions(-) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 92415631748..d1fc085bf0f 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -509,7 +509,7 @@ def test_concat_coords_kwarg(self, data, dim, coords) -> None: actual = concat(datasets, data[dim], coords=coords) if coords == "all": - expected = np.array([data["extra"].values for _ in range(data.dims[dim])]) + expected = np.array([data["extra"].values for _ in range(data.sizes[dim])]) assert_array_equal(actual["extra"].values, expected) else: @@ -1214,7 +1214,7 @@ def test_concat_preserve_coordinate_order() -> None: # check dimension order for act, exp in zip(actual.dims, expected.dims): assert act == exp - assert actual.dims[act] == expected.dims[exp] + assert actual.sizes[act] == expected.sizes[exp] # check coordinate order for act, exp in zip(actual.coords, expected.coords): diff --git a/xarray/tests/test_coordinates.py b/xarray/tests/test_coordinates.py index ef73371dfe4..68ce55b05da 100644 --- a/xarray/tests/test_coordinates.py +++ b/xarray/tests/test_coordinates.py @@ -79,9 +79,10 @@ def test_from_pandas_multiindex(self) -> None: for name in ("x", "one", "two"): assert_identical(expected[name], coords.variables[name]) + @pytest.mark.filterwarnings("ignore:return type") def test_dims(self) -> None: coords = Coordinates(coords={"x": [0, 1, 2]}) - assert coords.dims == {"x": 3} + assert set(coords.dims) == {"x"} def test_sizes(self) -> None: coords = Coordinates(coords={"x": [0, 1, 2]}) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index bb27091f5ee..c02977ec995 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1273,19 +1273,19 @@ def test_isel(self) -> None: assert_identical(data, data.isel(not_a_dim=slice(0, 2), missing_dims="ignore")) ret = data.isel(dim1=0) - assert {"time": 20, "dim2": 9, "dim3": 10} == ret.dims + assert {"time": 20, "dim2": 9, "dim3": 10} == ret.sizes assert set(data.data_vars) == set(ret.data_vars) assert set(data.coords) == set(ret.coords) assert set(data.xindexes) == set(ret.xindexes) ret = data.isel(time=slice(2), dim1=0, dim2=slice(5)) - assert {"time": 2, "dim2": 5, "dim3": 10} == ret.dims + assert {"time": 2, "dim2": 5, "dim3": 10} == ret.sizes assert set(data.data_vars) == set(ret.data_vars) assert set(data.coords) == set(ret.coords) assert set(data.xindexes) == set(ret.xindexes) ret = data.isel(time=0, dim1=0, dim2=slice(5)) - assert {"dim2": 5, "dim3": 10} == ret.dims + assert {"dim2": 5, "dim3": 10} == ret.sizes assert set(data.data_vars) == set(ret.data_vars) assert set(data.coords) == set(ret.coords) assert set(data.xindexes) == set(list(ret.xindexes) + ["time"]) @@ -5451,7 +5451,7 @@ def test_reduce_non_numeric(self) -> None: data2 = create_test_data(seed=44) add_vars = {"var4": ["dim1", "dim2"], "var5": ["dim1"]} for v, dims in sorted(add_vars.items()): - size = tuple(data1.dims[d] for d in dims) + size = tuple(data1.sizes[d] for d in dims) data = np.random.randint(0, 100, size=size).astype(np.str_) data1[v] = (dims, data, {"foo": "variable"}) @@ -6500,7 +6500,7 @@ def test_pad(self) -> None: assert padded["var1"].shape == (8, 11) assert padded["var2"].shape == (8, 11) assert padded["var3"].shape == (10, 8) - assert dict(padded.dims) == {"dim1": 8, "dim2": 11, "dim3": 10, "time": 20} + assert dict(padded.sizes) == {"dim1": 8, "dim2": 11, "dim3": 10, "time": 20} np.testing.assert_equal(padded["var1"].isel(dim2=[0, -1]).data, 42) np.testing.assert_equal(padded["dim2"][[0, -1]].data, np.nan) @@ -7195,7 +7195,7 @@ def test_clip(ds) -> None: assert all((result.max(...) <= 0.75).values()) result = ds.clip(min=ds.mean("y"), max=ds.mean("y")) - assert result.dims == ds.dims + assert result.sizes == ds.sizes class TestDropDuplicates: diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 20a54c3ed53..45a649605f3 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -548,7 +548,7 @@ def test_ffill_limit(): def test_interpolate_dataset(ds): actual = ds.interpolate_na(dim="time") # no missing values in var1 - assert actual["var1"].count("time") == actual.dims["time"] + assert actual["var1"].count("time") == actual.sizes["time"] # var2 should be the same as it was assert_array_equal(actual["var2"], ds["var2"]) diff --git a/xarray/tests/test_rolling.py b/xarray/tests/test_rolling.py index cb7b723a208..4e1a7f9501b 100644 --- a/xarray/tests/test_rolling.py +++ b/xarray/tests/test_rolling.py @@ -217,7 +217,7 @@ def test_rolling_reduce(self, da, center, min_periods, window, name) -> None: actual = rolling_obj.reduce(getattr(np, "nan%s" % name)) expected = getattr(rolling_obj, name)() assert_allclose(actual, expected) - assert actual.dims == expected.dims + assert actual.sizes == expected.sizes @pytest.mark.parametrize("center", (True, False)) @pytest.mark.parametrize("min_periods", (None, 1, 2, 3)) @@ -237,7 +237,7 @@ def test_rolling_reduce_nonnumeric(self, center, min_periods, window, name) -> N actual = rolling_obj.reduce(getattr(np, "nan%s" % name)) expected = getattr(rolling_obj, name)() assert_allclose(actual, expected) - assert actual.dims == expected.dims + assert actual.sizes == expected.sizes def test_rolling_count_correct(self) -> None: da = DataArray([0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], dims="time") @@ -291,7 +291,7 @@ def test_ndrolling_reduce(self, da, center, min_periods, name) -> None: )() assert_allclose(actual, expected) - assert actual.dims == expected.dims + assert actual.sizes == expected.sizes if name in ["mean"]: # test our reimplementation of nanmean using np.nanmean @@ -700,7 +700,7 @@ def test_rolling_reduce(self, ds, center, min_periods, window, name) -> None: actual = rolling_obj.reduce(getattr(np, "nan%s" % name)) expected = getattr(rolling_obj, name)() assert_allclose(actual, expected) - assert ds.dims == actual.dims + assert ds.sizes == actual.sizes # make sure the order of data_var are not changed. assert list(ds.data_vars.keys()) == list(actual.data_vars.keys()) @@ -727,7 +727,7 @@ def test_ndrolling_reduce(self, ds, center, min_periods, name, dask) -> None: name, )() assert_allclose(actual, expected) - assert actual.dims == expected.dims + assert actual.sizes == expected.sizes # Do it in the opposite order expected = getattr( @@ -738,7 +738,7 @@ def test_ndrolling_reduce(self, ds, center, min_periods, name, dask) -> None: )() assert_allclose(actual, expected) - assert actual.dims == expected.dims + assert actual.sizes == expected.sizes @pytest.mark.parametrize("center", (True, False, (True, False))) @pytest.mark.parametrize("fill_value", (np.nan, 0.0)) From abc4a20a5db6903fe65f476113665af11f93f10f Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 1 Dec 2023 22:08:12 -0500 Subject: [PATCH 16/21] yet more fixes to avoid warnings --- xarray/core/dataset.py | 2 +- xarray/core/formatting_html.py | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 5a260fa6f1e..a28f9c8401d 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3936,7 +3936,7 @@ def maybe_variable(obj, k): try: return obj._variables[k] except KeyError: - return as_variable((k, range(obj.dims[k]))) + return as_variable((k, range(obj.sizes[k]))) def _validate_interp_indexer(x, new_x): # In the case of datetimes, the restrictions placed on indexers diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py index 3627554cf57..efd74111823 100644 --- a/xarray/core/formatting_html.py +++ b/xarray/core/formatting_html.py @@ -37,17 +37,18 @@ def short_data_repr_html(array) -> str: return f"
{text}
" -def format_dims(dims, dims_with_index) -> str: - if not dims: +def format_dims(dim_sizes, dims_with_index) -> str: + if not dim_sizes: return "" dim_css_map = { - dim: " class='xr-has-index'" if dim in dims_with_index else "" for dim in dims + dim: " class='xr-has-index'" if dim in dims_with_index else "" + for dim in dim_sizes } dims_li = "".join( f"
  • " f"{escape(str(dim))}: {size}
  • " - for dim, size in dims.items() + for dim, size in dim_sizes.items() ) return f"
      {dims_li}
    " @@ -204,7 +205,7 @@ def _mapping_section( def dim_section(obj) -> str: - dim_list = format_dims(obj.dims, obj.xindexes.dims) + dim_list = format_dims(obj.sizes, obj.xindexes.dims) return collapsible_section( "Dimensions", inline_details=dim_list, enabled=False, collapsed=True From ef11e77302e319fc2831d2b290ac8c3d698f175c Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 2 Dec 2023 00:24:51 -0500 Subject: [PATCH 17/21] also warn in groupby.dims --- xarray/core/groupby.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 8c81d3e6a96..15bd8d1e35b 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -36,6 +36,7 @@ from xarray.core.pycompat import integer_types from xarray.core.types import Dims, QuantileMethods, T_DataArray, T_Xarray from xarray.core.utils import ( + FrozenMappingWarningOnValuesAccess, either_dict_or_kwargs, hashable, is_scalar, @@ -1519,7 +1520,7 @@ def dims(self) -> Frozen[Hashable, int]: if self._dims is None: self._dims = self._obj.isel({self._group_dim: self._group_indices[0]}).dims - return self._dims + return FrozenMappingWarningOnValuesAccess(self._dims) def map( self, From 965673bfada4ffbee76197619fc9d5bc0ba0d14c Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 2 Dec 2023 00:25:09 -0500 Subject: [PATCH 18/21] change groupby tests to match --- xarray/tests/test_groupby.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index b166992deb1..84820d56c45 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -59,6 +59,7 @@ def test_consolidate_slices() -> None: _consolidate_slices([slice(3), 4]) # type: ignore[list-item] +@pytest.mark.filterwarnings("ignore:return type") def test_groupby_dims_property(dataset) -> None: assert dataset.groupby("x").dims == dataset.isel(x=1).dims assert dataset.groupby("y").dims == dataset.isel(y=1).dims @@ -67,6 +68,14 @@ def test_groupby_dims_property(dataset) -> None: assert stacked.groupby("xy").dims == stacked.isel(xy=0).dims +def test_groupby_sizes_property(dataset) -> None: + assert dataset.groupby("x").sizes == dataset.isel(x=1).sizes + assert dataset.groupby("y").sizes == dataset.isel(y=1).sizes + + stacked = dataset.stack({"xy": ("x", "y")}) + assert stacked.groupby("xy").sizes == stacked.isel(xy=0).sizes + + def test_multi_index_groupby_map(dataset) -> None: # regression test for GH873 ds = dataset.isel(z=1, drop=True)[["foo"]] From 27f76423d36e1202d6f0e3f4b73a282c06d2c45c Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 2 Dec 2023 00:29:03 -0500 Subject: [PATCH 19/21] update whatsnew to include groupby deprecation --- doc/whats-new.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 605701664cf..48ee1d5a995 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -55,6 +55,7 @@ Deprecations from a mapping of dimension names to lengths to a set of dimension names. This is to increase consistency with :py:meth:`DataArray.dims`. To access a mapping of dimension names to lengths please use :py:meth:`Dataset.sizes`. + The same change also applies to `DatasetGroupBy.dims`. (:issue:`8496`, :pull:`8500`) By `Tom Nicholas `_. From 3406cc7dcd39331a4379add0a6e505a1d2f0b5ca Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 2 Dec 2023 00:29:24 -0500 Subject: [PATCH 20/21] filter warning when we actually test ds.dims --- xarray/tests/test_dataset.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index c02977ec995..177dba34bac 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -687,6 +687,7 @@ class CustomIndex(Index): # test coordinate variables copied assert ds.variables["x"] is not coords.variables["x"] + @pytest.mark.filterwarnings("ignore:return type") def test_properties(self) -> None: ds = create_test_data() From 726f390bd75269391b86120f6778be657f02b2ac Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 2 Dec 2023 00:31:56 -0500 Subject: [PATCH 21/21] remove error I used for debugging --- xarray/core/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 66af2bcad76..61372513b2a 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -497,7 +497,6 @@ def _warn(self) -> None: "please use `Dataset.sizes`.", FutureWarning, ) - raise NotImplementedError() def __getitem__(self, key: K) -> V: self._warn()