diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4cd24a54fc8..947e0aaea2a 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -27,6 +27,12 @@ New Features Breaking changes ~~~~~~~~~~~~~~~~ +- The :py:class:`Coordinates` constructor now creates a (pandas) index by + default for each dimension coordinate. To keep the previous behavior (no index + created), pass an empty dictionary to ``indexes``. The constructor now also + extracts and add the indexes from another :py:class:`Coordinates` object + passed via ``coords`` (:pull:`8107`). + By `BenoƮt Bovy `_. Deprecations ~~~~~~~~~~~~ diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index f03d98f781a..883cc19e215 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -18,6 +18,7 @@ from xarray.core.indexes import ( Index, Indexes, + PandasIndex, PandasMultiIndex, assert_no_index_corrupted, create_default_index_implicit, @@ -193,22 +194,69 @@ class Coordinates(AbstractCoordinates): Coordinates are either: - returned via the :py:attr:`Dataset.coords` and :py:attr:`DataArray.coords` - properties. - - built from index objects (e.g., :py:meth:`Coordinates.from_pandas_multiindex`). - - built directly from coordinate data and index objects (beware that no consistency - check is done on those inputs). - - In the latter case, no default (pandas) index is created. + properties + - built from Pandas or other index objects + (e.g., :py:meth:`Coordinates.from_pandas_multiindex`) + - built directly from coordinate data and Xarray ``Index`` objects (beware that + no consistency check is done on those inputs) Parameters ---------- - coords: dict-like - Mapping where keys are coordinate names and values are objects that - can be converted into a :py:class:`~xarray.Variable` object - (see :py:func:`~xarray.as_variable`). - indexes: dict-like - Mapping of where keys are coordinate names and values are - :py:class:`~xarray.indexes.Index` objects. + coords: dict-like, optional + Mapping where keys are coordinate names and values are objects that + can be converted into a :py:class:`~xarray.Variable` object + (see :py:func:`~xarray.as_variable`). If another + :py:class:`~xarray.Coordinates` object is passed, its indexes + will be added to the new created object. + indexes: dict-like, optional + Mapping of where keys are coordinate names and values are + :py:class:`~xarray.indexes.Index` objects. If None (default), + pandas indexes will be created for each dimension coordinate. + Passing an empty dictionary will skip this default behavior. + + Examples + -------- + Create a dimension coordinate with a default (pandas) index: + + >>> xr.Coordinates({"x": [1, 2]}) + Coordinates: + * x (x) int64 1 2 + + Create a dimension coordinate with no index: + + >>> xr.Coordinates(coords={"x": [1, 2]}, indexes={}) + Coordinates: + x (x) int64 1 2 + + Create a new Coordinates object from existing dataset coordinates + (indexes are passed): + + >>> ds = xr.Dataset(coords={"x": [1, 2]}) + >>> xr.Coordinates(ds.coords) + Coordinates: + * x (x) int64 1 2 + + Create indexed coordinates from a ``pandas.MultiIndex`` object: + + >>> midx = pd.MultiIndex.from_product([["a", "b"], [0, 1]]) + >>> xr.Coordinates.from_pandas_multiindex(midx, "x") + Coordinates: + * x (x) object MultiIndex + * x_level_0 (x) object 'a' 'a' 'b' 'b' + * x_level_1 (x) int64 0 1 0 1 + + Create a new Dataset object by passing a Coordinates object: + + >>> midx_coords = xr.Coordinates.from_pandas_multiindex(midx, "x") + >>> xr.Dataset(coords=midx_coords) + + Dimensions: (x: 4) + Coordinates: + * x (x) object MultiIndex + * x_level_0 (x) object 'a' 'a' 'b' 'b' + * x_level_1 (x) int64 0 1 0 1 + Data variables: + *empty* """ @@ -228,17 +276,40 @@ def __init__( from xarray.core.dataset import Dataset if coords is None: - variables = {} - elif isinstance(coords, Coordinates): + coords = {} + + variables: dict[Hashable, Variable] + default_indexes: dict[Hashable, PandasIndex] = {} + coords_obj_indexes: dict[Hashable, Index] = {} + + if isinstance(coords, Coordinates): + if indexes is not None: + raise ValueError( + "passing both a ``Coordinates`` object and a mapping of indexes " + "to ``Coordinates.__init__`` is not allowed " + "(this constructor does not support merging them)" + ) variables = {k: v.copy() for k, v in coords.variables.items()} + coords_obj_indexes = dict(coords.xindexes) else: - variables = {k: as_variable(v) for k, v in coords.items()} + variables = {} + for name, data in coords.items(): + var = as_variable(data, name=name) + if var.dims == (name,) and indexes is None: + index, index_vars = create_default_index_implicit(var, list(coords)) + default_indexes.update({k: index for k in index_vars}) + variables.update(index_vars) + else: + variables[name] = var if indexes is None: indexes = {} else: indexes = dict(indexes) + indexes.update(default_indexes) + indexes.update(coords_obj_indexes) + no_coord_index = set(indexes) - set(variables) if no_coord_index: raise ValueError( diff --git a/xarray/tests/test_coordinates.py b/xarray/tests/test_coordinates.py index bf68a5c1838..96eb9b045d2 100644 --- a/xarray/tests/test_coordinates.py +++ b/xarray/tests/test_coordinates.py @@ -17,6 +17,17 @@ def test_init_noindex(self) -> None: expected = Dataset(coords={"foo": ("x", [0, 1, 2])}) assert_identical(coords.to_dataset(), expected) + def test_init_default_index(self) -> None: + coords = Coordinates(coords={"x": [1, 2]}) + expected = Dataset(coords={"x": [1, 2]}) + assert_identical(coords.to_dataset(), expected) + assert "x" in coords.xindexes + + def test_init_no_default_index(self) -> None: + # dimension coordinate with no default index (explicit) + coords = Coordinates(coords={"x": [1, 2]}, indexes={}) + assert "x" not in coords.xindexes + def test_init_from_coords(self) -> None: expected = Dataset(coords={"foo": ("x", [0, 1, 2])}) coords = Coordinates(coords=expected.coords) @@ -25,10 +36,19 @@ def test_init_from_coords(self) -> None: # test variables copied assert coords.variables["foo"] is not expected.variables["foo"] - # default index - expected = Dataset(coords={"x": ("x", [0, 1, 2])}) - coords = Coordinates(coords=expected.coords, indexes=expected.xindexes) + # test indexes are extracted + expected = Dataset(coords={"x": [0, 1, 2]}) + coords = Coordinates(coords=expected.coords) assert_identical(coords.to_dataset(), expected) + assert expected.xindexes == coords.xindexes + + # coords + indexes not supported + with pytest.raises( + ValueError, match="passing both.*Coordinates.*indexes.*not allowed" + ): + coords = Coordinates( + coords=expected.coords, indexes={"x": PandasIndex([0, 1, 2], "x")} + ) def test_init_empty(self) -> None: coords = Coordinates() @@ -60,37 +80,31 @@ def test_from_pandas_multiindex(self) -> None: assert_identical(expected[name], coords.variables[name]) def test_dims(self) -> None: - _ds = Dataset(coords={"x": [0, 1, 2]}) - coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) + coords = Coordinates(coords={"x": [0, 1, 2]}) assert coords.dims == {"x": 3} def test_sizes(self) -> None: - _ds = Dataset(coords={"x": [0, 1, 2]}) - coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) + coords = Coordinates(coords={"x": [0, 1, 2]}) assert coords.sizes == {"x": 3} def test_dtypes(self) -> None: - _ds = Dataset(coords={"x": [0, 1, 2]}) - coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) + coords = Coordinates(coords={"x": [0, 1, 2]}) assert coords.dtypes == {"x": int} def test_getitem(self) -> None: - _ds = Dataset(coords={"x": [0, 1, 2]}) - coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) + coords = Coordinates(coords={"x": [0, 1, 2]}) assert_identical( coords["x"], DataArray([0, 1, 2], coords={"x": [0, 1, 2]}, name="x"), ) def test_delitem(self) -> None: - _ds = Dataset(coords={"x": [0, 1, 2]}) - coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) + coords = Coordinates(coords={"x": [0, 1, 2]}) del coords["x"] assert "x" not in coords def test_update(self) -> None: - _ds = Dataset(coords={"x": [0, 1, 2]}) - coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) + coords = Coordinates(coords={"x": [0, 1, 2]}) coords.update({"y": ("y", [4, 5, 6])}) assert "y" in coords @@ -99,18 +113,16 @@ def test_update(self) -> None: assert_identical(coords["y"], expected) def test_equals(self): - _ds = Dataset(coords={"x": [0, 1, 2]}) - coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) + coords = Coordinates(coords={"x": [0, 1, 2]}) assert coords.equals(coords) - assert not coords.equals("no_a_coords") + assert not coords.equals("not_a_coords") def test_identical(self): - _ds = Dataset(coords={"x": [0, 1, 2]}) - coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) + coords = Coordinates(coords={"x": [0, 1, 2]}) assert coords.identical(coords) - assert not coords.identical("no_a_coords") + assert not coords.identical("not_a_coords") def test_copy(self) -> None: no_index_coords = Coordinates({"foo": ("x", [1, 2, 3])}) @@ -129,8 +141,7 @@ def test_copy(self) -> None: assert source_ndarray(v0.data) is not source_ndarray(v1.data) def test_align(self) -> None: - _ds = Dataset(coords={"x": [0, 1, 2]}) - coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) + coords = Coordinates(coords={"x": [0, 1, 2]}) left = coords diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 183c0ad7371..2dcbe976525 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -489,7 +489,7 @@ def test_constructor_dask_coords(self) -> None: def test_constructor_no_default_index(self) -> None: # explicitly passing a Coordinates object skips the creation of default index - da = DataArray(range(3), coords=Coordinates({"x": ("x", [1, 2, 3])})) + da = DataArray(range(3), coords=Coordinates({"x": [1, 2, 3]}, indexes={})) assert "x" in da.coords assert "x" not in da.xindexes @@ -1585,7 +1585,7 @@ class CustomIndex(Index): assert isinstance(actual.xindexes["x"], CustomIndex) def test_assign_coords_no_default_index(self) -> None: - coords = Coordinates({"y": ("y", [1, 2, 3])}) + coords = Coordinates({"y": [1, 2, 3]}, indexes={}) da = DataArray([1, 2, 3], dims="y") actual = da.assign_coords(coords) assert_identical(actual.coords, coords, check_default_indexes=False) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 5304c54971a..a9a7b4156cc 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -636,7 +636,7 @@ def test_constructor_with_coords(self) -> None: def test_constructor_no_default_index(self) -> None: # explicitly passing a Coordinates object skips the creation of default index - ds = Dataset(coords=Coordinates({"x": ("x", [1, 2, 3])})) + ds = Dataset(coords=Coordinates({"x": [1, 2, 3]}, indexes={})) assert "x" in ds assert "x" not in ds.xindexes @@ -4298,7 +4298,7 @@ class CustomIndex(Index): assert isinstance(actual.xindexes["x"], CustomIndex) def test_assign_coords_no_default_index(self) -> None: - coords = Coordinates({"y": ("y", [1, 2, 3])}) + coords = Coordinates({"y": [1, 2, 3]}, indexes={}) ds = Dataset() actual = ds.assign_coords(coords) expected = coords.to_dataset()