Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Option to not auto-create index during expand_dims #8960

Merged
merged 18 commits into from
Apr 27, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 18 additions & 5 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4497,6 +4497,7 @@ def expand_dims(
self,
dim: None | Hashable | Sequence[Hashable] | Mapping[Any, Any] = None,
axis: None | int | Sequence[int] = None,
create_1d_index: bool = True,
**dim_kwargs: Any,
) -> Self:
"""Return a new object with an additional axis (or axes) inserted at
Expand All @@ -4506,6 +4507,9 @@ def expand_dims(
If dim is already a scalar coordinate, it will be promoted to a 1D
coordinate consisting of a single value.

The automatic creation of indexes to back new 1D coordinate variables is
controlled by the create_1d_index kwarg.

Parameters
----------
dim : hashable, sequence of hashable, mapping, or None
Expand All @@ -4521,6 +4525,8 @@ def expand_dims(
multiple axes are inserted. In this case, dim arguments should be
same length list. If axis=None is passed, all the axes will be
inserted to the start of the result array.
create_1d_index : bool, default is True
Whether to create new PandasIndex objects for any new 1D coordinate variables.
**dim_kwargs : int or sequence or ndarray
The keywords are arbitrary dimensions being inserted and the values
are either the lengths of the new dims (if int is given), or their
Expand Down Expand Up @@ -4640,6 +4646,8 @@ def expand_dims(
# save the coordinates to the variables dict, and set the
# value within the dim dict to the length of the iterable
# for later use.

# TODO should we have an option to not create a variable here?
index = PandasIndex(v, k)
indexes[k] = index
variables.update(index.create_variables())
Expand Down Expand Up @@ -4678,11 +4686,16 @@ def expand_dims(
variables[k] = v.set_dims(dict(all_dims))
else:
if k not in variables:
# If dims includes a label of a non-dimension coordinate,
# it will be promoted to a 1D coordinate with a single value.
index, index_vars = create_default_index_implicit(v.set_dims(k))
indexes[k] = index
variables.update(index_vars)
if k in coord_names and create_1d_index:
# If dims includes a label of a non-dimension coordinate,
# it will be promoted to a 1D coordinate with a single value.
index, index_vars = create_default_index_implicit(v.set_dims(k))
indexes[k] = index
variables.update(index_vars)
else:
# create 1D variable without creating a new index
new_1d_var = v.set_dims(k)
variables.update({k: new_1d_var})

return self._replace_with_new_dims(
variables, coord_names=coord_names, indexes=indexes
Expand Down
30 changes: 30 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3431,6 +3431,36 @@ def test_expand_dims_kwargs_python36plus(self) -> None:
)
assert_identical(other_way_expected, other_way)

@pytest.mark.parametrize("create_1d_index_flag", [True, False])
def test_expand_dims_create_index_data_variable(
    self, create_1d_index_flag: bool
) -> None:
    # Expanding along the name of a scalar *data* variable must never create
    # an index, whatever value ``create_1d_index`` takes.
    ds = Dataset({"x": 0})
    expanded = ds.expand_dims("x", create_1d_index=create_1d_index_flag)

    # TODO: the expected dataset can't be built directly with the constructor
    # because of GH issue 8959:
    # expected = Dataset(data_vars={"x": ("x", [0])})
    expected = Dataset({"x": ("x", [0])}).drop_indexes("x").reset_coords("x")

    # TODO: assert_identical(expanded, expected) can't be used here because it
    # fails the internal-invariants check for default indexes, so the pieces
    # are compared individually instead.
    # Check the *result* of expand_dims (not the hand-built fixture).
    assert expanded.data_vars == {"x": Variable(data=[0], dims=["x"])}
    assert expanded["x"].variable.identical(expected["x"].variable)
    assert expanded.indexes == {}

def test_expand_dims_create_index_coordinate_variable(self) -> None:
    # A scalar *coordinate* variable gains an index when it is expanded only
    # if ``create_1d_index`` is True (the default).
    ds = Dataset(coords={"x": 0})

    # Default behaviour: the promoted 1D coordinate is backed by an index.
    expanded = ds.expand_dims("x")
    expected = Dataset({"x": ("x", [0])})
    assert_identical(expanded, expected)

    # Opt-out: the coordinate is still promoted to 1D, but without an index.
    expanded_no_index = ds.expand_dims("x", create_1d_index=False)
    expected_no_index = Dataset(coords={"x": ("x", [0])}).drop_indexes("x")

    # TODO: assert_identical(expanded_no_index, expected_no_index) can't be
    # used here because it fails the internal-invariants check for default
    # indexes, so the pieces are compared individually instead.
    assert expanded_no_index.coords == {"x": Variable(data=[0], dims=["x"])}
    assert expanded_no_index["x"].variable.identical(
        expected_no_index["x"].variable
    )
    assert expanded_no_index.indexes == {}

@requires_pandas_version_two
def test_expand_dims_non_nanosecond_conversion(self) -> None:
# Regression test for https://github.com/pydata/xarray/issues/7493#issuecomment-1953091000
Expand Down
Loading