Skip to content

Commit b8aaa53

Browse files
max-sixtypre-commit-ci[bot]headtr1ck
authored
Add a .drop_attrs method (#8258)
* Add a `.drop_attrs` method Part of #3891 * Add tests * Add explicit coords test * Use `._replace` for half the method * . * Add a `deep` kwarg (default `True`?) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * api * Update xarray/core/dataarray.py Co-authored-by: Michael Niklas <[email protected]> * Update xarray/core/dataset.py Co-authored-by: Michael Niklas <[email protected]> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Michael Niklas <[email protected]>
1 parent a69815f commit b8aaa53

File tree

6 files changed

+118
-0
lines changed

6 files changed

+118
-0
lines changed

doc/api.rst

+2
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ Dataset contents
111111
Dataset.drop_duplicates
112112
Dataset.drop_dims
113113
Dataset.drop_encoding
114+
Dataset.drop_attrs
114115
Dataset.set_coords
115116
Dataset.reset_coords
116117
Dataset.convert_calendar
@@ -306,6 +307,7 @@ DataArray contents
306307
DataArray.drop_indexes
307308
DataArray.drop_duplicates
308309
DataArray.drop_encoding
310+
DataArray.drop_attrs
309311
DataArray.reset_coords
310312
DataArray.copy
311313
DataArray.convert_calendar

doc/whats-new.rst

+4
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ New Features
2626
By `Martin Raspaud <https://github.com/mraspaud>`_.
2727
- Extract the source url from fsspec objects (:issue:`9142`, :pull:`8923`).
2828
By `Justus Magin <https://github.com/keewis>`_.
29+
- Add :py:meth:`DataArray.drop_attrs` & :py:meth:`Dataset.drop_attrs` methods,
30+
to return an object without ``attrs``. A ``deep`` parameter controls whether
31+
variables' ``attrs`` are also dropped.
32+
By `Maximilian Roos <https://github.com/max-sixty>`_. (:pull:`8258`)
2933

3034
Breaking changes
3135
~~~~~~~~~~~~~~~~

xarray/core/dataarray.py

+17
Original file line numberDiff line numberDiff line change
@@ -7456,3 +7456,20 @@ def to_dask_dataframe(
74567456
# this needs to be at the end, or mypy will confuse with `str`
74577457
# https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names
74587458
str = utils.UncachedAccessor(StringAccessor["DataArray"])
7459+
7460+
def drop_attrs(self, *, deep: bool = True) -> Self:
7461+
"""
7462+
Removes all attributes from the DataArray.
7463+
7464+
Parameters
7465+
----------
7466+
deep : bool, default True
7467+
Removes attributes from coordinates.
7468+
7469+
Returns
7470+
-------
7471+
DataArray
7472+
"""
7473+
return (
7474+
self._to_temp_dataset().drop_attrs(deep=deep).pipe(self._from_temp_dataset)
7475+
)

xarray/core/dataset.py

+42
Original file line numberDiff line numberDiff line change
@@ -10680,3 +10680,45 @@ def resample(
1068010680
restore_coord_dims=restore_coord_dims,
1068110681
**indexer_kwargs,
1068210682
)
10683+
10684+
def drop_attrs(self, *, deep: bool = True) -> Self:
    """
    Removes the attributes of the Dataset and, optionally, of its variables.

    Parameters
    ----------
    deep : bool, default True
        If True, attributes are also removed from every variable, including
        the coordinate variables that back an index. If False, only the
        Dataset-level attributes are removed.

    Returns
    -------
    Dataset
    """
    # Start from a replacement object whose dataset-level attrs are empty.
    stripped = self._replace(attrs={})
    if not deep:
        return stripped

    # Variables have no `._replace` method, so each non-index variable is
    # copied and the copy's attrs are then cleared.
    for name in stripped.variables:
        if name in stripped.indexes:
            continue
        stripped[name] = stripped[name].copy()
        stripped[name].attrs = {}

    # Index coordinate variables are handled per index: copy the variables of
    # each index, clear their attrs, and let the index object re-wrap itself
    # in new coordinate variables built from those copies.
    rebuilt_index_vars = {}
    for index, index_vars in stripped.xindexes.group_by_index():
        attr_free = {key: variable.copy() for key, variable in index_vars.items()}
        for variable in attr_free.values():
            variable.attrs = {}
        rebuilt_index_vars.update(index.create_variables(attr_free))

    return stripped.assign(rebuilt_index_vars)

xarray/tests/test_dataarray.py

+5
Original file line numberDiff line numberDiff line change
@@ -2980,6 +2980,11 @@ def test_assign_attrs(self) -> None:
29802980
assert_identical(new_actual, expected)
29812981
assert actual.attrs == {"a": 1, "b": 2}
29822982

2983+
def test_drop_attrs(self) -> None:
    # Smoke test only; the behaviour is mostly covered in test_dataset.py.
    array = DataArray([], attrs=dict(a=1, b=2))
    stripped = array.drop_attrs()
    assert stripped.attrs == {}
2987+
29832988
@pytest.mark.parametrize(
29842989
"func", [lambda x: x.clip(0, 1), lambda x: np.float64(1.0) * x, np.abs, abs]
29852990
)

xarray/tests/test_dataset.py

+48
Original file line numberDiff line numberDiff line change
@@ -4450,6 +4450,54 @@ def test_assign_attrs(self) -> None:
44504450
assert_identical(new_actual, expected)
44514451
assert actual.attrs == dict(a=1, b=2)
44524452

4453+
def test_drop_attrs(self) -> None:
    # Simple example
    ds = Dataset().assign_attrs(a=1, b=2)
    original = ds.copy()
    expected = Dataset()
    result = ds.drop_attrs()
    assert_identical(result, expected)

    # Doesn't change original
    assert_identical(ds, original)

    # Example with variables and coords with attrs, and a multiindex. (arguably
    # should have used a canonical dataset with all the features we should
    # support...)
    var = Variable("x", [1, 2, 3], attrs=dict(x=1, y=2))
    idx = IndexVariable("y", [1, 2, 3], attrs=dict(c=1, d=2))
    mx = xr.Coordinates.from_pandas_multiindex(
        pd.MultiIndex.from_tuples([(1, 2), (3, 4)], names=["d", "e"]), "z"
    )
    ds = Dataset(dict(var1=var), coords=dict(y=idx, z=mx)).assign_attrs(a=1, b=2)
    assert ds.attrs != {}
    assert ds["var1"].attrs != {}
    assert ds["y"].attrs != {}
    assert ds.coords["y"].attrs != {}

    original = ds.copy(deep=True)
    result = ds.drop_attrs()

    assert result.attrs == {}
    assert result["var1"].attrs == {}
    assert result["y"].attrs == {}
    assert list(result.data_vars) == list(ds.data_vars)
    assert list(result.coords) == list(ds.coords)

    # Doesn't change original
    assert_identical(ds, original)
    # Specifically test that the attrs on the coords are still there. (The index
    # can't currently contain `attrs`, so we can't test those.)
    assert ds.coords["y"].attrs != {}

    # Test for deep=False
    result_shallow = ds.drop_attrs(deep=False)
    assert result_shallow.attrs == {}
    assert result_shallow["var1"].attrs != {}
    assert result_shallow["y"].attrs != {}
    # Check the shallow result itself here, not the deep `result` from above,
    # so the deep=False path is verified to keep every variable and coordinate.
    assert list(result_shallow.data_vars) == list(ds.data_vars)
    assert list(result_shallow.coords) == list(ds.coords)
4500+
44534501
def test_assign_multiindex_level(self) -> None:
44544502
data = create_test_multiindex()
44554503
with pytest.raises(ValueError, match=r"cannot drop or update.*corrupt.*index "):

0 commit comments

Comments
 (0)