From 8793cb9636fb7ae3f384989198229523f16b8a52 Mon Sep 17 00:00:00 2001 From: Zac Hatfield-Dodds Date: Tue, 19 Feb 2019 16:19:12 +1100 Subject: [PATCH 1/3] Better names for concat arrays --- doc/whats-new.rst | 4 ++++ xarray/core/combine.py | 15 +++++---------- xarray/tests/test_combine.py | 10 ++++++++++ 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9ac671d5858..e9fcc5ce48c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -82,6 +82,10 @@ Bug fixes - Silenced warnings that appear when using pandas 0.24. By `Stephan Hoyer `_ +- Concatenating a sequence of :py:class:`~xarray.DataArray` with varying names + sets the name of the output array to ``None``, instead of the name of the + first input array. + (:issue:`2775`). By `Zac Hatfield-Dodds `_. - Interpolating via resample now internally specifies ``bounds_error=False`` as an argument to ``scipy.interpolate.interp1d``, allowing for interpolation from higher frequencies to lower frequencies. Datapoints outside the bounds diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 11961dff520..fea10e13e52 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -6,6 +6,7 @@ from . 
import utils from .alignment import align +from .computation import result_name from .merge import merge from .variable import IndexVariable, Variable, as_variable from .variable import concat as concat_vars @@ -323,16 +324,10 @@ def _dataarray_concat(arrays, dim, data_vars, coords, compat, raise ValueError('data_vars is not a valid argument when ' 'concatenating DataArray objects') - datasets = [] - for n, arr in enumerate(arrays): - if n == 0: - name = arr.name - elif name != arr.name: - if compat == 'identical': - raise ValueError('array names not identical') - else: - arr = arr.rename(name) - datasets.append(arr._to_temp_dataset()) + name = result_name(arrays) + if name is None and compat == 'identical': + raise ValueError('array names not identical') + datasets = [arr.rename(name)._to_temp_dataset() for arr in arrays] ds = _dataset_concat(datasets, dim, data_vars, coords, compat, positions) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index c37abc98f07..628423328f3 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -297,6 +297,16 @@ def test_concat_lazy(self): assert combined.shape == (2, 3, 3) assert combined.dims == ('z', 'x', 'y') + def test_concat_names(self): + ds = Dataset({'foo': (['x', 'y'], np.random.random((2, 2))), + 'bar': (['x', 'y'], np.random.random((2, 2)))}) + # Concat arrays with different names, new name is None + new = concat([ds.foo, ds.bar], dim='new') + assert new.name is None + # Concat arrays with same name, name is preserved + foobar = ds.foo.rename('bar') + assert concat([foobar, ds.bar], dim='new').name == 'bar' + class TestAutoCombine(object): From 681d082fcee114e09f1b1bcfa9cf4923806fa32f Mon Sep 17 00:00:00 2001 From: Zac Hatfield-Dodds Date: Tue, 19 Feb 2019 16:38:37 +1100 Subject: [PATCH 2/3] Infer coord for array concat This is really nice to have when using concat to produce faceted plots of various kinds, and harmless when it's useless. 
--- doc/whats-new.rst | 3 +++ xarray/core/combine.py | 17 +++++++++++++++-- xarray/tests/test_combine.py | 14 ++++++++++++-- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e9fcc5ce48c..2c87e0cab88 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -40,6 +40,9 @@ Enhancements `Spencer Clark `_. - Add ``data=False`` option to ``to_dict()`` methods. (:issue:`2656`) By `Ryan Abernathey `_ +- Use new dimension name and unique array names to create a new coordinate + when concatenating arrays, if no coordinates are given. + (:issue:`2775`). By `Zac Hatfield-Dodds `_. - :py:meth:`~xarray.DataArray.coarsen` and :py:meth:`~xarray.Dataset.coarsen` are newly added. See :ref:`comput.coarsen` for details. diff --git a/xarray/core/combine.py b/xarray/core/combine.py index fea10e13e52..3c4994dd8eb 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -325,10 +325,23 @@ def _dataarray_concat(arrays, dim, data_vars, coords, compat, 'concatenating DataArray objects') name = result_name(arrays) - if name is None and compat == 'identical': - raise ValueError('array names not identical') + names = [arr.name for arr in arrays] + if compat == 'identical' and len(set(names)) != 1: + raise ValueError( + "compat='identical', but array names {!r} are not identical" + .format(names if len(names) <= 10 else sorted(set(names))) + ) datasets = [arr.rename(name)._to_temp_dataset() for arr in arrays] + if ( + isinstance(dim, str) + and len(set(names) - {None}) == len(names) + and not any(dim in a.dims or dim in a.coords for a in arrays) + ): + # We're concatenating arrays with unique non-None names along + # a new dimension, so we use the existing names as coordinates. 
+ dim = pd.Index(names, name=dim) + ds = _dataset_concat(datasets, dim, data_vars, coords, compat, positions) return arrays[0]._from_temp_dataset(ds, name) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 628423328f3..c5e88e1f627 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -248,7 +248,8 @@ def test_concat(self): # from dataset array: expected = DataArray(np.array([foo.values, bar.values]), - dims=['w', 'x', 'y'], coords={'x': [0, 1]}) + dims=['w', 'x', 'y'], + coords={'x': [0, 1], 'w': ['foo', 'bar']}) actual = concat([foo, bar], 'w') assert_equal(expected, actual) # from iteration: @@ -297,15 +298,24 @@ def test_concat_lazy(self): assert combined.shape == (2, 3, 3) assert combined.dims == ('z', 'x', 'y') - def test_concat_names(self): + def test_concat_names_and_coords(self): ds = Dataset({'foo': (['x', 'y'], np.random.random((2, 2))), 'bar': (['x', 'y'], np.random.random((2, 2)))}) # Concat arrays with different names, new name is None + # and unique array names are used as coordinates new = concat([ds.foo, ds.bar], dim='new') assert new.name is None + assert (new.coords['new'] == ['foo', 'bar']).values.all() + # Get a useful error message for unexpectedly different names + with pytest.raises(ValueError) as err: + concat([ds.foo, ds.bar], dim='new', compat='identical') + assert err.value.args[0] == "compat='identical', " + \ + "but array names ['foo', 'bar'] are not identical" # Concat arrays with same name, name is preserved + # and non-unique names are not used as coords foobar = ds.foo.rename('bar') assert concat([foobar, ds.bar], dim='new').name == 'bar' + assert 'new' not in concat([foobar, ds.bar], dim='new').coords class TestAutoCombine(object): From 63da214d697345ebdd0ecc0967c72eafc70bcb0d Mon Sep 17 00:00:00 2001 From: Zac-HD Date: Sat, 23 Feb 2019 10:18:09 +1100 Subject: [PATCH 3/3] load_dataset is not removed yet it's still deprecated, but we'll leave it for a bit longer before removal. 
--- xarray/tutorial.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/xarray/tutorial.py b/xarray/tutorial.py index 3f92bd9a400..16c1b3a206e 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -91,16 +91,17 @@ def open_dataset(name, cache=True, cache_dir=_default_cache_dir, def load_dataset(*args, **kwargs): """ - `load_dataset` will be removed in version 0.12. The current behavior of - this function can be achived by using `tutorial.open_dataset(...).load()`. + `load_dataset` is deprecated and will be removed in a future version. + The current behavior of this function can be achieved by using + `tutorial.open_dataset(...).load()`. See Also -------- open_dataset """ warnings.warn( - "load_dataset` will be removed in xarray version 0.12. The current " - "behavior of this function can be achived by using " + "load_dataset` will be removed in a future version of Xarray. " + "The current behavior of this function can be achived by using " "`tutorial.open_dataset(...).load()`.", DeprecationWarning, stacklevel=2) return open_dataset(*args, **kwargs).load()