Skip to content

Commit f27b40e

Browse files
committed
Infer coord for array concat
This is really nice to have when using concat to produce faceted plots of various kinds, and harmless when the inferred coordinate is not needed.
1 parent 8793cb9 commit f27b40e

File tree

3 files changed

+30
-4
lines changed

3 files changed

+30
-4
lines changed

doc/whats-new.rst

+3
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ Enhancements
4040
`Spencer Clark <https://github.com/spencerkclark>`_.
4141
- Add ``data=False`` option to ``to_dict()`` methods. (:issue:`2656`)
4242
By `Ryan Abernathey <https://github.com/rabernat>`_
43+
- Use new dimension name and unique array names to create a new coordinate
44+
when concatenating arrays along a new dimension, if no coordinates are given.
45+
(:issue:`2775`). By `Zac Hatfield-Dodds <https://github.com/Zac-HD>`_.
4346
- :py:meth:`~xarray.DataArray.coarsen` and
4447
:py:meth:`~xarray.Dataset.coarsen` are newly added.
4548
See :ref:`comput.coarsen` for details.

xarray/core/combine.py

+15-2
Original file line numberDiff line numberDiff line change
@@ -325,10 +325,23 @@ def _dataarray_concat(arrays, dim, data_vars, coords, compat,
325325
'concatenating DataArray objects')
326326

327327
name = result_name(arrays)
328-
if name is None and compat == 'identical':
329-
raise ValueError('array names not identical')
328+
names = [arr.name for arr in arrays]
329+
if compat == 'identical' and len(set(names)) != 1:
330+
raise ValueError(
331+
"compat='identical', but array names {!r} are not identical"
332+
.format(names if len(names) <= 10 else sorted(set(names)))
333+
)
330334
datasets = [arr.rename(name)._to_temp_dataset() for arr in arrays]
331335

336+
if (
337+
isinstance(dim, str)
338+
and len(set(names) - {None}) == len(names)
339+
and not any(dim in a.dims or dim in a.coords for a in arrays)
340+
):
341+
# We're concatenating arrays with unique non-None names along
342+
# a new dimension, so we use the existing names as coordinates.
343+
dim = pd.Index(names, name=dim)
344+
332345
ds = _dataset_concat(datasets, dim, data_vars, coords, compat,
333346
positions)
334347
return arrays[0]._from_temp_dataset(ds, name)

xarray/tests/test_combine.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,8 @@ def test_concat(self):
248248

249249
# from dataset array:
250250
expected = DataArray(np.array([foo.values, bar.values]),
251-
dims=['w', 'x', 'y'], coords={'x': [0, 1]})
251+
dims=['w', 'x', 'y'],
252+
coords={'x': [0, 1], 'w': ['foo', 'bar']})
252253
actual = concat([foo, bar], 'w')
253254
assert_equal(expected, actual)
254255
# from iteration:
@@ -297,15 +298,24 @@ def test_concat_lazy(self):
297298
assert combined.shape == (2, 3, 3)
298299
assert combined.dims == ('z', 'x', 'y')
299300

300-
def test_concat_names(self):
301+
def test_concat_names_and_coords(self):
301302
ds = Dataset({'foo': (['x', 'y'], np.random.random((2, 2))),
302303
'bar': (['x', 'y'], np.random.random((2, 2)))})
303304
# Concat arrays with different names, new name is None
305+
# and unique array names are used as coordinates
304306
new = concat([ds.foo, ds.bar], dim='new')
305307
assert new.name is None
308+
assert (new.coords['new'] == ['foo', 'bar']).values.all()
309+
# Get a useful error message for unexpectedly different names
310+
with pytest.raises(ValueError) as err:
311+
concat([ds.foo, ds.bar], dim='new', compat='identical')
312+
assert err.value.args[0] == "compat='identical', " + \
313+
"but array names ['foo', 'bar'] are not identical"
306314
# Concat arrays with same name, name is preserved
315+
# and non-unique names are not used as coords
307316
foobar = ds.foo.rename('bar')
308317
assert concat([foobar, ds.bar], dim='new').name == 'bar'
318+
assert 'new' not in concat([foobar, ds.bar], dim='new').coords
309319

310320

311321
class TestAutoCombine(object):

0 commit comments

Comments
 (0)