Skip to content

Commit a4f5ec2

Browse files
fmaussion authored and shoyer committed
Preserve variables order in Dataset.concat() (#1049)
* fixes #1027
* reviews + whats new
* wrong commit
1 parent a247d93 commit a4f5ec2

File tree

3 files changed

+26
-7
lines changed

3 files changed

+26
-7
lines changed

doc/whats-new.rst

+5-1
Original file line numberDiff line numberDiff line change
@@ -98,12 +98,16 @@ Bug fixes
9898
``missing_value`` are set to ``NaN`` (:issue:`997`).
9999
By `Marco Zühlke <https://github.com/mzuehlke>`_.
100100

101-
- ``.where()`` and ``.fillna()`` now preserve attributes(:issue:`1009`).
101+
- ``.where()`` and ``.fillna()`` now preserve attributes (:issue:`1009`).
102102
By `Fabien Maussion <https://github.com/fmaussion>`_.
103103

104104
- Applying :py:func:`broadcast()` to an xarray object based on the dask backend
105105
won't accidentally convert the array from dask to numpy anymore (:issue:`978`).
106106
By `Guido Imperiale <https://github.com/crusaderky>`_.
107+
108+
- ``Dataset.concat()`` now preserves variable order (:issue:`1027`).
109+
By `Fabien Maussion <https://github.com/fmaussion>`_.
110+
107111
.. _whats-new.0.8.2:
108112

109113
v0.8.2 (18 August 2016)

xarray/core/combine.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,6 @@ def differs(vname):
189189
concat_over.update(process_subset_opt(coords, 'coords'))
190190
if dim in datasets[0]:
191191
concat_over.add(dim)
192-
193192
return concat_over
194193

195194

@@ -264,11 +263,12 @@ def ensure_common_dims(vars):
264263
var = var.expand_dims(common_dims, common_shape)
265264
yield var
266265

267-
# stack up each variable to fill-out the dataset
268-
for k in concat_over:
269-
vars = ensure_common_dims([ds.variables[k] for ds in datasets])
270-
combined = concat_vars(vars, dim, positions)
271-
insert_result_variable(k, combined)
266+
# stack up each variable to fill-out the dataset (in order)
267+
for k in datasets[0].variables:
268+
if k in concat_over:
269+
vars = ensure_common_dims([ds.variables[k] for ds in datasets])
270+
combined = concat_vars(vars, dim, positions)
271+
insert_result_variable(k, combined)
272272

273273
result = Dataset(result_vars, attrs=result_attrs)
274274
result = result.set_coords(result_coord_names)

xarray/test/test_dataset.py

+15
Original file line numberDiff line numberDiff line change
@@ -1858,6 +1858,21 @@ def test_groupby_nan(self):
18581858
expected = Dataset({'foo': ('bar', [1.5, 3]), 'bar': [1, 2]})
18591859
self.assertDatasetIdentical(actual, expected)
18601860

1861+
def test_groupby_order(self):
1862+
# groupby should preserve variables order
1863+
1864+
ds = Dataset()
1865+
for vn in ['a', 'b', 'c']:
1866+
ds[vn] = DataArray(np.arange(10), dims=['t'])
1867+
all_vars_ref = list(ds.variables.keys())
1868+
data_vars_ref = list(ds.data_vars.keys())
1869+
ds = ds.groupby('t').mean()
1870+
all_vars = list(ds.variables.keys())
1871+
data_vars = list(ds.data_vars.keys())
1872+
self.assertEqual(data_vars, data_vars_ref)
1873+
# coords are now at the end of the list, so the test below fails
1874+
# self.assertEqual(all_vars, all_vars_ref)
1875+
18611876
def test_resample_and_first(self):
18621877
times = pd.date_range('2000-01-01', freq='6H', periods=10)
18631878
ds = Dataset({'foo': (['time', 'x', 'y'], np.random.randn(10, 5, 3)),

0 commit comments

Comments
 (0)