Preserve variables order in Dataset.concat() (#1049)

fmaussion · shoyer · commit a4f5ec2263ee · 2016-11-01T21:35:53.000-04:00
* fixes #1027 * reviews + whats new * wrong commit
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -98,12 +98,16 @@ Bug fixes
   ``missing_value`` are set to ``NaN`` (:issue:`997`).
   By `Marco Zühlke <https://github.com/mzuehlke>`_.
 
-- ``.where()`` and ``.fillna()`` now preserve attributes(:issue:`1009`).
+- ``.where()`` and ``.fillna()`` now preserve attributes (:issue:`1009`).
   By `Fabien Maussion <https://github.com/fmaussion>`_.
 
 - Applying :py:func:`broadcast()` to an xarray object based on the dask backend
   won't accidentally convert the array from dask to numpy anymore (:issue:`978`).
   By `Guido Imperiale <https://github.com/crusaderky>`_.
+
+- ``Dataset.concat()`` now preserves the order of variables (:issue:`1027`).
+  By `Fabien Maussion <https://github.com/fmaussion>`_.
+
 .. _whats-new.0.8.2:
 
 v0.8.2 (18 August 2016)
diff --git a/xarray/core/combine.py b/xarray/core/combine.py
@@ -189,7 +189,6 @@ def differs(vname):
     concat_over.update(process_subset_opt(coords, 'coords'))
     if dim in datasets[0]:
         concat_over.add(dim)
-
     return concat_over
 
 
@@ -264,11 +263,12 @@ def ensure_common_dims(vars):
                 var = var.expand_dims(common_dims, common_shape)
             yield var
 
-    # stack up each variable to fill-out the dataset
-    for k in concat_over:
-        vars = ensure_common_dims([ds.variables[k] for ds in datasets])
-        combined = concat_vars(vars, dim, positions)
-        insert_result_variable(k, combined)
+    # stack up each variable to fill out the dataset (in datasets[0] order)
+    for k in datasets[0].variables:
+        if k in concat_over:
+            vars = ensure_common_dims([ds.variables[k] for ds in datasets])
+            combined = concat_vars(vars, dim, positions)
+            insert_result_variable(k, combined)
 
     result = Dataset(result_vars, attrs=result_attrs)
     result = result.set_coords(result_coord_names)
diff --git a/xarray/test/test_dataset.py b/xarray/test/test_dataset.py
@@ -1858,6 +1858,21 @@ def test_groupby_nan(self):
         expected = Dataset({'foo': ('bar', [1.5, 3]), 'bar': [1, 2]})
         self.assertDatasetIdentical(actual, expected)
 
+    def test_groupby_order(self):
+        # groupby should preserve the order of the data variables
+
+        ds = Dataset()
+        for vn in ['a', 'b', 'c']:
+            ds[vn] = DataArray(np.arange(10), dims=['t'])
+        all_vars_ref = list(ds.variables.keys())
+        data_vars_ref = list(ds.data_vars.keys())
+        ds = ds.groupby('t').mean()
+        all_vars = list(ds.variables.keys())
+        data_vars = list(ds.data_vars.keys())
+        self.assertEqual(data_vars, data_vars_ref)
+        # coords now come last in ds.variables, so the check below would fail
+        # self.assertEqual(all_vars, all_vars_ref)
+
     def test_resample_and_first(self):
         times = pd.date_range('2000-01-01', freq='6H', periods=10)
         ds = Dataset({'foo': (['time', 'x', 'y'], np.random.randn(10, 5, 3)),