Some refactoring

Author: dcherian · Date: Nov 7, 2024
Commit: d1a3fc1
Parent: 978fad9
Show file tree

Hide file tree

Showing 3 changed files with 21 additions and 11 deletions.
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
@@ -63,6 +63,7 @@
     Bins,
     DaCompatible,
     NetcdfWriteModes,
+    T_Chunks,
     T_DataArray,
     T_DataArrayOrSet,
     ZarrWriteModes,
@@ -105,6 +106,7 @@
         Dims,
         ErrorOptions,
         ErrorOptionsWithWarn,
+        GroupIndices,
         GroupInput,
         InterpOptions,
         PadModeOptions,
@@ -1687,6 +1689,12 @@ def sel(
         )
         return self._from_temp_dataset(ds)
 
+    def _shuffle(
+        self, dim: Hashable, *, indices: GroupIndices, chunks: T_Chunks
+    ) -> Self:
+        ds = self._to_temp_dataset()._shuffle(dim=dim, indices=indices, chunks=chunks)
+        return self._from_temp_dataset(ds)
+
     def head(
         self,
         indexers: Mapping[Any, int] | int | None = None,

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -155,6 +155,7 @@
         DsCompatible,
         ErrorOptions,
         ErrorOptionsWithWarn,
+        GroupIndices,
         GroupInput,
         InterpOptions,
         JoinOptions,
@@ -3238,7 +3239,7 @@ def sel(
         result = self.isel(indexers=query_results.dim_indexers, drop=drop)
         return result._overwrite_indexes(*query_results.as_tuple()[1:])
 
-    def _shuffle(self, dim, *, indices: list[list[int]], chunks: T_Chunks) -> Self:
+    def _shuffle(self, dim, *, indices: GroupIndices, chunks: T_Chunks) -> Self:
         # Shuffling is only different from `isel` for chunked arrays.
         # Extract them out, and treat them specially. The rest, we route through isel.
         # This makes it easy to ensure correct handling of indexes.
@@ -3249,14 +3250,22 @@ def _shuffle(self, dim, *, indices: list[list[int]], chunks: T_Chunks) -> Self:
         }
         subset = self[[name for name in self._variables if name not in is_chunked]]
 
+        no_slices: list[list[int]] = [
+            list(range(*idx.indices(self.sizes[dim])))
+            if isinstance(idx, slice)
+            else idx
+            for idx in indices
+        ]
+        no_slices = [idx for idx in no_slices if idx]
+
         shuffled = (
             subset
             if dim not in subset.dims
-            else subset.isel({dim: np.concatenate(indices)})
+            else subset.isel({dim: np.concatenate(no_slices)})
         )
         for name, var in is_chunked.items():
             shuffled[name] = var._shuffle(
-                indices=indices,
+                indices=no_slices,
                 dim=dim,
                 chunks=chunks,
             )

diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py
@@ -743,19 +743,12 @@ def _shuffle_obj(self, chunks: T_Chunks) -> T_Xarray:
         was_array = isinstance(self._obj, DataArray)
         as_dataset = self._obj._to_temp_dataset() if was_array else self._obj
 
-        size = self._obj.sizes[self._group_dim]
-        no_slices: list[list[int]] = [
-            list(range(*idx.indices(size))) if isinstance(idx, slice) else idx
-            for idx in self.encoded.group_indices
-        ]
-        no_slices = [idx for idx in no_slices if idx]
-
         for grouper in self.groupers:
             if grouper.name not in as_dataset._variables:
                 as_dataset.coords[grouper.name] = grouper.group
 
         shuffled = as_dataset._shuffle(
-            dim=self._group_dim, indices=no_slices, chunks=chunks
+            dim=self._group_dim, indices=self.encoded.group_indices, chunks=chunks
         )
         shuffled = self._maybe_unstack(shuffled)
         new_obj = self._obj._from_temp_dataset(shuffled) if was_array else shuffled