modin-project · anmyachev · Aug 10, 2023 · Jul 7, 2023 · Aug 8, 2023 · Aug 9, 2023
@@ -3460,7 +3460,15 @@ def _groupby_shuffle(
         original_agg_func = agg_func
 
         def agg_func(grp, *args, **kwargs):
-            return agg_method(grp, original_agg_func, *args, **kwargs)
+            result = agg_method(grp, original_agg_func, *args, **kwargs)
+
+            # Convert Series to DataFrame
+            if result.ndim == 1:
+                result = result.to_frame(
+                    MODIN_UNNAMED_SERIES_LABEL if result.name is None else result.name
+                )
+
+            return result
 
         result = obj._modin_frame.groupby(
             axis=axis,

@@ -963,16 +963,8 @@ def size(self):
                 idx_name=self._idx_name,
                 **self._kwargs,
             ).size()
-        work_object = type(self)(
-            self._df,
-            self._by,
-            self._axis,
-            drop=False,
-            idx_name=None,
-            **self._kwargs,
-        )
-        result = work_object._wrap_aggregation(
-            type(work_object._query_compiler).groupby_size,
+        result = self._wrap_aggregation(
+            type(self._query_compiler).groupby_size,
             numeric_only=False,
         )
         if not isinstance(result, Series):

@@ -17,13 +17,15 @@
     test_data_values,
     df_equals,
 )
-from modin.config import NPartitions, Engine, MinPartitionSize
+from modin.config import NPartitions, Engine, MinPartitionSize, ExperimentalGroupbyImpl
 from modin.distributed.dataframe.pandas import from_partitions
 from modin.core.storage_formats.pandas.utils import split_result_of_axis_func_pandas
+from modin.utils import try_cast_to_pandas
 
 import numpy as np
 import pandas
 import pytest
+import unittest.mock as mock
 
 NPartitions.put(4)
 
@@ -1089,3 +1091,22 @@ def test_setitem_bool_preserve_dtypes():
     # scalar as a col_loc
     df.loc[indexer, "a"] = 2.0
     assert df._query_compiler._modin_frame.has_materialized_dtypes
+
+
+@pytest.mark.parametrize(
+    "modify_config", [{ExperimentalGroupbyImpl: True}], indirect=True
+)
+def test_groupby_size_shuffling(modify_config):
+    # verifies that 'groupby.size()' works with reshuffling implementation
+    # https://github.com/modin-project/modin/issues/6367
+    df = pd.DataFrame({"a": [1, 1, 2, 2], "b": [3, 4, 5, 6]})
+    modin_frame = df._query_compiler._modin_frame
+
+    with mock.patch.object(
+        modin_frame,
+        "_apply_func_to_range_partitioning",
+        wraps=modin_frame._apply_func_to_range_partitioning,
+    ) as shuffling_method:
+        try_cast_to_pandas(df.groupby("a").size())
+
+    shuffling_method.assert_called()