diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py index 9819e8c6cf0..fda75f173a2 100644 --- a/modin/core/storage_formats/pandas/query_compiler.py +++ b/modin/core/storage_formats/pandas/query_compiler.py @@ -3114,7 +3114,12 @@ def _groupby_internal_columns(self, by, drop): else: if not isinstance(by, list): by = [by] if by is not None else [] - internal_by = [o for o in by if hashable(o) and o in self.columns] + internal_by = [] + for o in by: + if isinstance(o, pandas.Grouper): + internal_by.append(o.key) + elif hashable(o) and o in self.columns: + internal_by.append(o) internal_qc = ( [self.getitem_column_array(internal_by)] if len(internal_by) else [] ) diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py index 8bef1bba1c8..fea11e13e26 100644 --- a/modin/pandas/dataframe.py +++ b/modin/pandas/dataframe.py @@ -482,29 +482,34 @@ def groupby( ( (hashable(o) and (o in self)) or isinstance(o, Series) + or (isinstance(o, pandas.Grouper) and o.key in self) or (is_list_like(o) and len(o) == len(self.axes[axis])) ) for o in by ): - # We want to split 'by's into those that belongs to the self (internal_by) - # and those that doesn't (external_by) - internal_by, external_by = [], [] + has_external = False + processed_by = [] for current_by in by: - if hashable(current_by): - internal_by.append(current_by) + if isinstance(current_by, pandas.Grouper): + processed_by.append(current_by) + has_external = True + elif hashable(current_by): + processed_by.append(current_by) elif isinstance(current_by, Series): if current_by._parent is self: - internal_by.append(current_by.name) + processed_by.append(current_by.name) else: - external_by.append(current_by._query_compiler) + processed_by.append(current_by._query_compiler) + has_external = True else: - external_by.append(current_by) + has_external = True + processed_by.append(current_by) - by = internal_by + external_by + by = processed_by - if len(external_by) == 0: - by = self[internal_by]._query_compiler + if not has_external: + by = self[processed_by]._query_compiler drop = True else: diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py index 2f452e98b08..8154f665c74 100644 --- a/modin/pandas/groupby.py +++ b/modin/pandas/groupby.py @@ -573,7 +573,13 @@ def _internal_by(self): internal_by = tuple() if self._drop: if is_list_like(self._by): - internal_by = tuple(by for by in self._by if isinstance(by, str)) + internal_by_list = [] + for by in self._by: + if isinstance(by, str): + internal_by_list.append(by) + elif isinstance(by, pandas.Grouper): + internal_by_list.append(by.key) + internal_by = tuple(internal_by_list) else: ErrorMessage.catch_bugs_and_request_email( failure_condition=not isinstance(self._by, BaseQueryCompiler),