Skip to content

Commit

Permalink
depr: rename by to group_by in dataframe.upsample, dataframe.group_by…
Browse files Browse the repository at this point in the history
…_dynamic, and dataframe.rolling
  • Loading branch information
MarcoGorelli committed Mar 4, 2024
1 parent baacf3d commit 8b13ee1
Show file tree
Hide file tree
Showing 11 changed files with 117 additions and 91 deletions.
14 changes: 7 additions & 7 deletions crates/polars-lazy/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -973,7 +973,7 @@ impl LazyFrame {
pub fn group_by_rolling<E: AsRef<[Expr]>>(
self,
index_column: Expr,
by: E,
group_by: E,
mut options: RollingGroupOptions,
) -> LazyGroupBy {
if let Expr::Column(name) = index_column {
Expand All @@ -984,15 +984,15 @@ impl LazyFrame {
.unwrap();
return self.with_column(index_column).group_by_rolling(
Expr::Column(Arc::from(output_field.name().as_str())),
by,
group_by,
options,
);
}
let opt_state = self.get_opt_state();
LazyGroupBy {
logical_plan: self.logical_plan,
opt_state,
keys: by.as_ref().to_vec(),
keys: group_by.as_ref().to_vec(),
maintain_order: true,
dynamic_options: None,
rolling_options: Some(options),
Expand All @@ -1012,13 +1012,13 @@ impl LazyFrame {
/// - period: length of the window
/// - offset: offset of the window
///
/// The `by` argument should be empty `[]` if you don't want to combine this
/// The `group_by` argument should be empty `[]` if you don't want to combine this
/// with an ordinary group_by on these keys.
#[cfg(feature = "dynamic_group_by")]
pub fn group_by_dynamic<E: AsRef<[Expr]>>(
self,
index_column: Expr,
by: E,
group_by: E,
mut options: DynamicGroupOptions,
) -> LazyGroupBy {
if let Expr::Column(name) = index_column {
Expand All @@ -1029,15 +1029,15 @@ impl LazyFrame {
.unwrap();
return self.with_column(index_column).group_by_dynamic(
Expr::Column(Arc::from(output_field.name().as_str())),
by,
group_by,
options,
);
}
let opt_state = self.get_opt_state();
LazyGroupBy {
logical_plan: self.logical_plan,
opt_state,
keys: by.as_ref().to_vec(),
keys: group_by.as_ref().to_vec(),
maintain_order: true,
dynamic_options: Some(options),
rolling_options: None,
Expand Down
57 changes: 36 additions & 21 deletions crates/polars-time/src/group_by/dynamic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,39 +92,39 @@ const UP_NAME: &str = "_upper_boundary";
pub trait PolarsTemporalGroupby {
fn group_by_rolling(
&self,
by: Vec<Series>,
group_by: Vec<Series>,
options: &RollingGroupOptions,
) -> PolarsResult<(Series, Vec<Series>, GroupsProxy)>;

fn group_by_dynamic(
&self,
by: Vec<Series>,
group_by: Vec<Series>,
options: &DynamicGroupOptions,
) -> PolarsResult<(Series, Vec<Series>, GroupsProxy)>;
}

impl PolarsTemporalGroupby for DataFrame {
fn group_by_rolling(
&self,
by: Vec<Series>,
group_by: Vec<Series>,
options: &RollingGroupOptions,
) -> PolarsResult<(Series, Vec<Series>, GroupsProxy)> {
Wrap(self).group_by_rolling(by, options)
Wrap(self).group_by_rolling(group_by, options)
}

fn group_by_dynamic(
&self,
by: Vec<Series>,
group_by: Vec<Series>,
options: &DynamicGroupOptions,
) -> PolarsResult<(Series, Vec<Series>, GroupsProxy)> {
Wrap(self).group_by_dynamic(by, options)
Wrap(self).group_by_dynamic(group_by, options)
}
}

impl Wrap<&DataFrame> {
fn group_by_rolling(
&self,
by: Vec<Series>,
group_by: Vec<Series>,
options: &RollingGroupOptions,
) -> PolarsResult<(Series, Vec<Series>, GroupsProxy)> {
polars_ensure!(
Expand All @@ -133,7 +133,7 @@ impl Wrap<&DataFrame> {
"rolling window period should be strictly positive",
);
let time = self.0.column(&options.index_column)?.clone();
if by.is_empty() {
if group_by.is_empty() {
// If `group_by` is given, the column must be sorted within the `group_by` groups, which we cannot check now;
// this will be checked when the groups are materialized.
ensure_sorted_arg(&time, "group_by_rolling")?;
Expand All @@ -155,7 +155,7 @@ impl Wrap<&DataFrame> {
let dt = time.cast(&Int64).unwrap().cast(&time_type_dt).unwrap();
let (out, by, gt) = self.impl_group_by_rolling(
dt,
by,
group_by,
options,
TimeUnit::Nanoseconds,
None,
Expand All @@ -169,7 +169,7 @@ impl Wrap<&DataFrame> {
let dt = time.cast(&time_type).unwrap();
let (out, by, gt) = self.impl_group_by_rolling(
dt,
by,
group_by,
options,
TimeUnit::Nanoseconds,
None,
Expand All @@ -186,17 +186,22 @@ impl Wrap<&DataFrame> {
};
match tz {
#[cfg(feature = "timezones")]
Some(tz) => {
self.impl_group_by_rolling(dt, by, options, tu, tz.parse::<Tz>().ok(), time_type)
},
_ => self.impl_group_by_rolling(dt, by, options, tu, None, time_type),
Some(tz) => self.impl_group_by_rolling(
dt,
group_by,
options,
tu,
tz.parse::<Tz>().ok(),
time_type,
),
_ => self.impl_group_by_rolling(dt, group_by, options, tu, None, time_type),
}
}

/// Returns: time_keys, keys, groupsproxy.
fn group_by_dynamic(
&self,
by: Vec<Series>,
group_by: Vec<Series>,
options: &DynamicGroupOptions,
) -> PolarsResult<(Series, Vec<Series>, GroupsProxy)> {
if options.offset.parsed_int || options.every.parsed_int || options.period.parsed_int {
Expand All @@ -209,7 +214,7 @@ impl Wrap<&DataFrame> {
}

let time = self.0.column(&options.index_column)?.rechunk();
if by.is_empty() {
if group_by.is_empty() {
// If `group_by` is given, the column must be sorted within the `group_by` groups, which we cannot check now;
// this will be checked when the groups are materialized.
ensure_sorted_arg(&time, "group_by_dynamic")?;
Expand All @@ -228,8 +233,13 @@ impl Wrap<&DataFrame> {
Int32 => {
let time_type = Datetime(TimeUnit::Nanoseconds, None);
let dt = time.cast(&Int64).unwrap().cast(&time_type).unwrap();
let (out, mut keys, gt) =
self.impl_group_by_dynamic(dt, by, options, TimeUnit::Nanoseconds, &time_type)?;
let (out, mut keys, gt) = self.impl_group_by_dynamic(
dt,
group_by,
options,
TimeUnit::Nanoseconds,
&time_type,
)?;
let out = out.cast(&Int64).unwrap().cast(&Int32).unwrap();
for k in &mut keys {
if k.name() == UP_NAME || k.name() == LB_NAME {
Expand All @@ -241,8 +251,13 @@ impl Wrap<&DataFrame> {
Int64 => {
let time_type = Datetime(TimeUnit::Nanoseconds, None);
let dt = time.cast(&time_type).unwrap();
let (out, mut keys, gt) =
self.impl_group_by_dynamic(dt, by, options, TimeUnit::Nanoseconds, &time_type)?;
let (out, mut keys, gt) = self.impl_group_by_dynamic(
dt,
group_by,
options,
TimeUnit::Nanoseconds,
&time_type,
)?;
let out = out.cast(&Int64).unwrap();
for k in &mut keys {
if k.name() == UP_NAME || k.name() == LB_NAME {
Expand All @@ -257,7 +272,7 @@ impl Wrap<&DataFrame> {
dt
),
};
self.impl_group_by_dynamic(dt, by, options, tu, time_type)
self.impl_group_by_dynamic(dt, group_by, options, tu, time_type)
}

fn impl_group_by_dynamic(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
"time",
every="1h",
closed="both",
by="groups",
group_by="groups",
include_boundaries=True,
).agg(pl.len())
print(out)
Expand Down
39 changes: 21 additions & 18 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5464,14 +5464,15 @@ def group_by(
"""
return GroupBy(self, *by, **named_by, maintain_order=maintain_order)

@deprecate_renamed_parameter("by", "group_by", version="0.20.14")
def rolling(
self,
index_column: IntoExpr,
*,
period: str | timedelta,
offset: str | timedelta | None = None,
closed: ClosedInterval = "right",
by: IntoExpr | Iterable[IntoExpr] | None = None,
group_by: IntoExpr | Iterable[IntoExpr] | None = None,
check_sorted: bool = True,
) -> RollingGroupBy:
"""
Expand Down Expand Up @@ -5536,7 +5537,7 @@ def rolling(
offset of the window. Default is -period
closed : {'right', 'left', 'both', 'none'}
Define which sides of the temporal interval are closed (inclusive).
by
group_by
Also group by this column/these columns
check_sorted
When the `group_by` argument is given, polars cannot check sortedness
Expand Down Expand Up @@ -5602,10 +5603,11 @@ def rolling(
period=period,
offset=offset,
closed=closed,
by=by,
group_by=group_by,
check_sorted=check_sorted,
)

@deprecate_renamed_parameter("by", "group_by", version="0.20.14")
def group_by_dynamic(
self,
index_column: IntoExpr,
Expand All @@ -5617,7 +5619,7 @@ def group_by_dynamic(
include_boundaries: bool = False,
closed: ClosedInterval = "left",
label: Label = "left",
by: IntoExpr | Iterable[IntoExpr] | None = None,
group_by: IntoExpr | Iterable[IntoExpr] | None = None,
start_by: StartBy = "window",
check_sorted: bool = True,
) -> DynamicGroupBy:
Expand Down Expand Up @@ -5677,7 +5679,7 @@ def group_by_dynamic(
- 'datapoint': the first value of the index column in the given window.
If you don't need the label to be at one of the boundaries, choose this
option for maximum performance
by
group_by
Also group by this column/these columns
start_by : {'window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'}
The strategy to determine the start of the first window by.
Expand Down Expand Up @@ -5874,7 +5876,7 @@ def group_by_dynamic(
... "time",
... every="1h",
... closed="both",
... by="groups",
... group_by="groups",
... include_boundaries=True,
... ).agg(pl.col("n"))
shape: (7, 5)
Expand Down Expand Up @@ -5934,18 +5936,19 @@ def group_by_dynamic(
label=label,
include_boundaries=include_boundaries,
closed=closed,
by=by,
group_by=group_by,
start_by=start_by,
check_sorted=check_sorted,
)

@deprecate_renamed_parameter("by", "group_by", version="0.20.14")
def upsample(
self,
time_column: str,
*,
every: str | timedelta,
offset: str | timedelta | None = None,
by: str | Sequence[str] | None = None,
group_by: str | Sequence[str] | None = None,
maintain_order: bool = False,
) -> Self:
"""
Expand Down Expand Up @@ -5985,7 +5988,7 @@ def upsample(
Interval will start 'every' duration.
offset
Change the start of the date_range by this offset.
by
group_by
First group by these columns and then upsample for every group.
maintain_order
Keep the ordering predictable. This is slower.
Expand Down Expand Up @@ -6014,7 +6017,7 @@ def upsample(
... }
... ).set_sorted("time")
>>> df.upsample(
... time_column="time", every="1mo", by="groups", maintain_order=True
... time_column="time", every="1mo", group_by="groups", maintain_order=True
... ).select(pl.all().forward_fill())
shape: (7, 3)
┌─────────────────────┬────────┬────────┐
Expand All @@ -6033,18 +6036,18 @@ def upsample(
"""
every = deprecate_saturating(every)
offset = deprecate_saturating(offset)
if by is None:
by = []
if isinstance(by, str):
by = [by]
if group_by is None:
group_by = []
if isinstance(group_by, str):
group_by = [group_by]
if offset is None:
offset = "0ns"

every = parse_as_duration_string(every)
offset = parse_as_duration_string(offset)

return self._from_pydf(
self._df.upsample(by, time_column, every, offset, maintain_order)
self._df.upsample(group_by, time_column, every, offset, maintain_order)
)

def join_asof(
Expand Down Expand Up @@ -10578,7 +10581,7 @@ def groupby_rolling(
period=period,
offset=offset,
closed=closed,
by=by,
group_by=by,
check_sorted=check_sorted,
)

Expand Down Expand Up @@ -10630,7 +10633,7 @@ def group_by_rolling(
period=period,
offset=offset,
closed=closed,
by=by,
group_by=by,
check_sorted=check_sorted,
)

Expand Down Expand Up @@ -10718,7 +10721,7 @@ def groupby_dynamic(
truncate=truncate,
include_boundaries=include_boundaries,
closed=closed,
by=by,
group_by=by,
start_by=start_by,
check_sorted=check_sorted,
)
Expand Down
Loading

0 comments on commit 8b13ee1

Please sign in to comment.