Skip to content

Commit

Permalink
depr: rename by to group_by in dataframe.upsample, dataframe.group_by…
Browse files Browse the repository at this point in the history
…_dynamic, and dataframe.rolling
  • Loading branch information
MarcoGorelli committed Mar 4, 2024
1 parent baacf3d commit 8b13ee1
Show file tree
Hide file tree
Showing 11 changed files with 117 additions and 91 deletions.
14 changes: 7 additions & 7 deletions crates/polars-lazy/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -973,7 +973,7 @@ impl LazyFrame {
pub fn group_by_rolling<E: AsRef<[Expr]>>(
self,
index_column: Expr,
by: E,
group_by: E,
mut options: RollingGroupOptions,
) -> LazyGroupBy {
if let Expr::Column(name) = index_column {
Expand All @@ -984,15 +984,15 @@ impl LazyFrame {
.unwrap();
return self.with_column(index_column).group_by_rolling(
Expr::Column(Arc::from(output_field.name().as_str())),
by,
group_by,
options,
);
}
let opt_state = self.get_opt_state();
LazyGroupBy {
logical_plan: self.logical_plan,
opt_state,
keys: by.as_ref().to_vec(),
keys: group_by.as_ref().to_vec(),
maintain_order: true,
dynamic_options: None,
rolling_options: Some(options),
Expand All @@ -1012,13 +1012,13 @@ impl LazyFrame {
/// - period: length of the window
/// - offset: offset of the window
///
/// The `by` argument should be empty `[]` if you don't want to combine this
/// The `group_by` argument should be empty `[]` if you don't want to combine this
/// with an ordinary group_by on these keys.
#[cfg(feature = "dynamic_group_by")]
pub fn group_by_dynamic<E: AsRef<[Expr]>>(
self,
index_column: Expr,
by: E,
group_by: E,
mut options: DynamicGroupOptions,
) -> LazyGroupBy {
if let Expr::Column(name) = index_column {
Expand All @@ -1029,15 +1029,15 @@ impl LazyFrame {
.unwrap();
return self.with_column(index_column).group_by_dynamic(
Expr::Column(Arc::from(output_field.name().as_str())),
by,
group_by,
options,
);
}
let opt_state = self.get_opt_state();
LazyGroupBy {
logical_plan: self.logical_plan,
opt_state,
keys: by.as_ref().to_vec(),
keys: group_by.as_ref().to_vec(),
maintain_order: true,
dynamic_options: Some(options),
rolling_options: None,
Expand Down
57 changes: 36 additions & 21 deletions crates/polars-time/src/group_by/dynamic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,39 +92,39 @@ const UP_NAME: &str = "_upper_boundary";
pub trait PolarsTemporalGroupby {
fn group_by_rolling(
&self,
by: Vec<Series>,
group_by: Vec<Series>,
options: &RollingGroupOptions,
) -> PolarsResult<(Series, Vec<Series>, GroupsProxy)>;

fn group_by_dynamic(
&self,
by: Vec<Series>,
group_by: Vec<Series>,
options: &DynamicGroupOptions,
) -> PolarsResult<(Series, Vec<Series>, GroupsProxy)>;
}

impl PolarsTemporalGroupby for DataFrame {
fn group_by_rolling(
&self,
by: Vec<Series>,
group_by: Vec<Series>,
options: &RollingGroupOptions,
) -> PolarsResult<(Series, Vec<Series>, GroupsProxy)> {
Wrap(self).group_by_rolling(by, options)
Wrap(self).group_by_rolling(group_by, options)
}

fn group_by_dynamic(
&self,
by: Vec<Series>,
group_by: Vec<Series>,
options: &DynamicGroupOptions,
) -> PolarsResult<(Series, Vec<Series>, GroupsProxy)> {
Wrap(self).group_by_dynamic(by, options)
Wrap(self).group_by_dynamic(group_by, options)
}
}

impl Wrap<&DataFrame> {
fn group_by_rolling(
&self,
by: Vec<Series>,
group_by: Vec<Series>,
options: &RollingGroupOptions,
) -> PolarsResult<(Series, Vec<Series>, GroupsProxy)> {
polars_ensure!(
Expand All @@ -133,7 +133,7 @@ impl Wrap<&DataFrame> {
"rolling window period should be strictly positive",
);
let time = self.0.column(&options.index_column)?.clone();
if by.is_empty() {
if group_by.is_empty() {
// If `group_by` is given, the column must be sorted within the `group_by` groups, which we cannot check now;
// this will be checked when the groups are materialized.
ensure_sorted_arg(&time, "group_by_rolling")?;
Expand All @@ -155,7 +155,7 @@ impl Wrap<&DataFrame> {
let dt = time.cast(&Int64).unwrap().cast(&time_type_dt).unwrap();
let (out, by, gt) = self.impl_group_by_rolling(
dt,
by,
group_by,
options,
TimeUnit::Nanoseconds,
None,
Expand All @@ -169,7 +169,7 @@ impl Wrap<&DataFrame> {
let dt = time.cast(&time_type).unwrap();
let (out, by, gt) = self.impl_group_by_rolling(
dt,
by,
group_by,
options,
TimeUnit::Nanoseconds,
None,
Expand All @@ -186,17 +186,22 @@ impl Wrap<&DataFrame> {
};
match tz {
#[cfg(feature = "timezones")]
Some(tz) => {
self.impl_group_by_rolling(dt, by, options, tu, tz.parse::<Tz>().ok(), time_type)
},
_ => self.impl_group_by_rolling(dt, by, options, tu, None, time_type),
Some(tz) => self.impl_group_by_rolling(
dt,
group_by,
options,
tu,
tz.parse::<Tz>().ok(),
time_type,
),
_ => self.impl_group_by_rolling(dt, group_by, options, tu, None, time_type),
}
}

/// Returns: time_keys, keys, groupsproxy.
fn group_by_dynamic(
&self,
by: Vec<Series>,
group_by: Vec<Series>,
options: &DynamicGroupOptions,
) -> PolarsResult<(Series, Vec<Series>, GroupsProxy)> {
if options.offset.parsed_int || options.every.parsed_int || options.period.parsed_int {
Expand All @@ -209,7 +214,7 @@ impl Wrap<&DataFrame> {
}

let time = self.0.column(&options.index_column)?.rechunk();
if by.is_empty() {
if group_by.is_empty() {
// If `group_by` is given, the column must be sorted within the `group_by` groups, which we cannot check now;
// this will be checked when the groups are materialized.
ensure_sorted_arg(&time, "group_by_dynamic")?;
Expand All @@ -228,8 +233,13 @@ impl Wrap<&DataFrame> {
Int32 => {
let time_type = Datetime(TimeUnit::Nanoseconds, None);
let dt = time.cast(&Int64).unwrap().cast(&time_type).unwrap();
let (out, mut keys, gt) =
self.impl_group_by_dynamic(dt, by, options, TimeUnit::Nanoseconds, &time_type)?;
let (out, mut keys, gt) = self.impl_group_by_dynamic(
dt,
group_by,
options,
TimeUnit::Nanoseconds,
&time_type,
)?;
let out = out.cast(&Int64).unwrap().cast(&Int32).unwrap();
for k in &mut keys {
if k.name() == UP_NAME || k.name() == LB_NAME {
Expand All @@ -241,8 +251,13 @@ impl Wrap<&DataFrame> {
Int64 => {
let time_type = Datetime(TimeUnit::Nanoseconds, None);
let dt = time.cast(&time_type).unwrap();
let (out, mut keys, gt) =
self.impl_group_by_dynamic(dt, by, options, TimeUnit::Nanoseconds, &time_type)?;
let (out, mut keys, gt) = self.impl_group_by_dynamic(
dt,
group_by,
options,
TimeUnit::Nanoseconds,
&time_type,
)?;
let out = out.cast(&Int64).unwrap();
for k in &mut keys {
if k.name() == UP_NAME || k.name() == LB_NAME {
Expand All @@ -257,7 +272,7 @@ impl Wrap<&DataFrame> {
dt
),
};
self.impl_group_by_dynamic(dt, by, options, tu, time_type)
self.impl_group_by_dynamic(dt, group_by, options, tu, time_type)
}

fn impl_group_by_dynamic(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
"time",
every="1h",
closed="both",
by="groups",
group_by="groups",
include_boundaries=True,
).agg(pl.len())
print(out)
Expand Down
39 changes: 21 additions & 18 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5464,14 +5464,15 @@ def group_by(
"""
return GroupBy(self, *by, **named_by, maintain_order=maintain_order)

@deprecate_renamed_parameter("by", "group_by", version="0.20.14")
def rolling(
self,
index_column: IntoExpr,
*,
period: str | timedelta,
offset: str | timedelta | None = None,
closed: ClosedInterval = "right",
by: IntoExpr | Iterable[IntoExpr] | None = None,
group_by: IntoExpr | Iterable[IntoExpr] | None = None,
check_sorted: bool = True,
) -> RollingGroupBy:
"""
Expand Down Expand Up @@ -5536,7 +5537,7 @@ def rolling(
offset of the window. Default is -period
closed : {'right', 'left', 'both', 'none'}
Define which sides of the temporal interval are closed (inclusive).
by
group_by
Also group by this column/these columns
check_sorted
When the `group_by` argument is given, polars cannot check sortedness
Expand Down Expand Up @@ -5602,10 +5603,11 @@ def rolling(
period=period,
offset=offset,
closed=closed,
by=by,
group_by=group_by,
check_sorted=check_sorted,
)

@deprecate_renamed_parameter("by", "group_by", version="0.20.14")
def group_by_dynamic(
self,
index_column: IntoExpr,
Expand All @@ -5617,7 +5619,7 @@ def group_by_dynamic(
include_boundaries: bool = False,
closed: ClosedInterval = "left",
label: Label = "left",
by: IntoExpr | Iterable[IntoExpr] | None = None,
group_by: IntoExpr | Iterable[IntoExpr] | None = None,
start_by: StartBy = "window",
check_sorted: bool = True,
) -> DynamicGroupBy:
Expand Down Expand Up @@ -5677,7 +5679,7 @@ def group_by_dynamic(
- 'datapoint': the first value of the index column in the given window.
If you don't need the label to be at one of the boundaries, choose this
option for maximum performance
by
group_by
Also group by this column/these columns
start_by : {'window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'}
The strategy to determine the start of the first window by.
Expand Down Expand Up @@ -5874,7 +5876,7 @@ def group_by_dynamic(
... "time",
... every="1h",
... closed="both",
... by="groups",
... group_by="groups",
... include_boundaries=True,
... ).agg(pl.col("n"))
shape: (7, 5)
Expand Down Expand Up @@ -5934,18 +5936,19 @@ def group_by_dynamic(
label=label,
include_boundaries=include_boundaries,
closed=closed,
by=by,
group_by=group_by,
start_by=start_by,
check_sorted=check_sorted,
)

@deprecate_renamed_parameter("by", "group_by", version="0.20.14")
def upsample(
self,
time_column: str,
*,
every: str | timedelta,
offset: str | timedelta | None = None,
by: str | Sequence[str] | None = None,
group_by: str | Sequence[str] | None = None,
maintain_order: bool = False,
) -> Self:
"""
Expand Down Expand Up @@ -5985,7 +5988,7 @@ def upsample(
Interval will start 'every' duration.
offset
Change the start of the date_range by this offset.
by
group_by
First group by these columns and then upsample for every group.
maintain_order
Keep the ordering predictable. This is slower.
Expand Down Expand Up @@ -6014,7 +6017,7 @@ def upsample(
... }
... ).set_sorted("time")
>>> df.upsample(
... time_column="time", every="1mo", by="groups", maintain_order=True
... time_column="time", every="1mo", group_by="groups", maintain_order=True
... ).select(pl.all().forward_fill())
shape: (7, 3)
┌─────────────────────┬────────┬────────┐
Expand All @@ -6033,18 +6036,18 @@ def upsample(
"""
every = deprecate_saturating(every)
offset = deprecate_saturating(offset)
if by is None:
by = []
if isinstance(by, str):
by = [by]
if group_by is None:
group_by = []
if isinstance(group_by, str):
group_by = [group_by]
if offset is None:
offset = "0ns"

every = parse_as_duration_string(every)
offset = parse_as_duration_string(offset)

return self._from_pydf(
self._df.upsample(by, time_column, every, offset, maintain_order)
self._df.upsample(group_by, time_column, every, offset, maintain_order)
)

def join_asof(
Expand Down Expand Up @@ -10578,7 +10581,7 @@ def groupby_rolling(
period=period,
offset=offset,
closed=closed,
by=by,
group_by=by,
check_sorted=check_sorted,
)

Expand Down Expand Up @@ -10630,7 +10633,7 @@ def group_by_rolling(
period=period,
offset=offset,
closed=closed,
by=by,
group_by=by,
check_sorted=check_sorted,
)

Expand Down Expand Up @@ -10718,7 +10721,7 @@ def groupby_dynamic(
truncate=truncate,
include_boundaries=include_boundaries,
closed=closed,
by=by,
group_by=by,
start_by=start_by,
check_sorted=check_sorted,
)
Expand Down
Loading

0 comments on commit 8b13ee1

Please sign in to comment.