diff --git a/crates/polars-core/src/chunked_array/ops/mod.rs b/crates/polars-core/src/chunked_array/ops/mod.rs index 1d3db00e5a40..4d402de9372e 100644 --- a/crates/polars-core/src/chunked_array/ops/mod.rs +++ b/crates/polars-core/src/chunked_array/ops/mod.rs @@ -24,7 +24,7 @@ pub(crate) mod downcast; pub(crate) mod explode; mod explode_and_offsets; mod extend; -mod fill_null; +pub mod fill_null; mod filter; mod for_each; pub mod full; diff --git a/crates/polars-lazy/src/frame/mod.rs b/crates/polars-lazy/src/frame/mod.rs index ca3978723188..9800fa38aeb1 100644 --- a/crates/polars-lazy/src/frame/mod.rs +++ b/crates/polars-lazy/src/frame/mod.rs @@ -25,6 +25,7 @@ use polars_arrow::prelude::QuantileInterpolOptions; use polars_core::frame::explode::MeltArgs; use polars_core::prelude::*; use polars_io::RowCount; +use polars_plan::dsl::all_horizontal; pub use polars_plan::frame::{AllowedOptimizations, OptState}; use polars_plan::global::FETCH_ROWS; #[cfg(any(feature = "ipc", feature = "parquet", feature = "csv"))] diff --git a/crates/polars-ops/src/series/ops/horizontal.rs b/crates/polars-ops/src/series/ops/horizontal.rs new file mode 100644 index 000000000000..1328f2a2ce77 --- /dev/null +++ b/crates/polars-ops/src/series/ops/horizontal.rs @@ -0,0 +1,68 @@ +use std::ops::{BitAnd, BitOr}; + +use polars_core::prelude::*; +use polars_core::POOL; +use rayon::prelude::*; + +pub fn sum_horizontal(s: &[Series]) -> PolarsResult { + let out = POOL + .install(|| { + s.par_iter() + .try_fold( + || UInt32Chunked::new("", &[0u32]).into_series(), + |acc, b| { + PolarsResult::Ok( + acc.fill_null(FillNullStrategy::Zero)? + + b.fill_null(FillNullStrategy::Zero)?, + ) + }, + ) + .try_reduce( + || UInt32Chunked::new("", &[0u32]).into_series(), + |a, b| { + PolarsResult::Ok( + a.fill_null(FillNullStrategy::Zero)? + + b.fill_null(FillNullStrategy::Zero)?, + ) + }, + ) + })? + .with_name("sum"); + Ok(out) +} + +pub fn any_horizontal(s: &[Series]) -> PolarsResult { + let out = POOL + .install(|| { + s.par_iter() + .try_fold( + || BooleanChunked::new("", &[false]), + |acc, b| { + let b = b.cast(&DataType::Boolean)?; + let b = b.bool()?; + PolarsResult::Ok((&acc).bitor(b)) + }, + ) + .try_reduce(|| BooleanChunked::new("", [false]), |a, b| Ok(a.bitor(b))) + })? + .with_name("any"); + Ok(out.into_series()) +} + +pub fn all_horizontal(s: &[Series]) -> PolarsResult { + let out = POOL + .install(|| { + s.par_iter() + .try_fold( + || BooleanChunked::new("", &[true]), + |acc, b| { + let b = b.cast(&DataType::Boolean)?; + let b = b.bool()?; + PolarsResult::Ok((&acc).bitand(b)) + }, + ) + .try_reduce(|| BooleanChunked::new("", [true]), |a, b| Ok(a.bitand(b))) + })? + .with_name("all"); + Ok(out.into_series()) +} diff --git a/crates/polars-ops/src/series/ops/mod.rs b/crates/polars-ops/src/series/ops/mod.rs index 8858f774e527..d4c10d7fd078 100644 --- a/crates/polars-ops/src/series/ops/mod.rs +++ b/crates/polars-ops/src/series/ops/mod.rs @@ -9,6 +9,7 @@ mod cut; mod floor_divide; #[cfg(feature = "fused")] mod fused; +mod horizontal; #[cfg(feature = "convert_index")] mod index; #[cfg(feature = "is_first_distinct")] @@ -44,6 +45,7 @@ pub use cut::*; pub use floor_divide::*; #[cfg(feature = "fused")] pub use fused::*; +pub use horizontal::*; #[cfg(feature = "convert_index")] pub use index::*; #[cfg(feature = "is_first_distinct")] diff --git a/crates/polars-plan/src/dsl/function_expr/boolean.rs b/crates/polars-plan/src/dsl/function_expr/boolean.rs index 2f225596fb5e..ed829773ce66 100644 --- a/crates/polars-plan/src/dsl/function_expr/boolean.rs +++ b/crates/polars-plan/src/dsl/function_expr/boolean.rs @@ -1,10 +1,7 @@ -use std::ops::{BitAnd, BitOr, Not}; - -use polars_core::POOL; -use rayon::prelude::*; +use std::ops::Not; use super::*; -use crate::{map, wrap}; +use crate::{map, map_as_slice, wrap}; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Clone, PartialEq, Debug, Eq, Hash)] @@ -99,8 +96,8 @@ impl From for SpecialEq> { IsDuplicated => map!(is_duplicated), #[cfg(feature = "is_in")] IsIn => wrap!(is_in), - AllHorizontal => wrap!(all_horizontal), - AnyHorizontal => wrap!(any_horizontal), + AllHorizontal => map_as_slice!(all_horizontal), + AnyHorizontal => map_as_slice!(any_horizontal), Not => map!(not_), } } @@ -181,38 +178,12 @@ fn is_in(s: &mut [Series]) -> PolarsResult> { polars_ops::prelude::is_in(left, other).map(|ca| Some(ca.into_series())) } -fn any_horizontal(s: &mut [Series]) -> PolarsResult> { - let mut out = POOL.install(|| { - s.par_iter() - .try_fold( - || BooleanChunked::new("", &[false]), - |acc, b| { - let b = b.cast(&DataType::Boolean)?; - let b = b.bool()?; - PolarsResult::Ok((&acc).bitor(b)) - }, - ) - .try_reduce(|| BooleanChunked::new("", [false]), |a, b| Ok(a.bitor(b))) - })?; - out.rename("any"); - Ok(Some(out.into_series())) -} - -fn all_horizontal(s: &mut [Series]) -> PolarsResult> { - let mut out = POOL.install(|| { - s.par_iter() - .try_fold( - || BooleanChunked::new("", &[true]), - |acc, b| { - let b = b.cast(&DataType::Boolean)?; - let b = b.bool()?; - PolarsResult::Ok((&acc).bitand(b)) - }, - ) - .try_reduce(|| BooleanChunked::new("", [true]), |a, b| Ok(a.bitand(b))) - })?; - out.rename("all"); - Ok(Some(out.into_series())) +fn any_horizontal(s: &[Series]) -> PolarsResult { + polars_ops::prelude::any_horizontal(s) +} + +fn all_horizontal(s: &[Series]) -> PolarsResult { + polars_ops::prelude::all_horizontal(s) } fn not_(s: &Series) -> PolarsResult { diff --git a/crates/polars-plan/src/dsl/function_expr/dispatch.rs b/crates/polars-plan/src/dsl/function_expr/dispatch.rs index 24fada5899e9..755594c6cef5 100644 --- a/crates/polars-plan/src/dsl/function_expr/dispatch.rs +++ b/crates/polars-plan/src/dsl/function_expr/dispatch.rs @@ -59,3 +59,7 @@ pub(super) fn backward_fill(s: &Series, limit: FillNullLimit) -> PolarsResult PolarsResult { s.fill_null(FillNullStrategy::Forward(limit)) } + +pub(super) fn sum_horizontal(s: &[Series]) -> PolarsResult { + polars_ops::prelude::sum_horizontal(s) +} diff --git a/crates/polars-plan/src/dsl/function_expr/mod.rs b/crates/polars-plan/src/dsl/function_expr/mod.rs index da6869a2cca8..4b3114f9acf3 100644 --- a/crates/polars-plan/src/dsl/function_expr/mod.rs +++ b/crates/polars-plan/src/dsl/function_expr/mod.rs @@ -256,6 +256,7 @@ pub enum FunctionExpr { ForwardFill { limit: FillNullLimit, }, + SumHorizontal, } impl Hash for FunctionExpr { @@ -427,6 +428,7 @@ impl Display for FunctionExpr { FfiPlugin { lib, symbol, .. } => return write!(f, "{lib}:{symbol}"), BackwardFill { .. } => "backward_fill", ForwardFill { .. } => "forward_fill", + SumHorizontal => "sum_horizontal", }; write!(f, "{s}") } @@ -746,6 +748,7 @@ impl From for SpecialEq> { }, BackwardFill { limit } => map!(dispatch::backward_fill, limit), ForwardFill { limit } => map!(dispatch::forward_fill, limit), + SumHorizontal => map_as_slice!(dispatch::sum_horizontal), } } } diff --git a/crates/polars-plan/src/dsl/function_expr/schema.rs b/crates/polars-plan/src/dsl/function_expr/schema.rs index e8b0d3061d10..c27f745a6fd2 100644 --- a/crates/polars-plan/src/dsl/function_expr/schema.rs +++ b/crates/polars-plan/src/dsl/function_expr/schema.rs @@ -236,6 +236,7 @@ impl FunctionExpr { }, BackwardFill { .. } => mapper.with_same_dtype(), ForwardFill { .. } => mapper.with_same_dtype(), + SumHorizontal => mapper.map_to_supertype(), } } } diff --git a/crates/polars-plan/src/dsl/functions/horizontal.rs b/crates/polars-plan/src/dsl/functions/horizontal.rs index 87631e140209..ca4fc074d006 100644 --- a/crates/polars-plan/src/dsl/functions/horizontal.rs +++ b/crates/polars-plan/src/dsl/functions/horizontal.rs @@ -262,19 +262,20 @@ pub fn min_horizontal>(exprs: E) -> Expr { /// /// The name of the resulting column will be `"sum"`; use [`alias`](Expr::alias) to choose a different name. pub fn sum_horizontal>(exprs: E) -> Expr { - let mut exprs = exprs.as_ref().to_vec(); - let func = |s1: Series, s2: Series| { - Ok(Some( - &s1.fill_null(FillNullStrategy::Zero).unwrap() - + &s2.fill_null(FillNullStrategy::Zero).unwrap(), - )) - }; - let init = match exprs.pop() { - Some(e) => e, - // use u32 as that is not cast to float as eagerly - _ => lit(0u32), - }; - fold_exprs(init, func, exprs).alias("sum") + let exprs = exprs.as_ref().to_vec(); + + Expr::Function { + input: exprs, + function: FunctionExpr::SumHorizontal, + options: FunctionOptions { + collect_groups: ApplyOptions::ApplyFlat, + input_wildcard_expansion: true, + auto_explode: true, + cast_to_supertypes: false, + allow_rename: true, + ..Default::default() + }, + } } /// Folds the expressions from left to right keeping the first non-null values. diff --git a/crates/polars/tests/it/lazy/folds.rs b/crates/polars/tests/it/lazy/folds.rs index 3a13b814b2ff..1b3f908fa914 100644 --- a/crates/polars/tests/it/lazy/folds.rs +++ b/crates/polars/tests/it/lazy/folds.rs @@ -21,7 +21,7 @@ fn test_fold_wildcard() -> PolarsResult<()> { // test if we don't panic due to wildcard let _out = df1 .lazy() - .select([all_horizontal([col("*").is_not_null()])]) + .select([polars_lazy::dsl::all_horizontal([col("*").is_not_null()])]) .collect()?; Ok(()) } diff --git a/py-polars/tests/unit/test_errors.py b/py-polars/tests/unit/test_errors.py index 0076d9d85910..c4e7865489d8 100644 --- a/py-polars/tests/unit/test_errors.py +++ b/py-polars/tests/unit/test_errors.py @@ -688,7 +688,7 @@ def test_sort_by_err_9259() -> None: def test_empty_inputs_error() -> None: df = pl.DataFrame({"col1": [1]}) with pytest.raises( - pl.ComputeError, match="expression: 'fold' didn't get any inputs" + pl.ComputeError, match="expression: 'sum_horizontal' didn't get any inputs" ): df.select(pl.sum_horizontal(pl.exclude("col1")))