From 962b576b1cc678840d52fdcd67a1c117db49d0bb Mon Sep 17 00:00:00 2001 From: Gijs Burghoorn Date: Sat, 14 Sep 2024 13:02:18 +0200 Subject: [PATCH] refactor: Make DataFrame a Vec of `Column` instead of `Series` (#18664) --- crates/polars-arrow/src/bitmap/bitmask.rs | 4 +- crates/polars-core/src/chunked_array/cast.rs | 2 +- .../logical/categorical/ops/unique.rs | 2 +- .../src/chunked_array/logical/datetime.rs | 4 +- .../polars-core/src/chunked_array/ndarray.rs | 6 +- .../src/chunked_array/ops/any_value.rs | 2 +- .../src/chunked_array/ops/fill_null.rs | 2 +- .../polars-core/src/chunked_array/ops/full.rs | 4 +- .../polars-core/src/chunked_array/ops/mod.rs | 3 +- .../chunked_array/ops/sort/arg_bottom_k.rs | 2 +- .../ops/sort/arg_sort_multiple.rs | 15 +- .../src/chunked_array/ops/sort/categorical.rs | 6 +- .../src/chunked_array/ops/sort/mod.rs | 32 +- .../polars-core/src/chunked_array/ops/zip.rs | 2 +- .../polars-core/src/chunked_array/random.rs | 4 +- .../src/chunked_array/struct_/frame.rs | 2 +- .../src/chunked_array/struct_/mod.rs | 65 +- .../src/chunked_array/temporal/mod.rs | 2 +- crates/polars-core/src/datatypes/any_value.rs | 196 ++- crates/polars-core/src/fmt.rs | 50 +- crates/polars-core/src/frame/arithmetic.rs | 11 +- crates/polars-core/src/frame/chunks.rs | 7 +- .../src/frame/column/arithmetic.rs | 154 ++ crates/polars-core/src/frame/column/mod.rs | 1266 +++++++++++++++++ crates/polars-core/src/frame/explode.rs | 63 +- crates/polars-core/src/frame/from.rs | 1 + crates/polars-core/src/frame/group_by/mod.rs | 106 +- crates/polars-core/src/frame/horizontal.rs | 23 +- crates/polars-core/src/frame/mod.rs | 509 ++++--- crates/polars-core/src/frame/row/av_buffer.rs | 2 +- crates/polars-core/src/frame/row/dataframe.rs | 28 +- crates/polars-core/src/frame/row/mod.rs | 2 +- crates/polars-core/src/frame/row/transpose.rs | 84 +- crates/polars-core/src/frame/top_k.rs | 2 +- .../polars-core/src/frame/upstream_traits.rs | 16 +- crates/polars-core/src/functions.rs | 2 +- 
.../polars-core/src/hashing/vector_hasher.rs | 6 +- crates/polars-core/src/lib.rs | 5 + crates/polars-core/src/prelude.rs | 1 + crates/polars-core/src/scalar/from.rs | 27 + crates/polars-core/src/scalar/mod.rs | 8 +- crates/polars-core/src/serde/df.rs | 6 +- crates/polars-core/src/serde/mod.rs | 8 +- crates/polars-core/src/serde/series.rs | 2 +- crates/polars-core/src/series/any_value.rs | 2 +- .../src/series/implementations/binary.rs | 2 +- .../series/implementations/binary_offset.rs | 2 +- .../src/series/implementations/boolean.rs | 2 +- .../src/series/implementations/categorical.rs | 2 +- .../src/series/implementations/date.rs | 2 +- .../src/series/implementations/datetime.rs | 2 +- .../src/series/implementations/duration.rs | 2 +- .../src/series/implementations/floats.rs | 2 +- .../src/series/implementations/mod.rs | 2 +- .../src/series/implementations/string.rs | 2 +- .../src/series/implementations/time.rs | 2 +- crates/polars-core/src/series/mod.rs | 34 +- crates/polars-core/src/series/ops/null.rs | 2 +- crates/polars-core/src/series/series_trait.rs | 18 +- crates/polars-core/src/testing.rs | 4 +- crates/polars-core/src/tests.rs | 6 +- crates/polars-core/src/utils/flatten.rs | 2 +- crates/polars-core/src/utils/mod.rs | 13 +- .../src/expressions/aggregation.rs | 8 +- crates/polars-expr/src/expressions/apply.rs | 101 +- crates/polars-expr/src/expressions/column.rs | 23 +- crates/polars-expr/src/expressions/sortby.rs | 28 +- crates/polars-expr/src/expressions/window.rs | 11 +- crates/polars-expr/src/planner.rs | 6 +- crates/polars-expr/src/state/node_timer.rs | 4 +- crates/polars-ffi/src/version_0.rs | 6 +- crates/polars-io/src/csv/read/read_impl.rs | 28 +- crates/polars-io/src/csv/read/reader.rs | 16 +- crates/polars-io/src/csv/write/write_impl.rs | 6 +- crates/polars-io/src/hive.rs | 4 +- crates/polars-io/src/ipc/ipc_file.rs | 4 +- crates/polars-io/src/ipc/ipc_stream.rs | 6 +- crates/polars-io/src/ndjson/core.rs | 2 +- 
.../polars-io/src/parquet/read/read_impl.rs | 22 +- crates/polars-io/src/shared.rs | 5 +- crates/polars-io/src/utils/other.rs | 7 +- crates/polars-lazy/src/dsl/eval.rs | 36 +- crates/polars-lazy/src/dsl/list.rs | 30 +- crates/polars-lazy/src/lib.rs | 6 +- crates/polars-lazy/src/tests/aggregations.rs | 6 +- crates/polars-lazy/src/tests/arity.rs | 9 +- .../src/tests/optimization_checks.rs | 6 +- .../src/tests/predicate_queries.rs | 2 +- .../src/tests/projection_queries.rs | 2 +- crates/polars-lazy/src/tests/queries.rs | 66 +- .../src/executors/group_by.rs | 7 +- .../src/executors/group_by_dynamic.rs | 4 +- .../src/executors/group_by_partitioned.rs | 28 +- .../src/executors/group_by_rolling.rs | 8 +- .../src/executors/projection_utils.rs | 3 + .../src/executors/scan/python_scan.rs | 2 +- .../polars-mem-engine/src/executors/sort.rs | 2 +- .../polars-mem-engine/src/executors/stack.rs | 6 +- .../src/chunked_array/array/to_struct.rs | 2 +- crates/polars-ops/src/chunked_array/hist.rs | 2 +- .../src/chunked_array/list/namespace.rs | 6 +- .../src/chunked_array/list/to_struct.rs | 2 +- .../src/chunked_array/strings/extract.rs | 2 +- .../src/chunked_array/strings/json_path.rs | 5 +- .../src/chunked_array/strings/namespace.rs | 12 +- .../src/chunked_array/strings/split.rs | 2 +- crates/polars-ops/src/chunked_array/top_k.rs | 31 +- .../polars-ops/src/frame/join/asof/groups.rs | 16 +- crates/polars-ops/src/frame/join/general.rs | 4 +- .../polars-ops/src/frame/join/merge_sorted.rs | 8 +- crates/polars-ops/src/frame/join/mod.rs | 30 +- crates/polars-ops/src/frame/mod.rs | 2 +- crates/polars-ops/src/frame/pivot/mod.rs | 9 +- .../polars-ops/src/frame/pivot/positioning.rs | 51 +- crates/polars-ops/src/frame/pivot/unpivot.rs | 17 +- crates/polars-ops/src/series/ops/cut.rs | 4 +- crates/polars-ops/src/series/ops/duration.rs | 30 +- crates/polars-ops/src/series/ops/fused.rs | 45 +- .../polars-ops/src/series/ops/horizontal.rs | 19 +- .../ops/interpolation/interpolate_by.rs | 10 +- 
crates/polars-ops/src/series/ops/not.rs | 4 +- crates/polars-ops/src/series/ops/replace.rs | 14 +- crates/polars-ops/src/series/ops/rle.rs | 24 +- .../polars-ops/src/series/ops/to_dummies.rs | 4 +- crates/polars-ops/src/series/ops/various.rs | 10 +- .../src/executors/operators/projection.rs | 8 +- .../sinks/group_by/generic/global.rs | 11 +- .../sinks/group_by/generic/hash_table.rs | 10 +- .../executors/sinks/group_by/generic/mod.rs | 11 +- .../src/executors/sinks/group_by/mod.rs | 6 +- .../executors/sinks/group_by/primitive/mod.rs | 8 +- .../src/executors/sinks/group_by/string.rs | 8 +- .../sinks/joins/generic_probe_outer.rs | 2 +- .../src/executors/sinks/sort/ooc.rs | 3 +- .../src/executors/sinks/sort/sink_multiple.rs | 5 +- .../polars-pipe/src/executors/sources/csv.rs | 2 +- crates/polars-pipe/src/operators/chunks.rs | 12 +- crates/polars-plan/src/dsl/array.rs | 2 +- crates/polars-plan/src/dsl/expr.rs | 2 +- crates/polars-plan/src/dsl/expr_dyn_fn.rs | 40 +- .../polars-plan/src/dsl/function_expr/abs.rs | 4 +- .../src/dsl/function_expr/arg_where.rs | 6 +- .../src/dsl/function_expr/array.rs | 86 +- .../src/dsl/function_expr/binary.rs | 34 +- .../src/dsl/function_expr/boolean.rs | 84 +- .../src/dsl/function_expr/bounds.rs | 44 +- .../src/dsl/function_expr/business.rs | 29 +- .../polars-plan/src/dsl/function_expr/cat.rs | 6 +- .../polars-plan/src/dsl/function_expr/clip.rs | 19 +- .../src/dsl/function_expr/coerce.rs | 4 +- .../src/dsl/function_expr/concat.rs | 2 +- .../src/dsl/function_expr/correlation.rs | 58 +- .../polars-plan/src/dsl/function_expr/cum.rs | 25 +- .../polars-plan/src/dsl/function_expr/cut.rs | 37 + .../src/dsl/function_expr/datetime.rs | 266 ++-- .../src/dsl/function_expr/dispatch.rs | 166 ++- .../polars-plan/src/dsl/function_expr/ewm.rs | 12 +- .../src/dsl/function_expr/ewm_by.rs | 14 +- .../src/dsl/function_expr/fill_null.rs | 13 +- .../src/dsl/function_expr/fused.rs | 8 +- .../polars-plan/src/dsl/function_expr/list.rs | 164 ++- 
.../polars-plan/src/dsl/function_expr/log.rs | 20 +- .../polars-plan/src/dsl/function_expr/mod.rs | 44 +- .../polars-plan/src/dsl/function_expr/nan.rs | 6 +- .../src/dsl/function_expr/peaks.rs | 14 +- .../src/dsl/function_expr/plugin.rs | 9 +- .../polars-plan/src/dsl/function_expr/pow.rs | 54 +- .../src/dsl/function_expr/random.rs | 14 +- .../src/dsl/function_expr/range/date_range.rs | 10 +- .../dsl/function_expr/range/datetime_range.rs | 20 +- .../src/dsl/function_expr/range/int_range.rs | 10 +- .../src/dsl/function_expr/range/mod.rs | 4 +- .../src/dsl/function_expr/range/time_range.rs | 12 +- .../src/dsl/function_expr/range/utils.rs | 18 +- .../src/dsl/function_expr/rolling.rs | 58 +- .../src/dsl/function_expr/rolling_by.rs | 63 +- .../src/dsl/function_expr/round.rs | 53 +- .../src/dsl/function_expr/row_hash.rs | 8 +- .../src/dsl/function_expr/search_sorted.rs | 10 +- .../src/dsl/function_expr/shift_and_fill.rs | 19 +- .../src/dsl/function_expr/shrink_type.rs | 34 +- .../polars-plan/src/dsl/function_expr/sign.rs | 9 +- .../src/dsl/function_expr/strings.rs | 226 +-- .../src/dsl/function_expr/struct_.rs | 36 +- .../src/dsl/function_expr/temporal.rs | 12 +- .../src/dsl/function_expr/trigonometry.rs | 122 +- .../src/dsl/function_expr/unique.rs | 2 +- crates/polars-plan/src/dsl/functions/arity.rs | 10 +- .../src/dsl/functions/horizontal.rs | 66 +- .../polars-plan/src/dsl/functions/repeat.rs | 2 +- crates/polars-plan/src/dsl/list.rs | 2 +- crates/polars-plan/src/dsl/mod.rs | 83 +- crates/polars-plan/src/dsl/name.rs | 4 +- crates/polars-plan/src/dsl/python_udf.rs | 16 +- crates/polars-plan/src/dsl/udf.rs | 6 +- crates/polars-plan/src/plans/aexpr/mod.rs | 2 +- .../src/plans/conversion/type_coercion/mod.rs | 2 +- .../polars-plan/src/plans/functions/count.rs | 2 +- .../src/plans/functions/merge_sorted.rs | 14 +- crates/polars-plan/src/plans/lit.rs | 4 +- .../polars-plan/src/plans/python/pyarrow.rs | 4 +- .../polars-python/src/conversion/any_value.rs | 61 +- 
crates/polars-python/src/dataframe/general.rs | 11 +- crates/polars-python/src/expr/datetime.rs | 5 +- crates/polars-python/src/functions/lazy.rs | 36 +- .../polars-python/src/interop/arrow/to_py.rs | 6 +- .../src/interop/arrow/to_rust.rs | 8 +- .../src/interop/numpy/to_numpy_df.rs | 15 +- crates/polars-python/src/map/dataframe.rs | 10 +- crates/polars-python/src/map/lazy.rs | 14 +- crates/polars-python/src/map/mod.rs | 2 +- crates/polars-python/src/on_startup.rs | 8 +- crates/polars-python/src/series/general.rs | 2 +- crates/polars-python/src/series/mod.rs | 13 +- crates/polars-sql/src/context.rs | 8 +- crates/polars-sql/tests/issues.rs | 4 +- crates/polars-sql/tests/simple_exprs.rs | 4 +- crates/polars-sql/tests/statements.rs | 4 +- crates/polars-sql/tests/udf.rs | 16 +- .../nodes/parquet_source/row_group_decode.rs | 63 +- crates/polars-stream/src/nodes/reduce.rs | 2 +- crates/polars-stream/src/nodes/select.rs | 3 +- .../src/physical_plan/lower_expr.rs | 8 +- crates/polars-time/src/group_by/dynamic.rs | 82 +- crates/polars-time/src/upsample.rs | 4 +- crates/polars-utils/src/index.rs | 17 +- crates/polars/src/docs/eager.rs | 50 +- crates/polars/src/docs/lazy.rs | 9 +- crates/polars/src/lib.rs | 2 +- crates/polars/tests/it/core/date_like.rs | 2 +- crates/polars/tests/it/core/joins.rs | 91 +- crates/polars/tests/it/core/pivot.rs | 6 +- crates/polars/tests/it/io/csv.rs | 33 +- crates/polars/tests/it/io/ipc.rs | 6 +- crates/polars/tests/it/io/ipc_stream.rs | 2 +- crates/polars/tests/it/io/mod.rs | 4 +- crates/polars/tests/it/joins.rs | 6 +- crates/polars/tests/it/lazy/aggregation.rs | 8 +- crates/polars/tests/it/lazy/cwc.rs | 6 +- .../polars/tests/it/lazy/expressions/arity.rs | 4 +- .../tests/it/lazy/expressions/window.rs | 14 +- crates/polars/tests/it/lazy/exprs.rs | 12 +- crates/polars/tests/it/lazy/group_by.rs | 4 +- crates/polars/tests/it/lazy/queries.rs | 14 +- docs/src/rust/user-guide/expressions/lists.rs | 4 +- .../rust/user-guide/expressions/structs.rs | 4 +- 
.../transformations/time-series/rolling.rs | 2 +- .../transformations/time-series/timezones.rs | 2 +- py-polars/tests/unit/dataframe/test_serde.py | 1 + 249 files changed, 4605 insertions(+), 2124 deletions(-) create mode 100644 crates/polars-core/src/frame/column/arithmetic.rs create mode 100644 crates/polars-core/src/frame/column/mod.rs create mode 100644 crates/polars-core/src/scalar/from.rs create mode 100644 crates/polars-plan/src/dsl/function_expr/cut.rs diff --git a/crates/polars-arrow/src/bitmap/bitmask.rs b/crates/polars-arrow/src/bitmap/bitmask.rs index 67785a49eeda..4d6457c07956 100644 --- a/crates/polars-arrow/src/bitmap/bitmask.rs +++ b/crates/polars-arrow/src/bitmap/bitmask.rs @@ -14,7 +14,7 @@ fn nth_set_bit_u32(w: u32, n: u32) -> Option { // We use this by setting the first argument to 1 << n, which means the // first n-1 zero bits of it will spread to the first n-1 one bits of w, // after which the one bit will exactly get copied to the nth one bit of w. - #[cfg(target_feature = "bmi2")] + #[cfg(all(not(miri), target_feature = "bmi2"))] { if n >= 32 { return None; @@ -28,7 +28,7 @@ fn nth_set_bit_u32(w: u32, n: u32) -> Option { Some(nth_set_bit.trailing_zeros()) } - #[cfg(not(target_feature = "bmi2"))] + #[cfg(any(miri, not(target_feature = "bmi2")))] { // Each block of 2/4/8/16 bits contains how many set bits there are in that block. 
let set_per_2 = w - ((w >> 1) & 0x55555555); diff --git a/crates/polars-core/src/chunked_array/cast.rs b/crates/polars-core/src/chunked_array/cast.rs index 53f6e85f221d..ea758742169e 100644 --- a/crates/polars-core/src/chunked_array/cast.rs +++ b/crates/polars-core/src/chunked_array/cast.rs @@ -125,7 +125,7 @@ fn cast_single_to_struct( new_fields.push(Series::full_null(fld.name.clone(), length, &fld.dtype)); } - StructChunked::from_series(name, &new_fields).map(|ca| ca.into_series()) + StructChunked::from_series(name, new_fields.iter()).map(|ca| ca.into_series()) } impl ChunkedArray diff --git a/crates/polars-core/src/chunked_array/logical/categorical/ops/unique.rs b/crates/polars-core/src/chunked_array/logical/categorical/ops/unique.rs index a0f4a4ef90db..7b851c5def54 100644 --- a/crates/polars-core/src/chunked_array/logical/categorical/ops/unique.rs +++ b/crates/polars-core/src/chunked_array/logical/categorical/ops/unique.rs @@ -66,7 +66,7 @@ impl CategoricalChunked { let mut counts = groups.group_count(); counts.rename(PlSmallStr::from_static("counts")); - let cols = vec![values.into_series(), counts.into_series()]; + let cols = vec![values.into_series().into(), counts.into_series().into()]; let df = unsafe { DataFrame::new_no_checks(cols) }; df.sort( ["counts"], diff --git a/crates/polars-core/src/chunked_array/logical/datetime.rs b/crates/polars-core/src/chunked_array/logical/datetime.rs index 1be7a1cf9747..fd99ac74ce0f 100644 --- a/crates/polars-core/src/chunked_array/logical/datetime.rs +++ b/crates/polars-core/src/chunked_array/logical/datetime.rs @@ -19,13 +19,13 @@ impl LogicalType for DatetimeChunked { fn get_any_value(&self, i: usize) -> PolarsResult> { self.0 .get_any_value(i) - .map(|av| av.as_datetime(self.time_unit(), self.time_zone())) + .map(|av| av.as_datetime(self.time_unit(), self.time_zone().as_ref())) } unsafe fn get_any_value_unchecked(&self, i: usize) -> AnyValue<'_> { self.0 .get_any_value_unchecked(i) - .as_datetime(self.time_unit(), 
self.time_zone()) + .as_datetime(self.time_unit(), self.time_zone().as_ref()) } fn cast_with_options( diff --git a/crates/polars-core/src/chunked_array/ndarray.rs b/crates/polars-core/src/chunked_array/ndarray.rs index 079061e31478..94889445f845 100644 --- a/crates/polars-core/src/chunked_array/ndarray.rs +++ b/crates/polars-core/src/chunked_array/ndarray.rs @@ -83,8 +83,8 @@ impl DataFrame { /// /// ```rust /// use polars_core::prelude::*; - /// let a = UInt32Chunked::new("a".into(), &[1, 2, 3]).into_series(); - /// let b = Float64Chunked::new("b".into(), &[10., 8., 6.]).into_series(); + /// let a = UInt32Chunked::new("a".into(), &[1, 2, 3]).into_column(); + /// let b = Float64Chunked::new("b".into(), &[10., 8., 6.]).into_column(); /// /// let df = DataFrame::new(vec![a, b]).unwrap(); /// let ndarray = df.to_ndarray::(IndexOrder::Fortran).unwrap(); @@ -108,7 +108,7 @@ impl DataFrame { let columns = self.get_columns(); POOL.install(|| { columns.par_iter().enumerate().try_for_each(|(col_idx, s)| { - let s = s.cast(&N::get_dtype())?; + let s = s.as_materialized_series().cast(&N::get_dtype())?; let s = match s.dtype() { DataType::Float32 => { let ca = s.f32().unwrap(); diff --git a/crates/polars-core/src/chunked_array/ops/any_value.rs b/crates/polars-core/src/chunked_array/ops/any_value.rs index 2a50b24d9bbf..ee9843ab58ad 100644 --- a/crates/polars-core/src/chunked_array/ops/any_value.rs +++ b/crates/polars-core/src/chunked_array/ops/any_value.rs @@ -100,7 +100,7 @@ pub(crate) unsafe fn arr_to_any_value<'a>( DataType::Datetime(tu, tz) => { let arr = &*(arr as *const dyn Array as *const Int64Array); let v = arr.value_unchecked(idx); - AnyValue::Datetime(v, *tu, tz) + AnyValue::Datetime(v, *tu, tz.as_ref()) }, #[cfg(feature = "dtype-date")] DataType::Date => { diff --git a/crates/polars-core/src/chunked_array/ops/fill_null.rs b/crates/polars-core/src/chunked_array/ops/fill_null.rs index 7aa348d5e440..377b51afe134 100644 --- 
a/crates/polars-core/src/chunked_array/ops/fill_null.rs +++ b/crates/polars-core/src/chunked_array/ops/fill_null.rs @@ -30,7 +30,7 @@ impl Series { /// ```rust /// # use polars_core::prelude::*; /// fn example() -> PolarsResult<()> { - /// let s = Series::new("some_missing".into(), &[Some(1), None, Some(2)]); + /// let s = Column::new("some_missing".into(), &[Some(1), None, Some(2)]); /// /// let filled = s.fill_null(FillNullStrategy::Forward(None))?; /// assert_eq!(Vec::from(filled.i32()?), &[Some(1), Some(1), Some(2)]); diff --git a/crates/polars-core/src/chunked_array/ops/full.rs b/crates/polars-core/src/chunked_array/ops/full.rs index 3f797d588e47..ee307cc3ca8e 100644 --- a/crates/polars-core/src/chunked_array/ops/full.rs +++ b/crates/polars-core/src/chunked_array/ops/full.rs @@ -192,8 +192,8 @@ impl ListChunked { #[cfg(feature = "dtype-struct")] impl ChunkFullNull for StructChunked { fn full_null(name: PlSmallStr, length: usize) -> StructChunked { - let s = vec![Series::new_null(PlSmallStr::EMPTY, length)]; - StructChunked::from_series(name, &s) + let s = [Series::new_null(PlSmallStr::EMPTY, length)]; + StructChunked::from_series(name, s.iter()) .unwrap() .with_outer_validity(Some(Bitmap::new_zeroed(length))) } diff --git a/crates/polars-core/src/chunked_array/ops/mod.rs b/crates/polars-core/src/chunked_array/ops/mod.rs index b252d23814eb..8da567d06491 100644 --- a/crates/polars-core/src/chunked_array/ops/mod.rs +++ b/crates/polars-core/src/chunked_array/ops/mod.rs @@ -306,6 +306,7 @@ pub trait ChunkVar { /// fn filter_all_ones(df: &DataFrame) -> PolarsResult { /// let mask = df /// .column("column_a")? 
+/// .as_materialized_series() /// .equal(1)?; /// /// df.filter(&mask) @@ -384,7 +385,7 @@ pub trait ChunkSort { #[allow(unused_variables)] fn arg_sort_multiple( &self, - by: &[Series], + by: &[Column], _options: &SortMultipleOptions, ) -> PolarsResult { polars_bail!(opq = arg_sort_multiple, T::get_dtype()); diff --git a/crates/polars-core/src/chunked_array/ops/sort/arg_bottom_k.rs b/crates/polars-core/src/chunked_array/ops/sort/arg_bottom_k.rs index e774c8ba51f3..cad95d6b1d10 100644 --- a/crates/polars-core/src/chunked_array/ops/sort/arg_bottom_k.rs +++ b/crates/polars-core/src/chunked_array/ops/sort/arg_bottom_k.rs @@ -31,7 +31,7 @@ impl PartialOrd for CompareRow<'_> { /// Similar to .argsort() then .slice(0, k) but with a more efficient implementation. pub fn _arg_bottom_k( k: usize, - by_column: &[Series], + by_column: &[Column], sort_options: &mut SortMultipleOptions, ) -> PolarsResult> { let from_n_rows = by_column[0].len(); diff --git a/crates/polars-core/src/chunked_array/ops/sort/arg_sort_multiple.rs b/crates/polars-core/src/chunked_array/ops/sort/arg_sort_multiple.rs index d659ebab7e69..5653039ff02e 100644 --- a/crates/polars-core/src/chunked_array/ops/sort/arg_sort_multiple.rs +++ b/crates/polars-core/src/chunked_array/ops/sort/arg_sort_multiple.rs @@ -8,7 +8,7 @@ use crate::utils::_split_offsets; pub(crate) fn args_validate( ca: &ChunkedArray, - other: &[Series], + other: &[Column], param_value: &[bool], param_name: &str, ) -> PolarsResult<()> { @@ -25,7 +25,7 @@ pub(crate) fn args_validate( pub(crate) fn arg_sort_multiple_impl( mut vals: Vec<(IdxSize, T)>, - by: &[Series], + by: &[Column], options: &SortMultipleOptions, ) -> PolarsResult { let nulls_last = &options.nulls_last; @@ -36,7 +36,7 @@ pub(crate) fn arg_sort_multiple_impl( let compare_inner: Vec<_> = by .iter() - .map(|s| s.into_total_ord_inner()) + .map(|s| s.as_materialized_series().into_total_ord_inner()) .collect_trusted(); let first_descending = descending[0]; @@ -198,7 +198,7 @@ pub fn 
_get_rows_encoded_unordered(by: &[Series]) -> PolarsResult { } pub fn _get_rows_encoded( - by: &[Series], + by: &[Column], descending: &[bool], nulls_last: &[bool], ) -> PolarsResult { @@ -209,6 +209,7 @@ pub fn _get_rows_encoded( let mut fields = Vec::with_capacity(by.len()); for ((by, desc), null_last) in by.iter().zip(descending).zip(nulls_last) { + let by = by.as_materialized_series(); let arr = _get_rows_encoded_compat_array(by)?; let sort_field = EncodingField { descending: *desc, @@ -236,7 +237,7 @@ pub fn _get_rows_encoded( pub fn _get_rows_encoded_ca( name: PlSmallStr, - by: &[Series], + by: &[Column], descending: &[bool], nulls_last: &[bool], ) -> PolarsResult { @@ -245,7 +246,7 @@ pub fn _get_rows_encoded_ca( } pub fn _get_rows_encoded_arr( - by: &[Series], + by: &[Column], descending: &[bool], nulls_last: &[bool], ) -> PolarsResult> { @@ -261,7 +262,7 @@ pub fn _get_rows_encoded_ca_unordered( } pub(crate) fn argsort_multiple_row_fmt( - by: &[Series], + by: &[Column], mut descending: Vec, mut nulls_last: Vec, parallel: bool, diff --git a/crates/polars-core/src/chunked_array/ops/sort/categorical.rs b/crates/polars-core/src/chunked_array/ops/sort/categorical.rs index 0dcb2cb84b51..a984c92147b9 100644 --- a/crates/polars-core/src/chunked_array/ops/sort/categorical.rs +++ b/crates/polars-core/src/chunked_array/ops/sort/categorical.rs @@ -76,7 +76,7 @@ impl CategoricalChunked { pub(crate) fn arg_sort_multiple( &self, - by: &[Series], + by: &[Column], options: &SortMultipleOptions, ) -> PolarsResult { if self.uses_lexical_ordering() { @@ -177,7 +177,7 @@ mod test { SortMultipleOptions::default().with_order_descending_multi([false, false]), )?; let out = out.column("cat")?; - let cat = out.categorical()?; + let cat = out.as_materialized_series().categorical()?; assert_order(cat, &["a", "a", "b", "c"]); let out = df.sort( @@ -185,7 +185,7 @@ mod test { SortMultipleOptions::default().with_order_descending_multi([false, false]), )?; let out = out.column("cat")?; - 
let cat = out.categorical()?; + let cat = out.as_materialized_series().categorical()?; assert_order(cat, &["b", "c", "a", "a"]); } Ok(()) diff --git a/crates/polars-core/src/chunked_array/ops/sort/mod.rs b/crates/polars-core/src/chunked_array/ops/sort/mod.rs index 1c1940b6f10d..0aa70dae1c83 100644 --- a/crates/polars-core/src/chunked_array/ops/sort/mod.rs +++ b/crates/polars-core/src/chunked_array/ops/sort/mod.rs @@ -236,7 +236,7 @@ where fn arg_sort_multiple_numeric( ca: &ChunkedArray, - by: &[Series], + by: &[Column], options: &SortMultipleOptions, ) -> PolarsResult { args_validate(ca, by, &options.descending, "descending")?; @@ -294,7 +294,7 @@ where /// We assume that all numeric `Series` are of the same type, if not it will panic fn arg_sort_multiple( &self, - by: &[Series], + by: &[Column], options: &SortMultipleOptions, ) -> PolarsResult { arg_sort_multiple_numeric(self, by, options) @@ -349,7 +349,7 @@ impl ChunkSort for StringChunked { /// fn arg_sort_multiple( &self, - by: &[Series], + by: &[Column], options: &SortMultipleOptions, ) -> PolarsResult { self.as_binary().arg_sort_multiple(by, options) @@ -427,7 +427,7 @@ impl ChunkSort for BinaryChunked { fn arg_sort_multiple( &self, - by: &[Series], + by: &[Column], options: &SortMultipleOptions, ) -> PolarsResult { args_validate(self, by, &options.descending, "descending")?; @@ -574,7 +574,7 @@ impl ChunkSort for BinaryOffsetChunked { /// uphold this contract. If not, it will panic. 
fn arg_sort_multiple( &self, - by: &[Series], + by: &[Column], options: &SortMultipleOptions, ) -> PolarsResult { args_validate(self, by, &options.descending, "descending")?; @@ -599,7 +599,7 @@ impl StructChunked { pub(crate) fn arg_sort(&self, options: SortOptions) -> IdxCa { let bin = _get_rows_encoded_ca( self.name().clone(), - &[self.clone().into_series()], + &[self.clone().into_column()], &[options.descending], &[options.nulls_last], ) @@ -692,7 +692,7 @@ impl ChunkSort for BooleanChunked { } fn arg_sort_multiple( &self, - by: &[Series], + by: &[Column], options: &SortMultipleOptions, ) -> PolarsResult { let mut vals = Vec::with_capacity(self.len()); @@ -724,7 +724,7 @@ pub(crate) fn convert_sort_column_multi_sort(s: &Series) -> PolarsResult .iter() .map(convert_sort_column_multi_sort) .collect::>>()?; - let mut out = StructChunked::from_series(ca.name().clone(), &new_fields)?; + let mut out = StructChunked::from_series(ca.name().clone(), new_fields.iter())?; out.zip_outer_validity(ca); out.into_series() }, @@ -754,14 +754,16 @@ pub fn _broadcast_bools(n_cols: usize, values: &mut Vec) { } pub(crate) fn prepare_arg_sort( - columns: Vec, + columns: Vec, sort_options: &mut SortMultipleOptions, -) -> PolarsResult<(Series, Vec)> { +) -> PolarsResult<(Column, Vec)> { let n_cols = columns.len(); let mut columns = columns .iter() + .map(Column::as_materialized_series) .map(convert_sort_column_multi_sort) + .map(|s| s.map(Column::from)) .collect::>>()?; _broadcast_bools(n_cols, &mut sort_options.descending); @@ -881,11 +883,15 @@ mod test { PlSmallStr::from_static("c"), &["a", "b", "c", "d", "e", "f", "g", "h"], ); - let df = DataFrame::new(vec![a.into_series(), b.into_series(), c.into_series()])?; + let df = DataFrame::new(vec![ + a.into_series().into(), + b.into_series().into(), + c.into_series().into(), + ])?; let out = df.sort(["a", "b", "c"], SortMultipleOptions::default())?; assert_eq!( - Vec::from(out.column("b")?.i64()?), + 
Vec::from(out.column("b")?.as_series().unwrap().i64()?), &[ Some(0), Some(2), @@ -905,7 +911,7 @@ mod test { ) .into_series(); let b = Int32Chunked::new(PlSmallStr::from_static("b"), &[5, 4, 2, 3, 4, 5]).into_series(); - let df = DataFrame::new(vec![a, b])?; + let df = DataFrame::new(vec![a.into(), b.into()])?; let out = df.sort(["a", "b"], SortMultipleOptions::default())?; let expected = df!( diff --git a/crates/polars-core/src/chunked_array/ops/zip.rs b/crates/polars-core/src/chunked_array/ops/zip.rs index eb24468d892d..7fe09ba2c7d1 100644 --- a/crates/polars-core/src/chunked_array/ops/zip.rs +++ b/crates/polars-core/src/chunked_array/ops/zip.rs @@ -237,7 +237,7 @@ impl ChunkZip for StructChunked { .map(|(lhs, rhs)| lhs.zip_with_same_type(&mask, &rhs)) .collect::>>()?; - let mut out = StructChunked::from_series(self.name().clone(), &fields)?; + let mut out = StructChunked::from_series(self.name().clone(), fields.iter())?; // Zip the validities. if (l.null_count + r.null_count) > 0 { diff --git a/crates/polars-core/src/chunked_array/random.rs b/crates/polars-core/src/chunked_array/random.rs index 94ab33f02cee..1ad3d2b7abd7 100644 --- a/crates/polars-core/src/chunked_array/random.rs +++ b/crates/polars-core/src/chunked_array/random.rs @@ -193,7 +193,7 @@ impl DataFrame { match n.get(0) { Some(n) => self.sample_n_literal(n as usize, with_replacement, shuffle, seed), None => { - let new_cols = self.columns.iter().map(Series::clear).collect_trusted(); + let new_cols = self.columns.iter().map(Column::clear).collect_trusted(); Ok(unsafe { DataFrame::new_no_checks(new_cols) }) }, } @@ -238,7 +238,7 @@ impl DataFrame { self.sample_n_literal(n, with_replacement, shuffle, seed) }, None => { - let new_cols = self.columns.iter().map(Series::clear).collect_trusted(); + let new_cols = self.columns.iter().map(Column::clear).collect_trusted(); Ok(unsafe { DataFrame::new_no_checks(new_cols) }) }, } diff --git a/crates/polars-core/src/chunked_array/struct_/frame.rs 
b/crates/polars-core/src/chunked_array/struct_/frame.rs index 280a9df6da56..83f0f1299667 100644 --- a/crates/polars-core/src/chunked_array/struct_/frame.rs +++ b/crates/polars-core/src/chunked_array/struct_/frame.rs @@ -5,6 +5,6 @@ use crate::prelude::StructChunked; impl DataFrame { pub fn into_struct(self, name: PlSmallStr) -> StructChunked { - StructChunked::from_series(name, &self.columns).expect("same invariants") + StructChunked::from_columns(name, &self.columns).expect("same invariants") } } diff --git a/crates/polars-core/src/chunked_array/struct_/mod.rs b/crates/polars-core/src/chunked_array/struct_/mod.rs index 882251a43d6d..0c4eb50ddc58 100644 --- a/crates/polars-core/src/chunked_array/struct_/mod.rs +++ b/crates/polars-core/src/chunked_array/struct_/mod.rs @@ -18,21 +18,24 @@ use crate::utils::Container; pub type StructChunked = ChunkedArray; -fn constructor(name: PlSmallStr, fields: &[Series]) -> PolarsResult { +fn constructor<'a, I: ExactSizeIterator + Clone>( + name: PlSmallStr, + fields: I, +) -> PolarsResult { // Different chunk lengths: rechunk and recurse. 
- if !fields.iter().map(|s| s.n_chunks()).all_equal() { - let fields = fields.iter().map(|s| s.rechunk()).collect::>(); - return constructor(name, &fields); + if !fields.clone().map(|s| s.n_chunks()).all_equal() { + let fields = fields.map(|s| s.rechunk()).collect::>(); + return constructor(name, fields.iter()); } - let n_chunks = fields[0].n_chunks(); - let dtype = DataType::Struct(fields.iter().map(|s| s.field().into_owned()).collect()); + let n_chunks = fields.clone().next().unwrap().n_chunks(); + let dtype = DataType::Struct(fields.clone().map(|s| s.field().into_owned()).collect()); let arrow_dtype = dtype.to_physical().to_arrow(CompatLevel::newest()); let chunks = (0..n_chunks) .map(|c_i| { let fields = fields - .iter() + .clone() .map(|field| field.chunks()[c_i].clone()) .collect::>(); @@ -55,21 +58,28 @@ fn constructor(name: PlSmallStr, fields: &[Series]) -> PolarsResult { - let fields = fields.iter().map(|s| s.rechunk()).collect::>(); - constructor(name, &fields) + let fields = fields.map(|s| s.rechunk()).collect::>(); + constructor(name, fields.iter()) }, } } impl StructChunked { - pub fn from_series(name: PlSmallStr, fields: &[Series]) -> PolarsResult { + pub fn from_columns(name: PlSmallStr, fields: &[Column]) -> PolarsResult { + Self::from_series(name, fields.iter().map(|c| c.as_materialized_series())) + } + + pub fn from_series<'a, I: ExactSizeIterator + Clone>( + name: PlSmallStr, + fields: I, + ) -> PolarsResult { let mut names = PlHashSet::with_capacity(fields.len()); - let first_len = fields.first().map(|s| s.len()).unwrap_or(0); + let first_len = fields.clone().next().map(|s| s.len()).unwrap_or(0); let mut max_len = first_len; let mut all_equal_len = true; let mut is_empty = false; - for s in fields { + for s in fields.clone() { let s_len = s.len(); max_len = std::cmp::max(max_len, s_len); @@ -108,10 +118,10 @@ impl StructChunked { ); } } - constructor(name, &new_fields) - } else if fields.is_empty() { - let fields = 
&[Series::new_null(PlSmallStr::EMPTY, 0)]; - constructor(name, fields) + constructor(name, new_fields.iter()) + } else if fields.len() == 0 { + let fields = [Series::new_null(PlSmallStr::EMPTY, 0)]; + constructor(name, fields.iter()) } else { constructor(name, fields) } @@ -175,7 +185,7 @@ impl StructChunked { }) .collect::>>()?; - let mut out = Self::from_series(self.name().clone(), &new_fields)?; + let mut out = Self::from_series(self.name().clone(), new_fields.iter())?; if self.null_count > 0 { out.zip_outer_validity(self); } @@ -231,7 +241,7 @@ impl StructChunked { } }) .collect::>>()?; - let mut out = Self::from_series(self.name().clone(), &fields)?; + let mut out = Self::from_series(self.name().clone(), fields.iter())?; if self.null_count > 0 { out.zip_outer_validity(self); } @@ -276,7 +286,7 @@ impl StructChunked { .iter() .map(func) .collect::>>()?; - Self::from_series(self.name().clone(), &fields).map(|mut ca| { + Self::from_series(self.name().clone(), fields.iter()).map(|mut ca| { if self.null_count > 0 { // SAFETY: we don't change types/ lengths. 
unsafe { @@ -290,15 +300,15 @@ impl StructChunked { } pub fn get_row_encoded_array(&self, options: SortOptions) -> PolarsResult> { - let s = self.clone().into_series(); - _get_rows_encoded_arr(&[s], &[options.descending], &[options.nulls_last]) + let c = self.clone().into_column(); + _get_rows_encoded_arr(&[c], &[options.descending], &[options.nulls_last]) } pub fn get_row_encoded(&self, options: SortOptions) -> PolarsResult { - let s = self.clone().into_series(); + let c = self.clone().into_column(); _get_rows_encoded_ca( self.name().clone(), - &[s], + &[c], &[options.descending], &[options.nulls_last], ) @@ -346,8 +356,15 @@ impl StructChunked { } pub fn unnest(self) -> DataFrame { + // @scalar-opt + let columns = self + .fields_as_series() + .into_iter() + .map(Column::from) + .collect(); + // SAFETY: invariants for struct are the same - unsafe { DataFrame::new_no_checks(self.fields_as_series()) } + unsafe { DataFrame::new_no_checks(columns) } } /// Get access to one of this `[StructChunked]`'s fields diff --git a/crates/polars-core/src/chunked_array/temporal/mod.rs b/crates/polars-core/src/chunked_array/temporal/mod.rs index e3ab1c01c164..ad35aa90899e 100644 --- a/crates/polars-core/src/chunked_array/temporal/mod.rs +++ b/crates/polars-core/src/chunked_array/temporal/mod.rs @@ -64,7 +64,7 @@ pub fn parse_time_zone(tz: &str) -> PolarsResult { /// /// E.g. +01:00 -> Etc/GMT-1 /// -/// Note: the sign appears reversed, but is correct, see https://en.wikipedia.org/wiki/Tz_database#Area: +/// Note: the sign appears reversed, but is correct, see : /// > In order to conform with the POSIX style, those zone names beginning with /// > "Etc/GMT" have their sign reversed from the standard ISO 8601 convention. 
/// > In the "Etc" area, zones west of GMT have a positive sign and those east diff --git a/crates/polars-core/src/datatypes/any_value.rs b/crates/polars-core/src/datatypes/any_value.rs index 5721ee2db2a9..05ad0647dbcf 100644 --- a/crates/polars-core/src/datatypes/any_value.rs +++ b/crates/polars-core/src/datatypes/any_value.rs @@ -1,3 +1,5 @@ +use std::borrow::Cow; + #[cfg(feature = "dtype-struct")] use arrow::legacy::trusted_len::TrustedLenPush; use arrow::types::PrimitiveType; @@ -60,8 +62,12 @@ pub enum AnyValue<'a> { /// A 64-bit date representing the elapsed time since UNIX epoch (1970-01-01) /// in nanoseconds (64 bits). #[cfg(feature = "dtype-datetime")] - Datetime(i64, TimeUnit, &'a Option), - // A 64-bit integer representing difference between date-times in [`TimeUnit`] + Datetime(i64, TimeUnit, Option<&'a TimeZone>), + /// A 64-bit date representing the elapsed time since UNIX epoch (1970-01-01) + /// in nanoseconds (64 bits). + #[cfg(feature = "dtype-datetime")] + DatetimeOwned(i64, TimeUnit, Option>), + /// A 64-bit integer representing difference between date-times in [`TimeUnit`] #[cfg(feature = "dtype-duration")] Duration(i64, TimeUnit), /// A 64-bit time representing the elapsed time since midnight in nanoseconds @@ -71,8 +77,14 @@ pub enum AnyValue<'a> { // otherwise it is in the array pointer #[cfg(feature = "dtype-categorical")] Categorical(u32, &'a RevMapping, SyncPtr), + // If syncptr is_null the data is in the rev-map + // otherwise it is in the array pointer + #[cfg(feature = "dtype-categorical")] + CategoricalOwned(u32, Arc, SyncPtr), #[cfg(feature = "dtype-categorical")] Enum(u32, &'a RevMapping, SyncPtr), + #[cfg(feature = "dtype-categorical")] + EnumOwned(u32, Arc, SyncPtr), /// Nested type, contains arrays that are filled with one of the datatypes. 
List(Series), #[cfg(feature = "dtype-array")] @@ -390,13 +402,19 @@ impl<'a> AnyValue<'a> { #[cfg(feature = "dtype-time")] Time(_) => DataType::Time, #[cfg(feature = "dtype-datetime")] - Datetime(_, tu, tz) => DataType::Datetime(*tu, (*tz).clone()), + Datetime(_, tu, tz) => DataType::Datetime(*tu, (*tz).cloned()), + #[cfg(feature = "dtype-datetime")] + DatetimeOwned(_, tu, tz) => { + DataType::Datetime(*tu, tz.as_ref().map(|v| v.as_ref().clone())) + }, #[cfg(feature = "dtype-duration")] Duration(_, tu) => DataType::Duration(*tu), #[cfg(feature = "dtype-categorical")] - Categorical(_, _, _) => DataType::Categorical(None, Default::default()), + Categorical(_, _, _) | CategoricalOwned(_, _, _) => { + DataType::Categorical(None, Default::default()) + }, #[cfg(feature = "dtype-categorical")] - Enum(_, _, _) => DataType::Enum(None, Default::default()), + Enum(_, _, _) | EnumOwned(_, _, _) => DataType::Enum(None, Default::default()), List(s) => DataType::List(Box::new(s.dtype().clone())), #[cfg(feature = "dtype-array")] Array(s, size) => DataType::Array(Box::new(s.dtype().clone()), *size), @@ -432,7 +450,7 @@ impl<'a> AnyValue<'a> { #[cfg(feature = "dtype-date")] Date(v) => NumCast::from(*v), #[cfg(feature = "dtype-datetime")] - Datetime(v, _, _) => NumCast::from(*v), + Datetime(v, _, _) | DatetimeOwned(v, _, _) => NumCast::from(*v), #[cfg(feature = "dtype-time")] Time(v) => NumCast::from(*v), #[cfg(feature = "dtype-duration")] @@ -566,7 +584,7 @@ impl<'a> AnyValue<'a> { // to datetime #[cfg(feature = "dtype-datetime")] (av, DataType::Datetime(tu, tz)) if av.is_numeric() => { - AnyValue::Datetime(av.extract::()?, *tu, tz) + AnyValue::Datetime(av.extract::()?, *tu, tz.as_ref()) }, #[cfg(all(feature = "dtype-datetime", feature = "dtype-date"))] (AnyValue::Date(v), DataType::Datetime(tu, _)) => AnyValue::Datetime( @@ -576,10 +594,13 @@ impl<'a> AnyValue<'a> { TimeUnit::Milliseconds => (*v as i64) * MS_IN_DAY, }, *tu, - &None, + None, ), #[cfg(feature = "dtype-datetime")] - 
(AnyValue::Datetime(v, tu, _), DataType::Datetime(tu_r, tz_r)) => AnyValue::Datetime( + ( + AnyValue::Datetime(v, tu, _) | AnyValue::DatetimeOwned(v, tu, _), + DataType::Datetime(tu_r, tz_r), + ) => AnyValue::Datetime( match (tu, tu_r) { (TimeUnit::Nanoseconds, TimeUnit::Microseconds) => *v / 1_000i64, (TimeUnit::Nanoseconds, TimeUnit::Milliseconds) => *v / 1_000_000i64, @@ -590,28 +611,32 @@ impl<'a> AnyValue<'a> { _ => *v, }, *tu_r, - tz_r, + tz_r.as_ref(), ), // to date #[cfg(feature = "dtype-date")] (av, DataType::Date) if av.is_numeric() => AnyValue::Date(av.extract::()?), #[cfg(all(feature = "dtype-date", feature = "dtype-datetime"))] - (AnyValue::Datetime(v, tu, _), DataType::Date) => AnyValue::Date(match tu { - TimeUnit::Nanoseconds => *v / NS_IN_DAY, - TimeUnit::Microseconds => *v / US_IN_DAY, - TimeUnit::Milliseconds => *v / MS_IN_DAY, - } as i32), + (AnyValue::Datetime(v, tu, _) | AnyValue::DatetimeOwned(v, tu, _), DataType::Date) => { + AnyValue::Date(match tu { + TimeUnit::Nanoseconds => *v / NS_IN_DAY, + TimeUnit::Microseconds => *v / US_IN_DAY, + TimeUnit::Milliseconds => *v / MS_IN_DAY, + } as i32) + }, // to time #[cfg(feature = "dtype-time")] (av, DataType::Time) if av.is_numeric() => AnyValue::Time(av.extract::()?), #[cfg(all(feature = "dtype-time", feature = "dtype-datetime"))] - (AnyValue::Datetime(v, tu, _), DataType::Time) => AnyValue::Time(match tu { - TimeUnit::Nanoseconds => *v % NS_IN_DAY, - TimeUnit::Microseconds => (*v % US_IN_DAY) * 1_000i64, - TimeUnit::Milliseconds => (*v % MS_IN_DAY) * 1_000_000i64, - }), + (AnyValue::Datetime(v, tu, _) | AnyValue::DatetimeOwned(v, tu, _), DataType::Time) => { + AnyValue::Time(match tu { + TimeUnit::Nanoseconds => *v % NS_IN_DAY, + TimeUnit::Microseconds => (*v % US_IN_DAY) * 1_000i64, + TimeUnit::Milliseconds => (*v % MS_IN_DAY) * 1_000_000i64, + }) + }, // to duration #[cfg(feature = "dtype-duration")] @@ -693,6 +718,41 @@ impl<'a> AnyValue<'a> { None => AnyValue::Null, } } + + pub fn idx(&self) 
-> IdxSize { + match self { + #[cfg(not(feature = "bigidx"))] + Self::UInt32(v) => *v, + #[cfg(feature = "bigidx")] + Self::UInt64(v) => *v, + _ => panic!("expected index type found {self:?}"), + } + } + + pub fn str_value(&self) -> Cow<'a, str> { + match self { + Self::String(s) => Cow::Borrowed(s), + Self::StringOwned(s) => Cow::Owned(s.to_string()), + Self::Null => Cow::Borrowed("null"), + #[cfg(feature = "dtype-categorical")] + Self::Categorical(idx, rev, arr) | AnyValue::Enum(idx, rev, arr) => { + if arr.is_null() { + Cow::Borrowed(rev.get(*idx)) + } else { + unsafe { Cow::Borrowed(arr.deref_unchecked().value(*idx as usize)) } + } + }, + #[cfg(feature = "dtype-categorical")] + Self::CategoricalOwned(idx, rev, arr) | AnyValue::EnumOwned(idx, rev, arr) => { + if arr.is_null() { + Cow::Owned(rev.get(*idx).to_string()) + } else { + unsafe { Cow::Borrowed(arr.deref_unchecked().value(*idx as usize)) } + } + }, + av => Cow::Owned(av.to_string()), + } + } } impl From> for DataType { @@ -747,6 +807,12 @@ impl AnyValue<'_> { tu.hash(state); tz.hash(state); }, + #[cfg(feature = "dtype-datetime")] + DatetimeOwned(v, tu, tz) => { + v.hash(state); + tu.hash(state); + tz.hash(state); + }, #[cfg(feature = "dtype-duration")] Duration(v, tz) => { v.hash(state); @@ -755,7 +821,10 @@ impl AnyValue<'_> { #[cfg(feature = "dtype-time")] Time(v) => v.hash(state), #[cfg(feature = "dtype-categorical")] - Categorical(v, _, _) | Enum(v, _, _) => v.hash(state), + Categorical(v, _, _) + | CategoricalOwned(v, _, _) + | Enum(v, _, _) + | EnumOwned(v, _, _) => v.hash(state), #[cfg(feature = "object")] Object(_) => {}, #[cfg(feature = "object")] @@ -812,7 +881,7 @@ impl<'a> AnyValue<'a> { } } #[cfg(feature = "dtype-datetime")] - pub(crate) fn as_datetime(&self, tu: TimeUnit, tz: &'a Option) -> AnyValue<'a> { + pub(crate) fn as_datetime(&self, tu: TimeUnit, tz: Option<&'a TimeZone>) -> AnyValue<'a> { match self { AnyValue::Int64(v) => AnyValue::Datetime(*v, tu, tz), AnyValue::Null => 
AnyValue::Null, @@ -875,6 +944,16 @@ impl<'a> AnyValue<'a> { match self { AnyValue::BinaryOwned(data) => AnyValue::Binary(data), AnyValue::StringOwned(data) => AnyValue::String(data.as_str()), + #[cfg(feature = "dtype-datetime")] + AnyValue::DatetimeOwned(v, tu, tz) => { + AnyValue::Datetime(*v, *tu, tz.as_ref().map(AsRef::as_ref)) + }, + #[cfg(feature = "dtype-categorical")] + AnyValue::CategoricalOwned(v, rev, arr) => { + AnyValue::Categorical(*v, rev.as_ref(), *arr) + }, + #[cfg(feature = "dtype-categorical")] + AnyValue::EnumOwned(v, rev, arr) => AnyValue::Enum(*v, rev.as_ref(), *arr), av => av.clone(), } } @@ -897,8 +976,14 @@ impl<'a> AnyValue<'a> { Boolean(v) => Boolean(v), Float32(v) => Float32(v), Float64(v) => Float64(v), + #[cfg(feature = "dtype-datetime")] + Datetime(v, tu, tz) => DatetimeOwned(v, tu, tz.map(|v| Arc::new(v.clone()))), + #[cfg(feature = "dtype-datetime")] + DatetimeOwned(v, tu, tz) => DatetimeOwned(v, tu, tz), #[cfg(feature = "dtype-date")] Date(v) => Date(v), + #[cfg(feature = "dtype-duration")] + Duration(v, tu) => Duration(v, tu), #[cfg(feature = "dtype-time")] Time(v) => Time(v), List(v) => List(v), @@ -912,7 +997,7 @@ impl<'a> AnyValue<'a> { Object(v) => ObjectOwned(OwnedObject(v.to_boxed())), #[cfg(feature = "dtype-struct")] Struct(idx, arr, fields) => { - let avs = struct_to_avs_static(idx, arr, fields); + let avs = struct_to_avs_static(idx, arr, fields)?; StructOwned(Box::new((avs, fields.to_vec()))) }, #[cfg(feature = "dtype-struct")] @@ -929,8 +1014,14 @@ impl<'a> AnyValue<'a> { }, #[cfg(feature = "dtype-decimal")] Decimal(val, scale) => Decimal(val, scale), - #[allow(unreachable_patterns)] - dt => polars_bail!(ComputeError: "cannot get static any-value from {}", dt), + #[cfg(feature = "dtype-categorical")] + Categorical(v, rev, arr) => CategoricalOwned(v, Arc::new(rev.clone()), arr), + #[cfg(feature = "dtype-categorical")] + CategoricalOwned(v, rev, arr) => CategoricalOwned(v, rev, arr), + #[cfg(feature = "dtype-categorical")] 
+ Enum(v, rev, arr) => EnumOwned(v, Arc::new(rev.clone()), arr), + #[cfg(feature = "dtype-categorical")] + EnumOwned(v, rev, arr) => EnumOwned(v, rev, arr), }; Ok(av) } @@ -949,6 +1040,15 @@ impl<'a> AnyValue<'a> { }; Some(s) }, + #[cfg(feature = "dtype-categorical")] + AnyValue::CategoricalOwned(idx, rev, arr) | AnyValue::EnumOwned(idx, rev, arr) => { + let s = if arr.is_null() { + rev.get(*idx) + } else { + unsafe { arr.deref_unchecked().value(*idx as usize) } + }; + Some(s) + }, _ => None, } } @@ -981,6 +1081,22 @@ impl AnyValue<'_> { (l, BinaryOwned(r)) => *l == AnyValue::Binary(r.as_slice()), #[cfg(feature = "object")] (l, ObjectOwned(r)) => *l == AnyValue::Object(&*r.0), + #[cfg(feature = "dtype-datetime")] + (DatetimeOwned(lv, ltu, ltz), r) => { + Datetime(*lv, *ltu, ltz.as_ref().map(|v| v.as_ref())) == *r + }, + #[cfg(feature = "dtype-datetime")] + (l, DatetimeOwned(rv, rtu, rtz)) => { + *l == Datetime(*rv, *rtu, rtz.as_ref().map(|v| v.as_ref())) + }, + #[cfg(feature = "dtype-categorical")] + (CategoricalOwned(lv, lrev, larr), r) => Categorical(*lv, lrev.as_ref(), *larr) == *r, + #[cfg(feature = "dtype-categorical")] + (l, CategoricalOwned(rv, rrev, rarr)) => *l == Categorical(*rv, rrev.as_ref(), *rarr), + #[cfg(feature = "dtype-categorical")] + (EnumOwned(lv, lrev, larr), r) => Enum(*lv, lrev.as_ref(), *larr) == *r, + #[cfg(feature = "dtype-categorical")] + (l, EnumOwned(rv, rrev, rarr)) => *l == Enum(*rv, rrev.as_ref(), *rarr), // Comparison with null. 
(Null, Null) => null_equal, @@ -1111,6 +1227,26 @@ impl PartialOrd for AnyValue<'_> { (l, BinaryOwned(r)) => l.partial_cmp(&AnyValue::Binary(r.as_slice())), #[cfg(feature = "object")] (l, ObjectOwned(r)) => l.partial_cmp(&AnyValue::Object(&*r.0)), + #[cfg(feature = "dtype-datetime")] + (DatetimeOwned(lv, ltu, ltz), r) => { + Datetime(*lv, *ltu, ltz.as_ref().map(|v| v.as_ref())).partial_cmp(r) + }, + #[cfg(feature = "dtype-datetime")] + (l, DatetimeOwned(rv, rtu, rtz)) => { + l.partial_cmp(&Datetime(*rv, *rtu, rtz.as_ref().map(|v| v.as_ref()))) + }, + #[cfg(feature = "dtype-categorical")] + (CategoricalOwned(lv, lrev, larr), r) => { + Categorical(*lv, lrev.as_ref(), *larr).partial_cmp(r) + }, + #[cfg(feature = "dtype-categorical")] + (l, CategoricalOwned(rv, rrev, rarr)) => { + l.partial_cmp(&Categorical(*rv, rrev.as_ref(), *rarr)) + }, + #[cfg(feature = "dtype-categorical")] + (EnumOwned(lv, lrev, larr), r) => Enum(*lv, lrev.as_ref(), *larr).partial_cmp(r), + #[cfg(feature = "dtype-categorical")] + (l, EnumOwned(rv, rrev, rarr)) => l.partial_cmp(&Enum(*rv, rrev.as_ref(), *rarr)), // Comparison with null. 
(Null, Null) => Some(Ordering::Equal), @@ -1224,7 +1360,11 @@ impl TotalEq for AnyValue<'_> { } #[cfg(feature = "dtype-struct")] -fn struct_to_avs_static(idx: usize, arr: &StructArray, fields: &[Field]) -> Vec> { +fn struct_to_avs_static( + idx: usize, + arr: &StructArray, + fields: &[Field], +) -> PolarsResult>> { let arrs = arr.values(); let mut avs = Vec::with_capacity(arrs.len()); // amortize loop counter @@ -1233,10 +1373,10 @@ fn struct_to_avs_static(idx: usize, arr: &StructArray, fields: &[Field]) -> Vec< let arr = &**arrs.get_unchecked_release(i); let field = fields.get_unchecked_release(i); let av = arr_to_any_value(arr, idx, &field.dtype); - avs.push_unchecked(av.into_static().unwrap()); + avs.push_unchecked(av.into_static()?); } } - avs + Ok(avs) } #[cfg(feature = "dtype-categorical")] diff --git a/crates/polars-core/src/fmt.rs b/crates/polars-core/src/fmt.rs index 00455a1a841a..c930b9e94da7 100644 --- a/crates/polars-core/src/fmt.rs +++ b/crates/polars-core/src/fmt.rs @@ -616,9 +616,9 @@ impl Display for DataFrame { for i in 0..(half + rest) { let row = self - .columns + .get_columns() .iter() - .map(|s| s.str_value(i).unwrap()) + .map(|c| c.str_value(i).unwrap()) .collect(); let row_strings = @@ -630,9 +630,9 @@ impl Display for DataFrame { rows.push(dots); for i in (height - half)..height { let row = self - .columns + .get_columns() .iter() - .map(|s| s.str_value(i).unwrap()) + .map(|c| c.str_value(i).unwrap()) .collect(); let row_strings = @@ -644,8 +644,7 @@ impl Display for DataFrame { for i in 0..height { if self.width() > 0 { let row = self - .columns - .iter() + .materialized_column_iter() .map(|s| s.str_value(i).unwrap()) .collect(); @@ -908,6 +907,24 @@ fn fmt_float(f: &mut Formatter<'_>, width: usize, v: T) -> fmt } } +#[cfg(feature = "dtype-datetime")] +fn fmt_datetime( + f: &mut Formatter<'_>, + v: i64, + tu: TimeUnit, + tz: Option<&self::datatypes::TimeZone>, +) -> fmt::Result { + let ndt = match tu { + TimeUnit::Nanoseconds => 
timestamp_ns_to_datetime(v), + TimeUnit::Microseconds => timestamp_us_to_datetime(v), + TimeUnit::Milliseconds => timestamp_ms_to_datetime(v), + }; + match tz { + None => std::fmt::Display::fmt(&ndt, f), + Some(tz) => PlTzAware::new(ndt, tz).fmt(f), + } +} + #[cfg(feature = "dtype-duration")] const NAMES: [&str; 4] = ["d", "h", "m", "s"]; #[cfg(feature = "dtype-duration")] @@ -1024,18 +1041,10 @@ impl Display for AnyValue<'_> { #[cfg(feature = "dtype-date")] AnyValue::Date(v) => write!(f, "{}", date32_to_date(*v)), #[cfg(feature = "dtype-datetime")] - AnyValue::Datetime(v, tu, tz) => { - let ndt = match tu { - TimeUnit::Nanoseconds => timestamp_ns_to_datetime(*v), - TimeUnit::Microseconds => timestamp_us_to_datetime(*v), - TimeUnit::Milliseconds => timestamp_ms_to_datetime(*v), - }; - match tz { - None => write!(f, "{ndt}"), - Some(tz) => { - write!(f, "{}", PlTzAware::new(ndt, tz)) - }, - } + AnyValue::Datetime(v, tu, tz) => fmt_datetime(f, *v, *tu, *tz), + #[cfg(feature = "dtype-datetime")] + AnyValue::DatetimeOwned(v, tu, tz) => { + fmt_datetime(f, *v, *tu, tz.as_ref().map(|v| v.as_ref())) }, #[cfg(feature = "dtype-duration")] AnyValue::Duration(v, tu) => match tu { @@ -1049,7 +1058,10 @@ impl Display for AnyValue<'_> { write!(f, "{nt}") }, #[cfg(feature = "dtype-categorical")] - AnyValue::Categorical(_, _, _) | AnyValue::Enum(_, _, _) => { + AnyValue::Categorical(_, _, _) + | AnyValue::CategoricalOwned(_, _, _) + | AnyValue::Enum(_, _, _) + | AnyValue::EnumOwned(_, _, _) => { let s = self.get_str().unwrap(); write!(f, "\"{s}\"") }, diff --git a/crates/polars-core/src/frame/arithmetic.rs b/crates/polars-core/src/frame/arithmetic.rs index 69e2279cd47f..6d184b2960c9 100644 --- a/crates/polars-core/src/frame/arithmetic.rs +++ b/crates/polars-core/src/frame/arithmetic.rs @@ -20,9 +20,9 @@ macro_rules! 
impl_arithmetic { let rhs = $rhs.cast(&st)?; let cols = POOL.install(|| { $self - .columns - .par_iter() + .par_materialized_column_iter() .map(|s| $operand(&s.cast(&st)?, &rhs)) + .map(|s| s.map(Column::from)) .collect::>() })?; Ok(unsafe { DataFrame::new_no_checks(cols) }) @@ -122,6 +122,9 @@ impl DataFrame { .par_iter() .zip(other.get_columns().par_iter()) .map(|(l, r)| { + let l = l.as_materialized_series(); + let r = r.as_materialized_series(); + let diff_l = max_len - l.len(); let diff_r = max_len - r.len(); @@ -136,7 +139,7 @@ impl DataFrame { r = r.extend_constant(AnyValue::Null, diff_r)?; }; - f(&l, &r) + f(&l, &r).map(Column::from) }); let mut cols = POOL.install(|| cols.collect::>>())?; @@ -152,7 +155,7 @@ impl DataFrame { // trick to fill a series with nulls let vals: &[Option] = &[None]; let s = Series::new(name.clone(), vals).cast(dtype)?; - cols.push(s.new_from_index(0, max_len)) + cols.push(s.new_from_index(0, max_len).into()) } } DataFrame::new(cols) diff --git a/crates/polars-core/src/frame/chunks.rs b/crates/polars-core/src/frame/chunks.rs index 349a77c56d75..704df0b7d140 100644 --- a/crates/polars-core/src/frame/chunks.rs +++ b/crates/polars-core/src/frame/chunks.rs @@ -9,12 +9,12 @@ impl TryFrom<(RecordBatch, &ArrowSchema)> for DataFrame { type Error = PolarsError; fn try_from(arg: (RecordBatch, &ArrowSchema)) -> PolarsResult { - let columns: PolarsResult> = arg + let columns: PolarsResult> = arg .0 .columns() .iter() .zip(arg.1.iter_values()) - .map(|(arr, field)| Series::try_from((field, arr.clone()))) + .map(|(arr, field)| Series::try_from((field, arr.clone())).map(Column::from)) .collect(); DataFrame::new(columns?) 
@@ -29,7 +29,8 @@ impl DataFrame { let columns = self .get_columns() .iter() - .map(|s| s.select_chunk(i)) + .map(|column| column.as_materialized_series().select_chunk(i)) + .map(Column::from) .collect::>(); DataFrame::new_no_checks(columns) diff --git a/crates/polars-core/src/frame/column/arithmetic.rs b/crates/polars-core/src/frame/column/arithmetic.rs new file mode 100644 index 000000000000..79fd0053b320 --- /dev/null +++ b/crates/polars-core/src/frame/column/arithmetic.rs @@ -0,0 +1,154 @@ +use num_traits::{Num, NumCast}; +use polars_error::{polars_bail, PolarsResult}; + +use super::{Column, ScalarColumn, Series}; +use crate::utils::Container; + +fn output_length(a: &Column, b: &Column) -> PolarsResult { + match (a.len(), b.len()) { + // broadcasting + (1, o) | (o, 1) => Ok(o), + // equal + (a, b) if a == b => Ok(a), + // unequal + (a, b) => { + polars_bail!(InvalidOperation: "cannot do arithmetic operation on series of different lengths: got {} and {}", a, b) + }, + } +} + +fn unit_series_op PolarsResult>( + l: &Series, + r: &Series, + op: F, + length: usize, +) -> PolarsResult { + debug_assert!(l.len() <= 1); + debug_assert!(r.len() <= 1); + + op(l, r) + .and_then(|s| ScalarColumn::from_single_value_series(s, length)) + .map(Column::from) +} + +fn op_with_broadcast PolarsResult>( + l: &Column, + r: &Column, + op: F, +) -> PolarsResult { + // Here we rely on the underlying broadcast operations. 
+ + let length = output_length(l, r)?; + match (l, r) { + (Column::Series(l), Column::Series(r)) => op(l, r).map(Column::from), + (Column::Series(l), Column::Scalar(r)) => { + let r = r.as_single_value_series(); + if l.len() == 1 { + unit_series_op(l, &r, op, length) + } else { + op(l, &r).map(Column::from) + } + }, + (Column::Scalar(l), Column::Series(r)) => { + let l = l.as_single_value_series(); + if r.len() == 1 { + unit_series_op(&l, r, op, length) + } else { + op(&l, r).map(Column::from) + } + }, + (Column::Scalar(l), Column::Scalar(r)) => unit_series_op( + &l.as_single_value_series(), + &r.as_single_value_series(), + op, + length, + ), + } +} + +fn num_op_with_broadcast Series>( + c: &'_ Column, + n: T, + op: F, +) -> PolarsResult { + match c { + Column::Series(s) => Ok(op(s, n).into()), + Column::Scalar(s) => { + ScalarColumn::from_single_value_series(op(&s.as_single_value_series(), n), s.length) + .map(Column::from) + }, + } +} + +macro_rules! broadcastable_ops { + ($(($trait:ident, $op:ident))+) => { + $( + impl std::ops::$trait for Column { + type Output = PolarsResult; + + #[inline] + fn $op(self, rhs: Self) -> Self::Output { + op_with_broadcast(&self, &rhs, |l, r| l.$op(r)) + } + } + + impl std::ops::$trait for &Column { + type Output = PolarsResult; + + #[inline] + fn $op(self, rhs: Self) -> Self::Output { + op_with_broadcast(self, rhs, |l, r| l.$op(r)) + } + } + )+ + } +} + +macro_rules! broadcastable_num_ops { + ($(($trait:ident, $op:ident))+) => { + $( + impl std::ops::$trait:: for Column + where + T: Num + NumCast, + { + type Output = PolarsResult; + + #[inline] + fn $op(self, rhs: T) -> Self::Output { + num_op_with_broadcast(&self, rhs, |l, r| l.$op(r)) + } + } + + impl std::ops::$trait:: for &Column + where + T: Num + NumCast, + { + type Output = PolarsResult; + + #[inline] + fn $op(self, rhs: T) -> Self::Output { + num_op_with_broadcast(self, rhs, |l, r| l.$op(r)) + } + } + )+ + }; +} + +broadcastable_ops! 
{ + (Add, add) + (Sub, sub) + (Mul, mul) + (Div, div) + (Rem, rem) + (BitAnd, bitand) + (BitOr, bitor) + (BitXor, bitxor) +} + +broadcastable_num_ops! { + (Add, add) + (Sub, sub) + (Mul, mul) + (Div, div) + (Rem, rem) +} diff --git a/crates/polars-core/src/frame/column/mod.rs b/crates/polars-core/src/frame/column/mod.rs new file mode 100644 index 000000000000..2296b09a4a03 --- /dev/null +++ b/crates/polars-core/src/frame/column/mod.rs @@ -0,0 +1,1266 @@ +use std::borrow::Cow; +use std::sync::OnceLock; + +use num_traits::{Num, NumCast}; +use polars_error::PolarsResult; +use polars_utils::index::check_bounds; +use polars_utils::pl_str::PlSmallStr; + +use self::gather::check_bounds_ca; +use crate::chunked_array::cast::CastOptions; +use crate::chunked_array::metadata::MetadataFlags; +use crate::prelude::*; +use crate::series::{BitRepr, IsSorted, SeriesPhysIter}; +use crate::utils::{slice_offsets, Container}; +use crate::{HEAD_DEFAULT_LENGTH, TAIL_DEFAULT_LENGTH}; + +mod arithmetic; + +/// A column within a [`DataFrame`]. +/// +/// This is lazily initialized to a [`Series`] with methods like +/// [`as_materialized_series`][Column::as_materialized_series] and +/// [`take_materialized_series`][Column::take_materialized_series]. +/// +/// Currently, there are two ways to represent a [`Column`]. +/// 1. A [`Series`] of values +/// 2. A [`ScalarColumn`] that repeats a single [`Scalar`] +#[derive(Debug, Clone)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] +#[cfg_attr(feature = "serde", serde(from = "Series"))] +#[cfg_attr(feature = "serde", serde(into = "_SerdeSeries"))] +pub enum Column { + Series(Series), + Scalar(ScalarColumn), +} + +/// A [`Column`] that consists of a repeated [`Scalar`] +/// +/// This is lazily materialized into a [`Series`]. +#[derive(Debug, Clone)] +pub struct ScalarColumn { + name: PlSmallStr, + // The value of this scalar may be incoherent when `length == 0`. 
+ scalar: Scalar, + length: usize, + + // invariants: + // materialized.name() == name + // materialized.len() == length + // materialized.dtype() == value.dtype + // materialized[i] == value, for all 0 <= i < length + /// A lazily materialized [`Series`] variant of this [`ScalarColumn`] + materialized: OnceLock, +} + +/// Convert `Self` into a [`Column`] +pub trait IntoColumn: Sized { + fn into_column(self) -> Column; +} + +impl Column { + #[inline] + pub fn new(name: PlSmallStr, values: T) -> Self + where + Phantom: ?Sized, + Series: NamedFrom, + { + Self::Series(NamedFrom::new(name, values)) + } + + #[inline] + pub fn new_empty(name: PlSmallStr, dtype: &DataType) -> Self { + Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), 0) + } + + #[inline] + pub fn new_scalar(name: PlSmallStr, scalar: Scalar, length: usize) -> Self { + Self::Scalar(ScalarColumn::new(name, scalar, length)) + } + + // # Materialize + /// Get a reference to a [`Series`] for this [`Column`] + /// + /// This may need to materialize the [`Series`] on the first invocation for a specific column. + #[inline] + pub fn as_materialized_series(&self) -> &Series { + match self { + Column::Series(s) => s, + Column::Scalar(s) => s.as_materialized_series(), + } + } + /// Turn [`Column`] into a [`Column::Series`]. + /// + /// This may need to materialize the [`Series`] on the first invocation for a specific column. + #[inline] + pub fn into_materialized_series(&mut self) -> &mut Series { + match self { + Column::Series(s) => s, + Column::Scalar(s) => { + let series = s.materialized.take().unwrap_or_else(|| s.to_series()); + *self = Column::Series(series); + let Column::Series(s) = self else { + unreachable!(); + }; + s + }, + } + } + /// Take [`Series`] from a [`Column`] + /// + /// This may need to materialize the [`Series`] on the first invocation for a specific column. 
+ #[inline] + pub fn take_materialized_series(self) -> Series { + match self { + Column::Series(s) => s, + Column::Scalar(s) => s.take_materialized_series(), + } + } + + #[inline] + pub fn dtype(&self) -> &DataType { + match self { + Column::Series(s) => s.dtype(), + Column::Scalar(s) => s.scalar.dtype(), + } + } + + #[inline] + pub fn field(&self) -> Cow { + match self { + Column::Series(s) => s.field(), + Column::Scalar(s) => match s.materialized.get() { + None => Cow::Owned(Field::new(s.name.clone(), s.scalar.dtype().clone())), + Some(s) => s.field(), + }, + } + } + + #[inline] + pub fn name(&self) -> &PlSmallStr { + match self { + Column::Series(s) => s.name(), + Column::Scalar(s) => &s.name, + } + } + + #[inline] + pub fn len(&self) -> usize { + match self { + Column::Series(s) => s.len(), + Column::Scalar(s) => s.length, + } + } + + #[inline] + pub fn with_name(mut self, name: PlSmallStr) -> Column { + self.rename(name); + self + } + + #[inline] + pub fn rename(&mut self, name: PlSmallStr) { + match self { + Column::Series(s) => _ = s.rename(name), + Column::Scalar(s) => { + if let Some(series) = s.materialized.get_mut() { + series.rename(name.clone()); + } + + s.name = name; + }, + } + } + + // # Downcasting + #[inline] + pub fn as_series(&self) -> Option<&Series> { + match self { + Column::Series(s) => Some(s), + Column::Scalar(_) => None, + } + } + #[inline] + pub fn as_scalar_column(&self) -> Option<&ScalarColumn> { + match self { + Column::Series(_) => None, + Column::Scalar(s) => Some(s), + } + } + + // # To Chunked Arrays + pub fn bool(&self) -> PolarsResult<&BooleanChunked> { + self.as_materialized_series().bool() + } + pub fn i8(&self) -> PolarsResult<&Int8Chunked> { + self.as_materialized_series().i8() + } + pub fn i16(&self) -> PolarsResult<&Int16Chunked> { + self.as_materialized_series().i16() + } + pub fn i32(&self) -> PolarsResult<&Int32Chunked> { + self.as_materialized_series().i32() + } + pub fn i64(&self) -> PolarsResult<&Int64Chunked> { + 
self.as_materialized_series().i64() + } + pub fn u8(&self) -> PolarsResult<&UInt8Chunked> { + self.as_materialized_series().u8() + } + pub fn u16(&self) -> PolarsResult<&UInt16Chunked> { + self.as_materialized_series().u16() + } + pub fn u32(&self) -> PolarsResult<&UInt32Chunked> { + self.as_materialized_series().u32() + } + pub fn u64(&self) -> PolarsResult<&UInt64Chunked> { + self.as_materialized_series().u64() + } + pub fn f32(&self) -> PolarsResult<&Float32Chunked> { + self.as_materialized_series().f32() + } + pub fn f64(&self) -> PolarsResult<&Float64Chunked> { + self.as_materialized_series().f64() + } + pub fn str(&self) -> PolarsResult<&StringChunked> { + self.as_materialized_series().str() + } + pub fn list(&self) -> PolarsResult<&ListChunked> { + self.as_materialized_series().list() + } + pub fn binary(&self) -> PolarsResult<&BinaryChunked> { + self.as_materialized_series().binary() + } + pub fn idx(&self) -> PolarsResult<&IdxCa> { + self.as_materialized_series().idx() + } + pub fn binary_offset(&self) -> PolarsResult<&BinaryOffsetChunked> { + self.as_materialized_series().binary_offset() + } + #[cfg(feature = "dtype-datetime")] + pub fn datetime(&self) -> PolarsResult<&DatetimeChunked> { + self.as_materialized_series().datetime() + } + #[cfg(feature = "dtype-struct")] + pub fn struct_(&self) -> PolarsResult<&StructChunked> { + self.as_materialized_series().struct_() + } + #[cfg(feature = "dtype-decimal")] + pub fn decimal(&self) -> PolarsResult<&DecimalChunked> { + self.as_materialized_series().decimal() + } + #[cfg(feature = "dtype-array")] + pub fn array(&self) -> PolarsResult<&ArrayChunked> { + self.as_materialized_series().array() + } + #[cfg(feature = "dtype-categorical")] + pub fn categorical(&self) -> PolarsResult<&CategoricalChunked> { + self.as_materialized_series().categorical() + } + #[cfg(feature = "dtype-date")] + pub fn date(&self) -> PolarsResult<&DateChunked> { + self.as_materialized_series().date() + } + #[cfg(feature = "dtype-duration")] 
+ pub fn duration(&self) -> PolarsResult<&DurationChunked> { + self.as_materialized_series().duration() + } + + // # Casting + pub fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult { + match self { + Column::Series(s) => s.cast_with_options(dtype, options).map(Column::from), + Column::Scalar(s) => s.cast_with_options(dtype, options).map(Column::from), + } + } + pub fn strict_cast(&self, dtype: &DataType) -> PolarsResult { + match self { + Column::Series(s) => s.strict_cast(dtype).map(Column::from), + Column::Scalar(s) => s.strict_cast(dtype).map(Column::from), + } + } + pub fn cast(&self, dtype: &DataType) -> PolarsResult { + match self { + Column::Series(s) => s.cast(dtype).map(Column::from), + Column::Scalar(s) => s.cast(dtype).map(Column::from), + } + } + /// # Safety + /// + /// This can lead to invalid memory access in downstream code. + pub unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult { + match self { + Column::Series(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from), + Column::Scalar(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from), + } + } + + pub fn clear(&self) -> Self { + match self { + Column::Series(s) => s.clear().into(), + Column::Scalar(s) => Self::new_scalar(s.name.clone(), s.scalar.clone(), 0), + } + } + + #[inline] + pub fn shrink_to_fit(&mut self) { + match self { + Column::Series(s) => s.shrink_to_fit(), + Column::Scalar(_) => {}, + } + } + + #[inline] + pub fn new_from_index(&self, index: usize, length: usize) -> Self { + match self { + Column::Series(s) => s.new_from_index(index, length).into(), + Column::Scalar(s) => { + if index >= s.length { + Self::full_null(s.name.clone(), length, s.scalar.dtype()) + } else { + s.resize(length).into() + } + }, + } + } + + #[inline] + pub fn has_nulls(&self) -> bool { + match self { + Self::Series(s) => s.has_nulls(), + Self::Scalar(s) => s.has_nulls(), + } + } + + #[inline] + pub fn is_null(&self) -> BooleanChunked { + match self { 
+ Self::Series(s) => s.is_null(), + Self::Scalar(s) => BooleanChunked::full(s.name.clone(), s.scalar.is_null(), s.length), + } + } + #[inline] + pub fn is_not_null(&self) -> BooleanChunked { + match self { + Self::Series(s) => s.is_not_null(), + Self::Scalar(s) => BooleanChunked::full(s.name.clone(), !s.scalar.is_null(), s.length), + } + } + + pub fn to_physical_repr(&self) -> Column { + // @scalar-opt + self.as_materialized_series() + .to_physical_repr() + .into_owned() + .into() + } + + pub fn head(&self, length: Option) -> Column { + let len = length.unwrap_or(HEAD_DEFAULT_LENGTH); + let len = usize::min(len, self.len()); + self.slice(0, len) + } + pub fn tail(&self, length: Option) -> Column { + let len = length.unwrap_or(TAIL_DEFAULT_LENGTH); + let len = usize::min(len, self.len()); + debug_assert!(len <= i64::MAX as usize); + self.slice(-(len as i64), len) + } + pub fn slice(&self, offset: i64, length: usize) -> Column { + match self { + Column::Series(s) => s.slice(offset, length).into(), + Column::Scalar(s) => { + let (_, length) = slice_offsets(offset, length, s.length); + s.resize(length).into() + }, + } + } + + pub fn split_at(&self, offset: i64) -> (Column, Column) { + // @scalar-opt + let (l, r) = self.as_materialized_series().split_at(offset); + (l.into(), r.into()) + } + + #[inline] + pub fn null_count(&self) -> usize { + match self { + Self::Series(s) => s.null_count(), + Self::Scalar(s) if s.scalar.is_null() => s.length, + Self::Scalar(_) => 0, + } + } + + pub fn take(&self, indices: &IdxCa) -> PolarsResult { + check_bounds_ca(indices, self.len() as IdxSize)?; + Ok(unsafe { self.take_unchecked(indices) }) + } + pub fn take_slice(&self, indices: &[IdxSize]) -> PolarsResult { + check_bounds(indices, self.len() as IdxSize)?; + Ok(unsafe { self.take_slice_unchecked(indices) }) + } + /// # Safety + /// + /// No bounds on the indexes are performed. 
+ pub unsafe fn take_unchecked(&self, indices: &IdxCa) -> Column { + debug_assert!(check_bounds_ca(indices, self.len() as IdxSize).is_ok()); + + match self { + Self::Series(s) => unsafe { s.take_unchecked(indices) }.into(), + Self::Scalar(s) => s.resize(indices.len()).into(), + } + } + /// # Safety + /// + /// No bounds on the indexes are performed. + pub unsafe fn take_slice_unchecked(&self, indices: &[IdxSize]) -> Column { + debug_assert!(check_bounds(indices, self.len() as IdxSize).is_ok()); + + match self { + Self::Series(s) => unsafe { s.take_unchecked_from_slice(indices) }.into(), + Self::Scalar(s) => s.resize(indices.len()).into(), + } + } + + /// # Safety + /// + /// Does no bounds checks, groups must be correct. + #[cfg(feature = "algorithm_group_by")] + pub unsafe fn agg_min(&self, groups: &GroupsProxy) -> Self { + // @scalar-opt + unsafe { self.as_materialized_series().agg_min(groups) }.into() + } + + /// # Safety + /// + /// Does no bounds checks, groups must be correct. + #[cfg(feature = "algorithm_group_by")] + pub unsafe fn agg_max(&self, groups: &GroupsProxy) -> Self { + // @scalar-opt + unsafe { self.as_materialized_series().agg_max(groups) }.into() + } + + /// # Safety + /// + /// Does no bounds checks, groups must be correct. + #[cfg(feature = "algorithm_group_by")] + pub unsafe fn agg_mean(&self, groups: &GroupsProxy) -> Self { + // @scalar-opt + unsafe { self.as_materialized_series().agg_mean(groups) }.into() + } + + /// # Safety + /// + /// Does no bounds checks, groups must be correct. + #[cfg(feature = "algorithm_group_by")] + pub unsafe fn agg_sum(&self, groups: &GroupsProxy) -> Self { + // @scalar-opt + unsafe { self.as_materialized_series().agg_sum(groups) }.into() + } + + /// # Safety + /// + /// Does no bounds checks, groups must be correct. 
+ pub unsafe fn agg_first(&self, groups: &GroupsProxy) -> Self { + // @scalar-opt + unsafe { self.as_materialized_series().agg_first(groups) }.into() + } + + /// # Safety + /// + /// Does no bounds checks, groups must be correct. + pub unsafe fn agg_last(&self, groups: &GroupsProxy) -> Self { + // @scalar-opt + unsafe { self.as_materialized_series().agg_last(groups) }.into() + } + + /// # Safety + /// + /// Does no bounds checks, groups must be correct. + pub unsafe fn agg_n_unique(&self, groups: &GroupsProxy) -> Self { + // @scalar-opt + unsafe { self.as_materialized_series().agg_n_unique(groups) }.into() + } + + /// # Safety + /// + /// Does no bounds checks, groups must be correct. + pub unsafe fn agg_quantile( + &self, + groups: &GroupsProxy, + quantile: f64, + interpol: QuantileInterpolOptions, + ) -> Self { + // @scalar-opt + unsafe { + self.as_materialized_series() + .agg_quantile(groups, quantile, interpol) + } + .into() + } + + /// # Safety + /// + /// Does no bounds checks, groups must be correct. + #[cfg(feature = "algorithm_group_by")] + pub unsafe fn agg_median(&self, groups: &GroupsProxy) -> Self { + // @scalar-opt + unsafe { self.as_materialized_series().agg_median(groups) }.into() + } + + /// # Safety + /// + /// Does no bounds checks, groups must be correct. + #[cfg(feature = "algorithm_group_by")] + pub unsafe fn agg_var(&self, groups: &GroupsProxy, ddof: u8) -> Self { + // @scalar-opt + unsafe { self.as_materialized_series().agg_var(groups, ddof) }.into() + } + + /// # Safety + /// + /// Does no bounds checks, groups must be correct. + #[cfg(feature = "algorithm_group_by")] + pub(crate) unsafe fn agg_std(&self, groups: &GroupsProxy, ddof: u8) -> Self { + // @scalar-opt + unsafe { self.as_materialized_series().agg_std(groups, ddof) }.into() + } + + /// # Safety + /// + /// Does no bounds checks, groups must be correct. 
+ #[cfg(feature = "algorithm_group_by")] + pub unsafe fn agg_list(&self, groups: &GroupsProxy) -> Self { + // @scalar-opt + unsafe { self.as_materialized_series().agg_list(groups) }.into() + } + + pub fn full_null(name: PlSmallStr, size: usize, dtype: &DataType) -> Self { + Series::full_null(name, size, dtype).into() + // @TODO: This causes failures + // Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), size) + } + + pub fn is_empty(&self) -> bool { + // @scalar-opt + self.as_materialized_series().is_empty() + } + + pub fn reverse(&self) -> Column { + match self { + Column::Series(s) => s.reverse().into(), + Column::Scalar(_) => self.clone(), + } + } + + pub fn equals(&self, right: &Column) -> bool { + // @scalar-opt + self.as_materialized_series() + .equals(right.as_materialized_series()) + } + + pub fn equals_missing(&self, right: &Column) -> bool { + // @scalar-opt + self.as_materialized_series() + .equals_missing(right.as_materialized_series()) + } + + pub fn set_sorted_flag(&mut self, sorted: IsSorted) { + // @scalar-opt + match self { + Column::Series(s) => s.set_sorted_flag(sorted), + Column::Scalar(_) => {}, + } + } + + pub fn get_flags(&self) -> MetadataFlags { + match self { + Column::Series(s) => s.get_flags(), + // @scalar-opt + Column::Scalar(_) => MetadataFlags::empty(), + } + } + + pub fn get_data_ptr(&self) -> usize { + // @scalar-opt + self.as_materialized_series().get_data_ptr() + } + + pub fn vec_hash(&self, build_hasher: PlRandomState, buf: &mut Vec) -> PolarsResult<()> { + // @scalar-opt? + self.as_materialized_series().vec_hash(build_hasher, buf) + } + + pub fn vec_hash_combine( + &self, + build_hasher: PlRandomState, + hashes: &mut [u64], + ) -> PolarsResult<()> { + // @scalar-opt? + self.as_materialized_series() + .vec_hash_combine(build_hasher, hashes) + } + + /// # Safety + /// + /// Indexes need to be in bounds. 
+ pub(crate) unsafe fn equal_element( + &self, + idx_self: usize, + idx_other: usize, + other: &Column, + ) -> bool { + // @scalar-opt + unsafe { + self.as_materialized_series().equal_element( + idx_self, + idx_other, + other.as_materialized_series(), + ) + } + } + + pub fn append(&mut self, other: &Column) -> PolarsResult<&mut Self> { + // @scalar-opt + self.into_materialized_series() + .append(other.as_materialized_series())?; + Ok(self) + } + + pub fn arg_sort(&self, options: SortOptions) -> IdxCa { + // @scalar-opt + self.as_materialized_series().arg_sort(options) + } + + pub fn bit_repr(&self) -> Option { + // @scalar-opt + self.as_materialized_series().bit_repr() + } + + pub fn into_frame(self) -> DataFrame { + // SAFETY: A single-column dataframe cannot have length mismatches or duplicate names + unsafe { DataFrame::new_no_checks(vec![self]) } + } + + pub fn unique_stable(&self) -> PolarsResult { + // @scalar-opt? + self.as_materialized_series() + .unique_stable() + .map(Column::from) + } + + pub fn extend(&mut self, other: &Column) -> PolarsResult<&mut Self> { + // @scalar-opt + self.into_materialized_series() + .extend(other.as_materialized_series())?; + Ok(self) + } + + pub fn rechunk(&self) -> Column { + match self { + Column::Series(s) => s.rechunk().into(), + Column::Scalar(_) => self.clone(), + } + } + + pub fn explode(&self) -> PolarsResult { + // @scalar-opt + self.as_materialized_series().explode().map(Column::from) + } + + pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult { + // @scalar-opt + self.as_materialized_series() + .fill_null(strategy) + .map(Column::from) + } + + pub fn divide(&self, rhs: &Column) -> PolarsResult { + // @scalar-opt + self.as_materialized_series() + .divide(rhs.as_materialized_series()) + .map(Column::from) + } + + pub fn shift(&self, periods: i64) -> Column { + // @scalar-opt + self.as_materialized_series().shift(periods).into() + } + + #[cfg(feature = "zip_with")] + pub fn zip_with(&self, mask: 
&BooleanChunked, other: &Self) -> PolarsResult { + // @scalar-opt + self.as_materialized_series() + .zip_with(mask, other.as_materialized_series()) + .map(Self::from) + } + + #[cfg(feature = "zip_with")] + pub fn zip_with_same_type( + &self, + mask: &ChunkedArray, + other: &Column, + ) -> PolarsResult { + // @scalar-opt + self.as_materialized_series() + .zip_with_same_type(mask, other.as_materialized_series()) + .map(Column::from) + } + + pub fn drop_nulls(&self) -> Column { + // @scalar-opt + self.as_materialized_series().drop_nulls().into() + } + + pub fn is_sorted_flag(&self) -> IsSorted { + // @scalar-opt + self.as_materialized_series().is_sorted_flag() + } + + pub fn unique(&self) -> PolarsResult { + // @scalar-opt + self.as_materialized_series().unique().map(Column::from) + } + + pub fn reshape_list(&self, dimensions: &[i64]) -> PolarsResult { + // @scalar-opt + self.as_materialized_series() + .reshape_list(dimensions) + .map(Self::from) + } + + #[cfg(feature = "dtype-array")] + pub fn reshape_array(&self, dimensions: &[i64]) -> PolarsResult { + // @scalar-opt + self.as_materialized_series() + .reshape_array(dimensions) + .map(Self::from) + } + + pub fn sort(&self, sort_options: SortOptions) -> PolarsResult { + // @scalar-opt + self.as_materialized_series() + .sort(sort_options) + .map(Self::from) + } + + pub fn filter(&self, filter: &ChunkedArray) -> PolarsResult { + // @scalar-opt + self.as_materialized_series().filter(filter).map(Self::from) + } + + #[cfg(feature = "random")] + pub fn shuffle(&self, seed: Option) -> Self { + // @scalar-opt + self.as_materialized_series().shuffle(seed).into() + } + + #[cfg(feature = "random")] + pub fn sample_frac( + &self, + frac: f64, + with_replacement: bool, + shuffle: bool, + seed: Option, + ) -> PolarsResult { + self.as_materialized_series() + .sample_frac(frac, with_replacement, shuffle, seed) + .map(Self::from) + } + + #[cfg(feature = "random")] + pub fn sample_n( + &self, + n: usize, + with_replacement: bool, + 
shuffle: bool, + seed: Option, + ) -> PolarsResult { + self.as_materialized_series() + .sample_n(n, with_replacement, shuffle, seed) + .map(Self::from) + } + + pub fn gather_every(&self, n: usize, offset: usize) -> Column { + // @scalar-opt + self.as_materialized_series().gather_every(n, offset).into() + } + + pub fn extend_constant(&self, value: AnyValue, n: usize) -> PolarsResult { + self.as_materialized_series() + .extend_constant(value, n) + .map(Column::from) + // @scalar-opt: This currently fails because Scalar::partial_cmp cannot deal with Nulls + // + // match self { + // Column::Series(s) => s.extend_constant(value, n).map(Column::from), + // Column::Scalar(s) => { + // if s.scalar.as_any_value() == value && s.len() > 0 { + // Ok(s.resize(s.len() + n).into()) + // } else { + // // @scalar-opt + // s.as_materialized_series().extend_constant(value, n).map(Column::from) + // } + // }, + // } + } + + pub fn is_finite(&self) -> PolarsResult { + // @scalar-opt + self.as_materialized_series().is_finite() + } + + pub fn is_infinite(&self) -> PolarsResult { + // @scalar-opt + self.as_materialized_series().is_infinite() + } + + pub fn is_nan(&self) -> PolarsResult { + // @scalar-opt + self.as_materialized_series().is_nan() + } + + pub fn is_not_nan(&self) -> PolarsResult { + // @scalar-opt + self.as_materialized_series().is_not_nan() + } + + pub fn wrapping_trunc_div_scalar(&self, rhs: T) -> Self + where + T: Num + NumCast, + { + // @scalar-opt + self.as_materialized_series() + .wrapping_trunc_div_scalar(rhs) + .into() + } + + pub fn product(&self) -> PolarsResult { + // @scalar-opt + self.as_materialized_series().product() + } + + pub fn phys_iter(&self) -> SeriesPhysIter<'_> { + // @scalar-opt + self.as_materialized_series().phys_iter() + } + + #[inline] + pub fn get(&self, index: usize) -> PolarsResult { + polars_ensure!(index < self.len(), oob = index, self.len()); + + // SAFETY: Bounds check done just before. 
+ Ok(unsafe { self.get_unchecked(index) }) + } + /// # Safety + /// + /// Does not perform bounds check on `index` + #[inline(always)] + pub unsafe fn get_unchecked(&self, index: usize) -> AnyValue { + debug_assert!(index < self.len()); + + match self { + Column::Series(s) => s.get_unchecked(index), + Column::Scalar(s) => s.scalar.as_any_value(), + } + } + + #[cfg(feature = "object")] + pub fn get_object( + &self, + index: usize, + ) -> Option<&dyn crate::chunked_array::object::PolarsObjectSafe> { + self.as_materialized_series().get_object(index) + } + + pub fn bitand(&self, rhs: &Self) -> PolarsResult { + self.as_materialized_series() + .bitand(rhs.as_materialized_series()) + .map(Column::from) + } + + pub(crate) fn str_value(&self, index: usize) -> PolarsResult> { + Ok(self.get(index)?.str_value()) + } + + pub fn max_reduce(&self) -> PolarsResult { + match self { + Column::Series(s) => s.max_reduce(), + Column::Scalar(s) => { + // We don't really want to deal with handling the full semantics here so we just + // cast to a single value series. This is a tiny bit wasteful, but probably fine. + s.as_single_value_series().max_reduce() + }, + } + } + + pub fn min_reduce(&self) -> PolarsResult { + match self { + Column::Series(s) => s.min_reduce(), + Column::Scalar(s) => { + // We don't really want to deal with handling the full semantics here so we just + // cast to a single value series. This is a tiny bit wasteful, but probably fine. + s.as_single_value_series().min_reduce() + }, + } + } + + pub(crate) fn estimated_size(&self) -> usize { + // @scalar-opt + self.as_materialized_series().estimated_size() + } +} + +impl ChunkCompare<&Column> for Column { + type Item = PolarsResult; + + /// Create a boolean mask by checking for equality. + #[inline] + fn equal(&self, rhs: &Column) -> PolarsResult { + self.as_materialized_series() + .equal(rhs.as_materialized_series()) + } + + /// Create a boolean mask by checking for equality. 
+ #[inline] + fn equal_missing(&self, rhs: &Column) -> PolarsResult { + self.as_materialized_series() + .equal_missing(rhs.as_materialized_series()) + } + + /// Create a boolean mask by checking for inequality. + #[inline] + fn not_equal(&self, rhs: &Column) -> PolarsResult { + self.as_materialized_series() + .not_equal(rhs.as_materialized_series()) + } + + /// Create a boolean mask by checking for inequality. + #[inline] + fn not_equal_missing(&self, rhs: &Column) -> PolarsResult { + self.as_materialized_series() + .not_equal_missing(rhs.as_materialized_series()) + } + + /// Create a boolean mask by checking if self > rhs. + #[inline] + fn gt(&self, rhs: &Column) -> PolarsResult { + self.as_materialized_series() + .gt(rhs.as_materialized_series()) + } + + /// Create a boolean mask by checking if self >= rhs. + #[inline] + fn gt_eq(&self, rhs: &Column) -> PolarsResult { + self.as_materialized_series() + .gt_eq(rhs.as_materialized_series()) + } + + /// Create a boolean mask by checking if self < rhs. + #[inline] + fn lt(&self, rhs: &Column) -> PolarsResult { + self.as_materialized_series() + .lt(rhs.as_materialized_series()) + } + + /// Create a boolean mask by checking if self <= rhs. 
+ #[inline] + fn lt_eq(&self, rhs: &Column) -> PolarsResult { + self.as_materialized_series() + .lt_eq(rhs.as_materialized_series()) + } +} + +impl Default for Column { + fn default() -> Self { + Self::new_scalar( + PlSmallStr::EMPTY, + Scalar::new(DataType::Int64, AnyValue::Null), + 0, + ) + } +} + +impl PartialEq for Column { + fn eq(&self, other: &Self) -> bool { + // @scalar-opt + self.as_materialized_series() + .eq(other.as_materialized_series()) + } +} + +impl From for Column { + #[inline] + fn from(series: Series) -> Self { + if series.len() == 1 { + // SAFETY: We just did the bounds check + let value = unsafe { series.get_unchecked(0) }; + + if let Ok(value) = value.into_static() { + let value = Scalar::new(series.dtype().clone(), value); + let mut col = ScalarColumn::new(series.name().clone(), value, 1); + col.materialized = OnceLock::from(series); + return Self::Scalar(col); + } + } + + Self::Series(series) + } +} + +impl From for Column { + #[inline] + fn from(value: ScalarColumn) -> Self { + Self::Scalar(value) + } +} + +impl ScalarColumn { + #[inline] + pub fn new(name: PlSmallStr, scalar: Scalar, length: usize) -> Self { + Self { + name, + scalar, + length, + + materialized: OnceLock::new(), + } + } + + #[inline] + pub fn new_empty(name: PlSmallStr, dtype: DataType) -> Self { + Self { + name, + scalar: Scalar::new(dtype, AnyValue::Null), + length: 0, + + materialized: OnceLock::new(), + } + } + + pub fn name(&self) -> &PlSmallStr { + &self.name + } + + pub fn dtype(&self) -> &DataType { + self.scalar.dtype() + } + + pub fn len(&self) -> usize { + self.length + } + + pub fn is_empty(&self) -> bool { + self.length == 0 + } + + fn _to_series(name: PlSmallStr, value: Scalar, length: usize) -> Series { + let series = if length == 0 { + Series::new_empty(name, value.dtype()) + } else { + value.into_series(name).new_from_index(0, length) + }; + + debug_assert_eq!(series.len(), length); + + series + } + + /// Materialize the [`ScalarColumn`] into a 
[`Series`]. + pub fn to_series(&self) -> Series { + Self::_to_series(self.name.clone(), self.scalar.clone(), self.length) + } + + /// Get the [`ScalarColumn`] as [`Series`] + /// + /// This needs to materialize upon the first call. Afterwards, this is cached. + pub fn as_materialized_series(&self) -> &Series { + self.materialized.get_or_init(|| self.to_series()) + } + + /// Take the [`ScalarColumn`] and materialize as a [`Series`] if not already done. + pub fn take_materialized_series(self) -> Series { + self.materialized + .into_inner() + .unwrap_or_else(|| Self::_to_series(self.name, self.scalar, self.length)) + } + + /// Take the [`ScalarColumn`] as a series with a single value. + /// + /// If the [`ScalarColumn`] has `length=0` the resulting `Series` will also have `length=0`. + pub fn as_single_value_series(&self) -> Series { + match self.materialized.get() { + Some(s) => s.head(Some(1)), + None => Self::_to_series( + self.name.clone(), + self.scalar.clone(), + usize::min(1, self.length), + ), + } + } + + /// Create a new [`ScalarColumn`] from a `length=1` Series and expand it `length`. + /// + /// This will panic if the value cannot be made static or if the series has length `0`. + pub fn from_single_value_series(series: Series, length: usize) -> PolarsResult { + debug_assert_eq!(series.len(), 1); + let value = series.get(0)?; + let value = value.into_static()?; + let value = Scalar::new(series.dtype().clone(), value); + Ok(ScalarColumn::new(series.name().clone(), value, length)) + } + + /// Resize the [`ScalarColumn`] to new `length`. + /// + /// This reuses the materialized [`Series`], if `length <= self.length`. 
+ pub fn resize(&self, length: usize) -> ScalarColumn { + let mut resized = Self { + name: self.name.clone(), + scalar: self.scalar.clone(), + length, + materialized: OnceLock::new(), + }; + + if self.length >= length { + if let Some(materialized) = self.materialized.get() { + resized.materialized = OnceLock::from(materialized.head(Some(length))); + debug_assert_eq!(resized.materialized.get().unwrap().len(), length); + } + } + + resized + } + + pub fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult { + // @NOTE: We expect that when casting the materialized series mostly does not need change + // the physical array. Therefore, we try to cast the entire materialized array if it is + // available. + + match self.materialized.get() { + Some(s) => { + let materialized = s.cast_with_options(dtype, options)?; + assert_eq!(self.length, materialized.len()); + + let mut casted = if materialized.len() == 0 { + Self::new_empty(materialized.name().clone(), materialized.dtype().clone()) + } else { + // SAFETY: Just did bounds check + let scalar = unsafe { materialized.get_unchecked(0) }.into_static()?; + Self::new( + materialized.name().clone(), + Scalar::new(materialized.dtype().clone(), scalar), + self.length, + ) + }; + casted.materialized = OnceLock::from(materialized); + Ok(casted) + }, + None => { + let s = self + .as_single_value_series() + .cast_with_options(dtype, options)?; + assert_eq!(1, s.len()); + + if self.length == 0 { + Ok(Self::new_empty(s.name().clone(), s.dtype().clone())) + } else { + Self::from_single_value_series(s, self.length) + } + }, + } + } + + pub fn strict_cast(&self, dtype: &DataType) -> PolarsResult { + self.cast_with_options(dtype, CastOptions::Strict) + } + pub fn cast(&self, dtype: &DataType) -> PolarsResult { + self.cast_with_options(dtype, CastOptions::NonStrict) + } + /// # Safety + /// + /// This can lead to invalid memory access in downstream code. 
+ pub unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult { + // @NOTE: We expect that when casting the materialized series mostly does not need change + // the physical array. Therefore, we try to cast the entire materialized array if it is + // available. + + match self.materialized.get() { + Some(s) => { + let materialized = s.cast_unchecked(dtype)?; + assert_eq!(self.length, materialized.len()); + + let mut casted = if materialized.len() == 0 { + Self::new_empty(materialized.name().clone(), materialized.dtype().clone()) + } else { + // SAFETY: Just did bounds check + let scalar = unsafe { materialized.get_unchecked(0) }.into_static()?; + Self::new( + materialized.name().clone(), + Scalar::new(materialized.dtype().clone(), scalar), + self.length, + ) + }; + casted.materialized = OnceLock::from(materialized); + Ok(casted) + }, + None => { + let s = self.as_single_value_series().cast_unchecked(dtype)?; + assert_eq!(1, s.len()); + + if self.length == 0 { + Ok(Self::new_empty(s.name().clone(), s.dtype().clone())) + } else { + Self::from_single_value_series(s, self.length) + } + }, + } + } + + pub fn has_nulls(&self) -> bool { + self.length != 0 && self.scalar.is_null() + } +} + +impl IntoColumn for T { + #[inline] + fn into_column(self) -> Column { + self.into_series().into() + } +} + +impl IntoColumn for Column { + #[inline(always)] + fn into_column(self) -> Column { + self + } +} + +impl IntoColumn for ScalarColumn { + #[inline(always)] + fn into_column(self) -> Column { + self.into() + } +} + +/// We don't want to serialize the scalar columns. So this helps pretend that columns are always +/// initialized without implementing From for Series. +/// +/// Those casts should be explicit. 
+#[derive(Clone)] +#[cfg_attr(feature = "serde", derive(serde::Serialize))] +#[cfg_attr(feature = "serde", serde(into = "Series"))] +struct _SerdeSeries(Series); + +impl From for _SerdeSeries { + #[inline] + fn from(value: Column) -> Self { + Self(value.take_materialized_series()) + } +} + +impl From<_SerdeSeries> for Series { + #[inline] + fn from(value: _SerdeSeries) -> Self { + value.0 + } +} diff --git a/crates/polars-core/src/frame/explode.rs b/crates/polars-core/src/frame/explode.rs index 3e597756eb1e..c12086def533 100644 --- a/crates/polars-core/src/frame/explode.rs +++ b/crates/polars-core/src/frame/explode.rs @@ -29,12 +29,12 @@ pub struct UnpivotArgsIR { } impl DataFrame { - pub fn explode_impl(&self, mut columns: Vec) -> PolarsResult { + pub fn explode_impl(&self, mut columns: Vec) -> PolarsResult { polars_ensure!(!columns.is_empty(), InvalidOperation: "no columns provided in explode"); let mut df = self.clone(); if self.is_empty() { for s in &columns { - df.with_column(s.explode()?)?; + df.with_column(s.as_materialized_series().explode()?)?; } return Ok(df); } @@ -57,14 +57,16 @@ impl DataFrame { let exploded_columns = POOL.install(|| { columns .par_iter() + .map(Column::as_materialized_series) .map(get_exploded) + .map(|s| s.map(|(s, o)| (Column::from(s), o))) .collect::>>() })?; fn process_column( original_df: &DataFrame, df: &mut DataFrame, - exploded: Series, + exploded: Column, ) -> PolarsResult<()> { if exploded.len() == df.height() || df.width() == 0 { let col_idx = original_df.check_name_to_idx(exploded.name().as_str())?; @@ -187,7 +189,7 @@ impl DataFrame { { // We need to sort the column by order of original occurrence. 
Otherwise the insert by index // below will panic - let columns = self.select_series(columns)?; + let columns = self.select_columns(columns)?; self.explode_impl(columns) } } @@ -203,17 +205,41 @@ mod test { let s0 = Series::new(PlSmallStr::from_static("a"), &[1i8, 2, 3]); let s1 = Series::new(PlSmallStr::from_static("b"), &[1i8, 1, 1]); let s2 = Series::new(PlSmallStr::from_static("c"), &[2i8, 2, 2]); - let list = Series::new(PlSmallStr::from_static("foo"), &[s0, s1, s2]); + let list = Column::new(PlSmallStr::from_static("foo"), &[s0, s1, s2]); - let s0 = Series::new(PlSmallStr::from_static("B"), [1, 2, 3]); - let s1 = Series::new(PlSmallStr::from_static("C"), [1, 1, 1]); + let s0 = Column::new(PlSmallStr::from_static("B"), [1, 2, 3]); + let s1 = Column::new(PlSmallStr::from_static("C"), [1, 1, 1]); let df = DataFrame::new(vec![list, s0.clone(), s1.clone()]).unwrap(); let exploded = df.explode(["foo"]).unwrap(); assert_eq!(exploded.shape(), (9, 3)); - assert_eq!(exploded.column("C").unwrap().i32().unwrap().get(8), Some(1)); - assert_eq!(exploded.column("B").unwrap().i32().unwrap().get(8), Some(3)); assert_eq!( - exploded.column("foo").unwrap().i8().unwrap().get(8), + exploded + .column("C") + .unwrap() + .as_materialized_series() + .i32() + .unwrap() + .get(8), + Some(1) + ); + assert_eq!( + exploded + .column("B") + .unwrap() + .as_materialized_series() + .i32() + .unwrap() + .get(8), + Some(3) + ); + assert_eq!( + exploded + .column("foo") + .unwrap() + .as_materialized_series() + .i8() + .unwrap() + .get(8), Some(2) ); } @@ -223,12 +249,12 @@ mod test { fn test_explode_df_empty_list() -> PolarsResult<()> { let s0 = Series::new(PlSmallStr::from_static("a"), &[1, 2, 3]); let s1 = Series::new(PlSmallStr::from_static("b"), &[1, 1, 1]); - let list = Series::new( + let list = Column::new( PlSmallStr::from_static("foo"), &[s0, s1.clone(), s1.clear()], ); - let s0 = Series::new(PlSmallStr::from_static("B"), [1, 2, 3]); - let s1 = 
Series::new(PlSmallStr::from_static("C"), [1, 1, 1]); + let s0 = Column::new(PlSmallStr::from_static("B"), [1, 2, 3]); + let s1 = Column::new(PlSmallStr::from_static("C"), [1, 1, 1]); let df = DataFrame::new(vec![list, s0.clone(), s1.clone()])?; let out = df.explode(["foo"])?; @@ -240,9 +266,13 @@ mod test { assert!(out.equals_missing(&expected)); - let list = Series::new( + let list = Column::new( PlSmallStr::from_static("foo"), - [s0.clone(), s1.clear(), s1.clone()], + [ + s0.as_materialized_series().clone(), + s1.as_materialized_series().clear(), + s1.as_materialized_series().clone(), + ], ); let df = DataFrame::new(vec![list, s0, s1])?; let out = df.explode(["foo"])?; @@ -261,12 +291,13 @@ mod test { fn test_explode_single_col() -> PolarsResult<()> { let s0 = Series::new(PlSmallStr::from_static("a"), &[1i32, 2, 3]); let s1 = Series::new(PlSmallStr::from_static("b"), &[1i32, 1, 1]); - let list = Series::new(PlSmallStr::from_static("foo"), &[s0, s1]); + let list = Column::new(PlSmallStr::from_static("foo"), &[s0, s1]); let df = DataFrame::new(vec![list])?; let out = df.explode(["foo"])?; let out = out .column("foo")? + .as_materialized_series() .i32()? 
.into_no_null_iter() .collect::>(); diff --git a/crates/polars-core/src/frame/from.rs b/crates/polars-core/src/frame/from.rs index 5c3e1a8cb212..5ec5d98a1597 100644 --- a/crates/polars-core/src/frame/from.rs +++ b/crates/polars-core/src/frame/from.rs @@ -23,6 +23,7 @@ impl TryFrom for DataFrame { Some(&fld.metadata), ) } + .map(Column::from) }) .collect::>>()?; DataFrame::new(columns) diff --git a/crates/polars-core/src/frame/group_by/mod.rs b/crates/polars-core/src/frame/group_by/mod.rs index 5dd631a51f0f..e2fbb90d6e74 100644 --- a/crates/polars-core/src/frame/group_by/mod.rs +++ b/crates/polars-core/src/frame/group_by/mod.rs @@ -28,7 +28,7 @@ use crate::prelude::sort::arg_sort_multiple::{ impl DataFrame { pub fn group_by_with_series( &self, - mut by: Vec, + mut by: Vec, multithreaded: bool, sorted: bool, ) -> PolarsResult { @@ -54,8 +54,10 @@ impl DataFrame { }; let groups = if by.len() == 1 { - let series = &by[0]; - series.group_tuples(multithreaded, sorted) + let column = &by[0]; + column + .as_materialized_series() + .group_tuples(multithreaded, sorted) } else if by.iter().any(|s| s.dtype().is_object()) { #[cfg(feature = "object")] { @@ -74,7 +76,7 @@ impl DataFrame { let by = by .iter() .filter(|s| !s.dtype().is_null()) - .cloned() + .map(|c| c.as_materialized_series().clone()) .collect::>(); if by.is_empty() { let groups = if self.is_empty() { @@ -116,7 +118,7 @@ impl DataFrame { I: IntoIterator, S: Into, { - let selected_keys = self.select_series(by)?; + let selected_keys = self.select_columns(by)?; self.group_by_with_series(selected_keys, true, false) } @@ -127,7 +129,7 @@ impl DataFrame { I: IntoIterator, S: Into, { - let selected_keys = self.select_series(by)?; + let selected_keys = self.select_columns(by)?; self.group_by_with_series(selected_keys, true, true) } } @@ -184,7 +186,7 @@ impl DataFrame { #[derive(Debug, Clone)] pub struct GroupBy<'df> { pub df: &'df DataFrame, - pub(crate) selected_keys: Vec, + pub(crate) selected_keys: Vec, // [first idx, 
[other idx]] groups: GroupsProxy, // columns selected for aggregation @@ -194,7 +196,7 @@ pub struct GroupBy<'df> { impl<'df> GroupBy<'df> { pub fn new( df: &'df DataFrame, - by: Vec, + by: Vec, groups: GroupsProxy, selected_agg: Option>, ) -> Self { @@ -245,7 +247,7 @@ impl<'df> GroupBy<'df> { std::mem::take(&mut self.groups) } - pub fn keys_sliced(&self, slice: Option<(i64, usize)>) -> Vec { + pub fn keys_sliced(&self, slice: Option<(i64, usize)>) -> Vec { #[allow(unused_assignments)] // needed to keep the lifetimes valid for this scope let mut groups_owned = None; @@ -260,6 +262,7 @@ impl<'df> GroupBy<'df> { POOL.install(|| { self.selected_keys .par_iter() + .map(Column::as_materialized_series) .map(|s| { match groups { GroupsProxy::Idx(groups) => { @@ -293,19 +296,20 @@ impl<'df> GroupBy<'df> { }, } }) + .map(Column::from) .collect() }) } - pub fn keys(&self) -> Vec { + pub fn keys(&self) -> Vec { self.keys_sliced(None) } - fn prepare_agg(&self) -> PolarsResult<(Vec, Vec)> { + fn prepare_agg(&self) -> PolarsResult<(Vec, Vec)> { let keys = self.keys(); let agg_col = match &self.selected_agg { - Some(selection) => self.df.select_series_impl(selection.as_slice()), + Some(selection) => self.df.select_columns_impl(selection.as_slice()), None => { let by: Vec<_> = self.selected_keys.iter().map(|s| s.name()).collect(); let selection = self @@ -316,7 +320,7 @@ impl<'df> GroupBy<'df> { .cloned() .collect::>(); - self.df.select_series_impl(selection.as_slice()) + self.df.select_columns_impl(selection.as_slice()) }, }?; @@ -579,7 +583,7 @@ impl<'df> GroupBy<'df> { let new_name = fmt_group_by_column(agg_col.name().as_str(), GroupByMethod::NUnique); let mut agg = unsafe { agg_col.agg_n_unique(&self.groups) }; agg.rename(new_name); - cols.push(agg.into_series()); + cols.push(agg); } DataFrame::new(cols) } @@ -614,7 +618,7 @@ impl<'df> GroupBy<'df> { ); let mut agg = unsafe { agg_col.agg_quantile(&self.groups, quantile, interpol) }; agg.rename(new_name); - 
cols.push(agg.into_series()); + cols.push(agg); } DataFrame::new(cols) } @@ -636,7 +640,7 @@ impl<'df> GroupBy<'df> { let new_name = fmt_group_by_column(agg_col.name().as_str(), GroupByMethod::Median); let mut agg = unsafe { agg_col.agg_median(&self.groups) }; agg.rename(new_name); - cols.push(agg.into_series()); + cols.push(agg); } DataFrame::new(cols) } @@ -649,7 +653,7 @@ impl<'df> GroupBy<'df> { let new_name = fmt_group_by_column(agg_col.name().as_str(), GroupByMethod::Var(ddof)); let mut agg = unsafe { agg_col.agg_var(&self.groups, ddof) }; agg.rename(new_name); - cols.push(agg.into_series()); + cols.push(agg); } DataFrame::new(cols) } @@ -662,7 +666,7 @@ impl<'df> GroupBy<'df> { let new_name = fmt_group_by_column(agg_col.name().as_str(), GroupByMethod::Std(ddof)); let mut agg = unsafe { agg_col.agg_std(&self.groups, ddof) }; agg.rename(new_name); - cols.push(agg.into_series()); + cols.push(agg); } DataFrame::new(cols) } @@ -704,7 +708,7 @@ impl<'df> GroupBy<'df> { ); let mut ca = self.groups.group_count(); ca.rename(new_name); - cols.push(ca.into_series()); + cols.push(ca.into_column()); } DataFrame::new(cols) } @@ -739,7 +743,7 @@ impl<'df> GroupBy<'df> { let mut column = self.groups.as_list_chunked(); let new_name = fmt_group_by_column("", GroupByMethod::Groups); column.rename(new_name); - cols.push(column.into_series()); + cols.push(column.into_column()); DataFrame::new(cols) } @@ -789,7 +793,7 @@ impl<'df> GroupBy<'df> { } else { let mut new_cols = Vec::with_capacity(self.selected_keys.len() + agg.len()); new_cols.extend_from_slice(&self.selected_keys); - let cols = self.df.select_series_impl(agg.as_slice())?; + let cols = self.df.select_columns_impl(agg.as_slice())?; new_cols.extend(cols); Ok(unsafe { DataFrame::new_no_checks(new_cols) }) } @@ -929,7 +933,7 @@ mod test { #[cfg(feature = "dtype-date")] #[cfg_attr(miri, ignore)] fn test_group_by() -> PolarsResult<()> { - let s0 = Series::new( + let s0 = Column::new( PlSmallStr::from_static("date"), &[ 
"2020-08-21", @@ -939,14 +943,14 @@ mod test { "2020-08-22", ], ); - let s1 = Series::new(PlSmallStr::from_static("temp"), [20, 10, 7, 9, 1]); - let s2 = Series::new(PlSmallStr::from_static("rain"), [0.2, 0.1, 0.3, 0.1, 0.01]); + let s1 = Column::new(PlSmallStr::from_static("temp"), [20, 10, 7, 9, 1]); + let s2 = Column::new(PlSmallStr::from_static("rain"), [0.2, 0.1, 0.3, 0.1, 0.01]); let df = DataFrame::new(vec![s0, s1, s2]).unwrap(); let out = df.group_by_stable(["date"])?.select(["temp"]).count()?; assert_eq!( out.column("temp_count")?, - &Series::new(PlSmallStr::from_static("temp_count"), [2 as IdxSize, 2, 1]) + &Column::new(PlSmallStr::from_static("temp_count"), [2 as IdxSize, 2, 1]) ); // Use of deprecated mean() for testing purposes @@ -958,7 +962,7 @@ mod test { .mean()?; assert_eq!( out.column("temp_mean")?, - &Series::new(PlSmallStr::from_static("temp_mean"), [15.0f64, 4.0, 9.0]) + &Column::new(PlSmallStr::from_static("temp_mean"), [15.0f64, 4.0, 9.0]) ); // Use of deprecated `mean()` for testing purposes @@ -975,7 +979,7 @@ mod test { let out = df.group_by_stable(["date"])?.select(["temp"]).sum()?; assert_eq!( out.column("temp_sum")?, - &Series::new(PlSmallStr::from_static("temp_sum"), [30, 8, 9]) + &Column::new(PlSmallStr::from_static("temp_sum"), [30, 8, 9]) ); // Use of deprecated `n_unique()` for testing purposes @@ -991,19 +995,19 @@ mod test { #[cfg_attr(miri, ignore)] fn test_static_group_by_by_12_columns() { // Build GroupBy DataFrame. 
- let s0 = Series::new("G1".into(), ["A", "A", "B", "B", "C"].as_ref()); - let s1 = Series::new("N".into(), [1, 2, 2, 4, 2].as_ref()); - let s2 = Series::new("G2".into(), ["k", "l", "m", "m", "l"].as_ref()); - let s3 = Series::new("G3".into(), ["a", "b", "c", "c", "d"].as_ref()); - let s4 = Series::new("G4".into(), ["1", "2", "3", "3", "4"].as_ref()); - let s5 = Series::new("G5".into(), ["X", "Y", "Z", "Z", "W"].as_ref()); - let s6 = Series::new("G6".into(), [false, true, true, true, false].as_ref()); - let s7 = Series::new("G7".into(), ["r", "x", "q", "q", "o"].as_ref()); - let s8 = Series::new("G8".into(), ["R", "X", "Q", "Q", "O"].as_ref()); - let s9 = Series::new("G9".into(), [1, 2, 3, 3, 4].as_ref()); - let s10 = Series::new("G10".into(), [".", "!", "?", "?", "/"].as_ref()); - let s11 = Series::new("G11".into(), ["(", ")", "@", "@", "$"].as_ref()); - let s12 = Series::new("G12".into(), ["-", "_", ";", ";", ","].as_ref()); + let s0 = Column::new("G1".into(), ["A", "A", "B", "B", "C"].as_ref()); + let s1 = Column::new("N".into(), [1, 2, 2, 4, 2].as_ref()); + let s2 = Column::new("G2".into(), ["k", "l", "m", "m", "l"].as_ref()); + let s3 = Column::new("G3".into(), ["a", "b", "c", "c", "d"].as_ref()); + let s4 = Column::new("G4".into(), ["1", "2", "3", "3", "4"].as_ref()); + let s5 = Column::new("G5".into(), ["X", "Y", "Z", "Z", "W"].as_ref()); + let s6 = Column::new("G6".into(), [false, true, true, true, false].as_ref()); + let s7 = Column::new("G7".into(), ["r", "x", "q", "q", "o"].as_ref()); + let s8 = Column::new("G8".into(), ["R", "X", "Q", "Q", "O"].as_ref()); + let s9 = Column::new("G9".into(), [1, 2, 3, 3, 4].as_ref()); + let s10 = Column::new("G10".into(), [".", "!", "?", "?", "/"].as_ref()); + let s11 = Column::new("G11".into(), ["(", ")", "@", "@", "$"].as_ref()); + let s12 = Column::new("G12".into(), ["-", "_", ";", ";", ","].as_ref()); let df = DataFrame::new(vec![s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12]).unwrap(); @@ -1037,20 +1041,20 
@@ mod test { ]; // Vector to contain every series. - let mut series = Vec::with_capacity(14); + let mut columns = Vec::with_capacity(14); // Create a series for every group name. for series_name in series_names { - let group_series = Series::new(series_name.into(), series_content.as_ref()); - series.push(group_series); + let group_columns = Column::new(series_name.into(), series_content.as_ref()); + columns.push(group_columns); } // Create a series for the aggregation column. - let agg_series = Series::new("N".into(), [1, 2, 3, 3, 4].as_ref()); - series.push(agg_series); + let agg_series = Column::new("N".into(), [1, 2, 3, 3, 4].as_ref()); + columns.push(agg_series); // Create the dataframe with the computed series. - let df = DataFrame::new(series).unwrap(); + let df = DataFrame::new(columns).unwrap(); // Use of deprecated `sum()` for testing purposes #[allow(deprecated)] @@ -1122,7 +1126,13 @@ mod test { .unwrap(); assert_eq!( - Vec::from(res.column("bar_sum").unwrap().i32().unwrap()), + Vec::from( + res.column("bar_sum") + .unwrap() + .as_materialized_series() + .i32() + .unwrap() + ), &[Some(2), Some(2), Some(1)] ); } @@ -1139,7 +1149,7 @@ mod test { let out = df.group_by_stable(["a"])?.mean()?; assert_eq!( - Vec::from(out.column("b_mean")?.f64()?), + Vec::from(out.column("b_mean")?.as_materialized_series().f64()?), &[Some(1.5), Some(1.0)] ); Ok(()) diff --git a/crates/polars-core/src/frame/horizontal.rs b/crates/polars-core/src/frame/horizontal.rs index bcbf486e0877..31c072991d87 100644 --- a/crates/polars-core/src/frame/horizontal.rs +++ b/crates/polars-core/src/frame/horizontal.rs @@ -1,12 +1,13 @@ use polars_error::{polars_ensure, polars_err, PolarsResult}; use polars_utils::aliases::PlHashSet; +use super::Column; use crate::datatypes::AnyValue; use crate::frame::DataFrame; -use crate::prelude::{PlSmallStr, Series}; +use crate::prelude::PlSmallStr; fn check_hstack( - col: &Series, + col: &Column, names: &mut PlHashSet, height: usize, is_empty: bool, @@ 
-28,25 +29,25 @@ impl DataFrame { /// /// # Safety /// The caller must ensure: - /// - the length of all [`Series`] is equal to the height of this [`DataFrame`] + /// - the length of all [`Column`] is equal to the height of this [`DataFrame`] /// - the columns names are unique - pub unsafe fn hstack_mut_unchecked(&mut self, columns: &[Series]) -> &mut Self { + pub unsafe fn hstack_mut_unchecked(&mut self, columns: &[Column]) -> &mut Self { self.columns.extend_from_slice(columns); self } - /// Add multiple [`Series`] to a [`DataFrame`]. + /// Add multiple [`Column`] to a [`DataFrame`]. /// The added `Series` are required to have the same length. /// /// # Example /// /// ```rust /// # use polars_core::prelude::*; - /// fn stack(df: &mut DataFrame, columns: &[Series]) { + /// fn stack(df: &mut DataFrame, columns: &[Column]) { /// df.hstack_mut(columns); /// } /// ``` - pub fn hstack_mut(&mut self, columns: &[Series]) -> PolarsResult<&mut Self> { + pub fn hstack_mut(&mut self, columns: &[Column]) -> PolarsResult<&mut Self> { let mut names = self .columns .iter() @@ -83,9 +84,11 @@ pub fn concat_df_horizontal(dfs: &[DataFrame], check_duplicates: bool) -> Polars .map(|mut df| { if df.height() != max_len { let diff = max_len - df.height(); - df.columns - .iter_mut() - .for_each(|s| *s = s.extend_constant(AnyValue::Null, diff).unwrap()); + df.columns.iter_mut().for_each(|s| { + // @scalar-opt + let s = s.into_materialized_series(); + *s = s.extend_constant(AnyValue::Null, diff).unwrap() + }); } df }) diff --git a/crates/polars-core/src/frame/mod.rs b/crates/polars-core/src/frame/mod.rs index 648141688db8..a72da39d915d 100644 --- a/crates/polars-core/src/frame/mod.rs +++ b/crates/polars-core/src/frame/mod.rs @@ -16,6 +16,7 @@ use crate::utils::{slice_offsets, try_get_supertype, NoNull}; #[cfg(feature = "dataframe_arithmetic")] mod arithmetic; mod chunks; +pub mod column; pub mod explode; mod from; #[cfg(feature = "algorithm_group_by")] @@ -120,8 +121,8 @@ where /// /// 
```rust /// # use polars_core::prelude::*; -/// let s1 = Series::new("Fruit".into(), ["Apple", "Apple", "Pear"]); -/// let s2 = Series::new("Color".into(), ["Red", "Yellow", "Green"]); +/// let s1 = Column::new("Fruit".into(), ["Apple", "Apple", "Pear"]); +/// let s2 = Column::new("Color".into(), ["Red", "Yellow", "Green"]); /// /// let df: PolarsResult = DataFrame::new(vec![s1, s2]); /// ``` @@ -150,8 +151,8 @@ where /// let df = df!("Fruit" => ["Apple", "Apple", "Pear"], /// "Color" => ["Red", "Yellow", "Green"])?; /// -/// assert_eq!(df[0], Series::new("Fruit".into(), &["Apple", "Apple", "Pear"])); -/// assert_eq!(df[1], Series::new("Color".into(), &["Red", "Yellow", "Green"])); +/// assert_eq!(df[0], Column::new("Fruit".into(), &["Apple", "Apple", "Pear"])); +/// assert_eq!(df[1], Column::new("Color".into(), &["Red", "Yellow", "Green"])); /// # Ok::<(), PolarsError>(()) /// ``` /// @@ -162,16 +163,27 @@ where /// let df = df!("Fruit" => ["Apple", "Apple", "Pear"], /// "Color" => ["Red", "Yellow", "Green"])?; /// -/// assert_eq!(df["Fruit"], Series::new("Fruit".into(), &["Apple", "Apple", "Pear"])); -/// assert_eq!(df["Color"], Series::new("Color".into(), &["Red", "Yellow", "Green"])); +/// assert_eq!(df["Fruit"], Column::new("Fruit".into(), &["Apple", "Apple", "Pear"])); +/// assert_eq!(df["Color"], Column::new("Color".into(), &["Red", "Yellow", "Green"])); /// # Ok::<(), PolarsError>(()) /// ``` #[derive(Clone)] pub struct DataFrame { - pub(crate) columns: Vec, + // invariant: Column.len() is the same for each column + pub(crate) columns: Vec, } impl DataFrame { + #[inline] + pub fn materialized_column_iter(&self) -> impl ExactSizeIterator { + self.columns.iter().map(Column::as_materialized_series) + } + + #[inline] + pub fn par_materialized_column_iter(&self) -> impl ParallelIterator { + self.columns.par_iter().map(Column::as_materialized_series) + } + /// Returns an estimation of the total (heap) allocated size of the `DataFrame` in bytes. 
/// /// # Implementation @@ -185,36 +197,52 @@ impl DataFrame { /// /// FFI buffers are included in this estimation. pub fn estimated_size(&self) -> usize { - self.columns.iter().map(|s| s.estimated_size()).sum() + self.columns.iter().map(Column::estimated_size).sum() } // Reduce monomorphization. - pub fn _apply_columns(&self, func: &(dyn Fn(&Series) -> Series)) -> Vec { - self.columns.iter().map(func).collect() + pub fn _apply_columns(&self, func: &(dyn Fn(&Series) -> Series)) -> Vec { + self.materialized_column_iter() + .map(func) + .map(Column::from) + .collect() } // Reduce monomorphization. pub fn _apply_columns_par( &self, func: &(dyn Fn(&Series) -> Series + Send + Sync), - ) -> Vec { - POOL.install(|| self.columns.par_iter().map(func).collect()) + ) -> Vec { + POOL.install(|| { + self.par_materialized_column_iter() + .map(func) + .map(Column::from) + .collect() + }) } // Reduce monomorphization. fn try_apply_columns_par( &self, func: &(dyn Fn(&Series) -> PolarsResult + Send + Sync), - ) -> PolarsResult> { - POOL.install(|| self.columns.par_iter().map(func).collect()) + ) -> PolarsResult> { + POOL.install(|| { + self.par_materialized_column_iter() + .map(func) + .map(|s| s.map(Column::from)) + .collect() + }) } // Reduce monomorphization. fn try_apply_columns( &self, func: &(dyn Fn(&Series) -> PolarsResult + Send + Sync), - ) -> PolarsResult> { - self.columns.iter().map(func).collect() + ) -> PolarsResult> { + self.materialized_column_iter() + .map(func) + .map(|s| s.map(Column::from)) + .collect() } /// Get the index of the column. @@ -234,9 +262,11 @@ impl DataFrame { /// Reserve additional slots into the chunks of the series. pub(crate) fn reserve_chunks(&mut self, additional: usize) { for s in &mut self.columns { - // SAFETY: - // do not modify the data, simply resize. - unsafe { s.chunks_mut().reserve(additional) } + if let Column::Series(s) = s { + // SAFETY: + // do not modify the data, simply resize. 
+ unsafe { s.chunks_mut().reserve(additional) } + } } } @@ -246,13 +276,13 @@ impl DataFrame { /// /// ``` /// # use polars_core::prelude::*; - /// let s0 = Series::new("days".into(), [0, 1, 2].as_ref()); - /// let s1 = Series::new("temp".into(), [22.1, 19.9, 7.].as_ref()); + /// let s0 = Column::new("days".into(), [0, 1, 2].as_ref()); + /// let s1 = Column::new("temp".into(), [22.1, 19.9, 7.].as_ref()); /// /// let df = DataFrame::new(vec![s0, s1])?; /// # Ok::<(), PolarsError>(()) /// ``` - pub fn new(columns: Vec) -> PolarsResult { + pub fn new(columns: Vec) -> PolarsResult { ensure_names_unique(&columns, |s| s.name().as_str())?; if columns.len() > 1 { @@ -271,7 +301,7 @@ impl DataFrame { /// Converts a sequence of columns into a DataFrame, broadcasting length-1 /// columns to match the other columns. - pub fn new_with_broadcast(columns: Vec) -> PolarsResult { + pub fn new_with_broadcast(columns: Vec) -> PolarsResult { ensure_names_unique(&columns, |s| s.name().as_str())?; unsafe { Self::new_with_broadcast_no_checks(columns) } } @@ -281,7 +311,7 @@ impl DataFrame { /// /// # Safety /// Does not check that the column names are unique (which they must be). - pub unsafe fn new_with_broadcast_no_checks(mut columns: Vec) -> PolarsResult { + pub unsafe fn new_with_broadcast_no_checks(mut columns: Vec) -> PolarsResult { // The length of the longest non-unit length column determines the // broadcast length. If all columns are unit-length the broadcast length // is one. 
@@ -327,7 +357,7 @@ impl DataFrame { pub fn empty_with_schema(schema: &Schema) -> Self { let cols = schema .iter() - .map(|(name, dtype)| Series::new_empty(name.clone(), dtype)) + .map(|(name, dtype)| Column::from(Series::new_empty(name.clone(), dtype))) .collect(); unsafe { DataFrame::new_no_checks(cols) } } @@ -336,7 +366,7 @@ impl DataFrame { pub fn empty_with_arrow_schema(schema: &ArrowSchema) -> Self { let cols = schema .iter_values() - .map(|fld| Series::new_empty(fld.name.clone(), &(fld.dtype().into()))) + .map(|fld| Column::from(Series::new_empty(fld.name.clone(), &(fld.dtype().into())))) .collect(); unsafe { DataFrame::new_no_checks(cols) } } @@ -347,8 +377,8 @@ impl DataFrame { /// /// ```rust /// # use polars_core::prelude::*; - /// let s1 = Series::new("Ocean".into(), ["Atlantic", "Indian"]); - /// let s2 = Series::new("Area (km²)".into(), [106_460_000, 70_560_000]); + /// let s1 = Column::new("Ocean".into(), ["Atlantic", "Indian"]); + /// let s2 = Column::new("Area (km²)".into(), [106_460_000, 70_560_000]); /// let mut df = DataFrame::new(vec![s1.clone(), s2.clone()])?; /// /// assert_eq!(df.pop(), Some(s2)); @@ -357,7 +387,7 @@ impl DataFrame { /// assert!(df.is_empty()); /// # Ok::<(), PolarsError>(()) /// ``` - pub fn pop(&mut self) -> Option { + pub fn pop(&mut self) -> Option { self.columns.pop() } @@ -404,7 +434,7 @@ impl DataFrame { (offset..(self.height() as IdxSize) + offset).collect(), ); ca.set_sorted_flag(IsSorted::Ascending); - columns.push(ca.into_series()); + columns.push(ca.into_series().into()); columns.extend_from_slice(&self.columns); DataFrame::new(columns) @@ -419,7 +449,7 @@ impl DataFrame { ); ca.set_sorted_flag(IsSorted::Ascending); - self.columns.insert(0, ca.into_series()); + self.columns.insert(0, ca.into_series().into()); self } @@ -431,7 +461,7 @@ impl DataFrame { /// /// It is the callers responsibility to uphold the contract of all `Series` /// having an equal length and a unique name, if not this may panic down the line. 
- pub const unsafe fn new_no_checks(columns: Vec) -> DataFrame { + pub const unsafe fn new_no_checks(columns: Vec) -> DataFrame { DataFrame { columns } } @@ -444,7 +474,7 @@ impl DataFrame { /// /// It is the callers responsibility to uphold the contract of all `Series` /// having an equal length, if not this may panic down the line. - pub unsafe fn new_no_length_checks(columns: Vec) -> PolarsResult { + pub unsafe fn new_no_length_checks(columns: Vec) -> PolarsResult { ensure_names_unique(&columns, |s| s.name().as_str())?; Ok(DataFrame { columns }) } @@ -461,7 +491,9 @@ impl DataFrame { pub fn as_single_chunk(&mut self) -> &mut Self { // Don't parallelize this. Memory overhead for s in &mut self.columns { - *s = s.rechunk(); + if let Column::Series(s) = s { + *s = s.rechunk(); + } } self } @@ -480,12 +512,17 @@ impl DataFrame { pub fn should_rechunk(&self) -> bool { // Fast check. It is also needed for correctness, as code below doesn't check if the number // of chunks is equal. - if !self.get_columns().iter().map(|s| s.n_chunks()).all_equal() { + if !self + .get_columns() + .iter() + .filter_map(|c| c.as_series().map(|s| s.n_chunks())) + .all_equal() + { return true; } // From here we check chunk lengths. 
- let mut chunk_lengths = self.columns.iter().map(|s| s.chunk_lengths()); + let mut chunk_lengths = self.materialized_column_iter().map(|s| s.chunk_lengths()); match chunk_lengths.next() { None => false, Some(first_column_chunk_lengths) => { @@ -552,14 +589,14 @@ impl DataFrame { /// # use polars_core::prelude::*; /// let df: DataFrame = df!("Name" => ["Adenine", "Cytosine", "Guanine", "Thymine"], /// "Symbol" => ["A", "C", "G", "T"])?; - /// let columns: &[Series] = df.get_columns(); + /// let columns: &[Column] = df.get_columns(); /// /// assert_eq!(columns[0].name(), "Name"); /// assert_eq!(columns[1].name(), "Symbol"); /// # Ok::<(), PolarsError>(()) /// ``` #[inline] - pub fn get_columns(&self) -> &[Series] { + pub fn get_columns(&self) -> &[Column] { &self.columns } @@ -568,12 +605,12 @@ impl DataFrame { /// /// # Safety /// The caller must ensure the length of all [`Series`] remains equal. - pub unsafe fn get_columns_mut(&mut self) -> &mut Vec { + pub unsafe fn get_columns_mut(&mut self) -> &mut Vec { &mut self.columns } /// Take ownership of the underlying columns vec. 
- pub fn take_columns(self) -> Vec { + pub fn take_columns(self) -> Vec { self.columns } @@ -583,19 +620,19 @@ impl DataFrame { /// /// ```rust /// # use polars_core::prelude::*; - /// let s1: Series = Series::new("Name".into(), ["Pythagoras' theorem", "Shannon entropy"]); - /// let s2: Series = Series::new("Formula".into(), ["a²+b²=c²", "H=-Σ[P(x)log|P(x)|]"]); + /// let s1 = Column::new("Name".into(), ["Pythagoras' theorem", "Shannon entropy"]); + /// let s2 = Column::new("Formula".into(), ["a²+b²=c²", "H=-Σ[P(x)log|P(x)|]"]); /// let df: DataFrame = DataFrame::new(vec![s1.clone(), s2.clone()])?; /// /// let mut iterator = df.iter(); /// - /// assert_eq!(iterator.next(), Some(&s1)); - /// assert_eq!(iterator.next(), Some(&s2)); + /// assert_eq!(iterator.next(), Some(s1.as_materialized_series())); + /// assert_eq!(iterator.next(), Some(s2.as_materialized_series())); /// assert_eq!(iterator.next(), None); /// # Ok::<(), PolarsError>(()) /// ``` - pub fn iter(&self) -> std::slice::Iter<'_, Series> { - self.columns.iter() + pub fn iter(&self) -> impl ExactSizeIterator { + self.materialized_column_iter() } /// # Example @@ -678,10 +715,16 @@ impl DataFrame { self.columns.iter().map(|s| s.dtype().clone()).collect() } + pub(crate) fn first_series_column(&self) -> Option<&Series> { + self.columns.iter().find_map(|col| col.as_series()) + } + /// The number of chunks per column pub fn n_chunks(&self) -> usize { - match self.columns.first() { - None => 0, + // @scalar-correctness? 
+ match self.first_series_column() { + None if self.columns.is_empty() => 0, + None => 1, Some(s) => s.n_chunks(), } } @@ -796,8 +839,8 @@ impl DataFrame { /// ```rust /// # use polars_core::prelude::*; /// let df1: DataFrame = df!("Element" => ["Copper", "Silver", "Gold"])?; - /// let s1: Series = Series::new("Proton".into(), [29, 47, 79]); - /// let s2: Series = Series::new("Electron".into(), [29, 47, 79]); + /// let s1 = Column::new("Proton".into(), [29, 47, 79]); + /// let s2 = Column::new("Electron".into(), [29, 47, 79]); /// /// let df2: DataFrame = df1.hstack(&[s1, s2])?; /// assert_eq!(df2.shape(), (3, 3)); @@ -821,7 +864,7 @@ impl DataFrame { /// | Gold | 79 | 79 | /// +---------+--------+----------+ /// ``` - pub fn hstack(&self, columns: &[Series]) -> PolarsResult { + pub fn hstack(&self, columns: &[Column]) -> PolarsResult { let mut new_cols = self.columns.clone(); new_cols.extend_from_slice(columns); DataFrame::new(new_cols) @@ -929,7 +972,7 @@ impl DataFrame { .iter_mut() .zip(other.columns.iter()) .try_for_each::<_, PolarsResult<_>>(|(left, right)| { - ensure_can_extend(left, right)?; + ensure_can_extend(&*left, right)?; left.append(right)?; Ok(()) })?; @@ -976,7 +1019,7 @@ impl DataFrame { .iter_mut() .zip(other.columns.iter()) .try_for_each::<_, PolarsResult<_>>(|(left, right)| { - ensure_can_extend(left, right)?; + ensure_can_extend(&*left, right)?; left.extend(right)?; Ok(()) }) @@ -991,14 +1034,14 @@ impl DataFrame { /// let mut df: DataFrame = df!("Animal" => ["Tiger", "Lion", "Great auk"], /// "IUCN" => ["Endangered", "Vulnerable", "Extinct"])?; /// - /// let s1: PolarsResult = df.drop_in_place("Average weight"); + /// let s1: PolarsResult = df.drop_in_place("Average weight"); /// assert!(s1.is_err()); /// - /// let s2: Series = df.drop_in_place("Animal")?; - /// assert_eq!(s2, Series::new("Animal".into(), &["Tiger", "Lion", "Great auk"])); + /// let s2: Column = df.drop_in_place("Animal")?; + /// assert_eq!(s2, Column::new("Animal".into(), 
&["Tiger", "Lion", "Great auk"])); /// # Ok::<(), PolarsError>(()) /// ``` - pub fn drop_in_place(&mut self, name: &str) -> PolarsResult { + pub fn drop_in_place(&mut self, name: &str) -> PolarsResult { let idx = self.check_name_to_idx(name)?; Ok(self.columns.remove(idx)) } @@ -1036,14 +1079,14 @@ impl DataFrame { for<'a> &'a S: Into, { if let Some(v) = subset { - let v = self.select_series(v)?; + let v = self.select_columns(v)?; self._drop_nulls_impl(v.as_slice()) } else { self._drop_nulls_impl(self.columns.as_slice()) } } - fn _drop_nulls_impl(&self, subset: &[Series]) -> PolarsResult { + fn _drop_nulls_impl(&self, subset: &[Column]) -> PolarsResult { // fast path for no nulls in df if subset.iter().all(|s| !s.has_nulls()) { return Ok(self.clone()); @@ -1056,8 +1099,8 @@ impl DataFrame { .ok_or_else(|| polars_err!(NoData: "no data to drop nulls from"))?; let mut mask = mask.is_not_null(); - for s in iter { - mask = mask & s.is_not_null(); + for c in iter { + mask = mask & c.is_not_null(); } self.filter(&mask) } @@ -1119,63 +1162,63 @@ impl DataFrame { fn insert_column_no_name_check( &mut self, index: usize, - series: Series, + column: Column, ) -> PolarsResult<&mut Self> { polars_ensure!( - self.width() == 0 || series.len() == self.height(), + self.width() == 0 || column.len() == self.height(), ShapeMismatch: "unable to add a column of length {} to a DataFrame of height {}", - series.len(), self.height(), + column.len(), self.height(), ); - self.columns.insert(index, series); + self.columns.insert(index, column); Ok(self) } /// Insert a new column at a given index. 
- pub fn insert_column( + pub fn insert_column( &mut self, index: usize, column: S, ) -> PolarsResult<&mut Self> { - let series = column.into_series(); - self.check_already_present(series.name().as_str())?; - self.insert_column_no_name_check(index, series) + let column = column.into_column(); + self.check_already_present(column.name().as_str())?; + self.insert_column_no_name_check(index, column) } - fn add_column_by_search(&mut self, series: Series) -> PolarsResult<()> { - if let Some(idx) = self.get_column_index(series.name().as_str()) { - self.replace_column(idx, series)?; + fn add_column_by_search(&mut self, column: Column) -> PolarsResult<()> { + if let Some(idx) = self.get_column_index(column.name().as_str()) { + self.replace_column(idx, column)?; } else { - self.columns.push(series); + self.columns.push(column); } Ok(()) } /// Add a new column to this [`DataFrame`] or replace an existing one. - pub fn with_column(&mut self, column: S) -> PolarsResult<&mut Self> { - fn inner(df: &mut DataFrame, mut series: Series) -> PolarsResult<&mut DataFrame> { + pub fn with_column(&mut self, column: C) -> PolarsResult<&mut Self> { + fn inner(df: &mut DataFrame, mut column: Column) -> PolarsResult<&mut DataFrame> { let height = df.height(); - if series.len() == 1 && height > 1 { - series = series.new_from_index(0, height); + if column.len() == 1 && height > 1 { + column = column.new_from_index(0, height); } - if series.len() == height || df.get_columns().is_empty() { - df.add_column_by_search(series)?; + if column.len() == height || df.get_columns().is_empty() { + df.add_column_by_search(column)?; Ok(df) } // special case for literals - else if height == 0 && series.len() == 1 { - let s = series.clear(); + else if height == 0 && column.len() == 1 { + let s = column.clear(); df.add_column_by_search(s)?; Ok(df) } else { polars_bail!( ShapeMismatch: "unable to add a column of length {} to a DataFrame of height {}", - series.len(), height, + column.len(), height, ); } } - let 
series = column.into_series(); - inner(self, series) + let column = column.into_column(); + inner(self, column) } /// Adds a column to the [`DataFrame`] without doing any checks @@ -1184,33 +1227,43 @@ impl DataFrame { /// # Safety /// The caller must ensure `column.len() == self.height()` . pub unsafe fn with_column_unchecked(&mut self, column: Series) -> &mut Self { - #[cfg(debug_assertions)] - { - return self.with_column(column).unwrap(); - } - #[cfg(not(debug_assertions))] - { - self.get_columns_mut().push(column); + if cfg!(debug_assertions) { + self.with_column(column).unwrap() + } else { + self.get_columns_mut().push(column.into_column()); self } } - fn add_column_by_schema(&mut self, s: Series, schema: &Schema) -> PolarsResult<()> { - let name = s.name(); + fn add_column_by_schema(&mut self, c: Column, schema: &Schema) -> PolarsResult<()> { + let name = c.name(); if let Some((idx, _, _)) = schema.get_full(name.as_str()) { // schema is incorrect fallback to search if self.columns.get(idx).map(|s| s.name()) != Some(name) { - self.add_column_by_search(s)?; + self.add_column_by_search(c)?; } else { - self.replace_column(idx, s)?; + self.replace_column(idx, c)?; } } else { - self.columns.push(s); + self.columns.push(c); + } + Ok(()) + } + + pub fn _add_series(&mut self, series: Vec, schema: &Schema) -> PolarsResult<()> { + for (i, s) in series.into_iter().enumerate() { + // we need to branch here + // because users can add multiple columns with the same name + if i == 0 || schema.get(s.name().as_str()).is_some() { + self.with_column_and_schema(s.into_column(), schema)?; + } else { + self.with_column(s.clone().into_column())?; + } } Ok(()) } - pub fn _add_columns(&mut self, columns: Vec, schema: &Schema) -> PolarsResult<()> { + pub fn _add_columns(&mut self, columns: Vec, schema: &Schema) -> PolarsResult<()> { for (i, s) in columns.into_iter().enumerate() { // we need to branch here // because users can add multiple columns with the same name @@ -1226,31 +1279,31 
@@ impl DataFrame { /// Add a new column to this [`DataFrame`] or replace an existing one. /// Uses an existing schema to amortize lookups. /// If the schema is incorrect, we will fallback to linear search. - pub fn with_column_and_schema( + pub fn with_column_and_schema( &mut self, - column: S, + column: C, schema: &Schema, ) -> PolarsResult<&mut Self> { - let mut series = column.into_series(); + let mut column = column.into_column(); let height = self.height(); - if series.len() == 1 && height > 1 { - series = series.new_from_index(0, height); + if column.len() == 1 && height > 1 { + column = column.new_from_index(0, height); } - if series.len() == height || self.columns.is_empty() { - self.add_column_by_schema(series, schema)?; + if column.len() == height || self.columns.is_empty() { + self.add_column_by_schema(column, schema)?; Ok(self) } // special case for literals - else if height == 0 && series.len() == 1 { - let s = series.clear(); + else if height == 0 && column.len() == 1 { + let s = column.clear(); self.add_column_by_schema(s, schema)?; Ok(self) } else { polars_bail!( ShapeMismatch: "unable to add a column of length {} to a DataFrame of height {}", - series.len(), height, + column.len(), height, ); } } @@ -1275,7 +1328,7 @@ impl DataFrame { None => return None, } // SAFETY: we just checked bounds - unsafe { Some(self.columns.iter().map(|s| s.get_unchecked(idx)).collect()) } + unsafe { Some(self.columns.iter().map(|c| c.get_unchecked(idx)).collect()) } } /// Select a [`Series`] by index. 
@@ -1287,13 +1340,13 @@ impl DataFrame { /// let df: DataFrame = df!("Star" => ["Sun", "Betelgeuse", "Sirius A", "Sirius B"], /// "Absolute magnitude" => [4.83, -5.85, 1.42, 11.18])?; /// - /// let s1: Option<&Series> = df.select_at_idx(0); - /// let s2: Series = Series::new("Star".into(), ["Sun", "Betelgeuse", "Sirius A", "Sirius B"]); + /// let s1: Option<&Column> = df.select_at_idx(0); + /// let s2 = Column::new("Star".into(), ["Sun", "Betelgeuse", "Sirius A", "Sirius B"]); /// /// assert_eq!(s1, Some(&s2)); /// # Ok::<(), PolarsError>(()) /// ``` - pub fn select_at_idx(&self, idx: usize) -> Option<&Series> { + pub fn select_at_idx(&self, idx: usize) -> Option<&Column> { self.columns.get(idx) } @@ -1301,7 +1354,7 @@ impl DataFrame { /// /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.* /// For this reason the method is not public - fn select_at_idx_mut(&mut self, idx: usize) -> Option<&mut Series> { + fn select_at_idx_mut(&mut self, idx: usize) -> Option<&mut Column> { self.columns.get_mut(idx) } @@ -1400,14 +1453,14 @@ impl DataFrame { /// /// ```rust /// # use polars_core::prelude::*; - /// let s1: Series = Series::new("Password".into(), ["123456", "[]B$u$g$s$B#u#n#n#y[]{}"]); - /// let s2: Series = Series::new("Robustness".into(), ["Weak", "Strong"]); + /// let s1 = Column::new("Password".into(), ["123456", "[]B$u$g$s$B#u#n#n#y[]{}"]); + /// let s2 = Column::new("Robustness".into(), ["Weak", "Strong"]); /// let df: DataFrame = DataFrame::new(vec![s1.clone(), s2])?; /// /// assert_eq!(df.column("Password")?, &s1); /// # Ok::<(), PolarsError>(()) /// ``` - pub fn column(&self, name: &str) -> PolarsResult<&Series> { + pub fn column(&self, name: &str) -> PolarsResult<&Column> { let idx = self.try_get_column_index(name)?; Ok(self.select_at_idx(idx).unwrap()) } @@ -1420,13 +1473,13 @@ impl DataFrame { /// # use polars_core::prelude::*; /// let df: DataFrame = df!("Latin name" => ["Oncorhynchus kisutch", "Salmo salar"], 
/// "Max weight (kg)" => [16.0, 35.89])?; - /// let sv: Vec<&Series> = df.columns(["Latin name", "Max weight (kg)"])?; + /// let sv: Vec<&Column> = df.columns(["Latin name", "Max weight (kg)"])?; /// /// assert_eq!(&df[0], sv[0]); /// assert_eq!(&df[1], sv[1]); /// # Ok::<(), PolarsError>(()) /// ``` - pub fn columns(&self, names: I) -> PolarsResult> + pub fn columns(&self, names: I) -> PolarsResult> where I: IntoIterator, S: AsRef, @@ -1462,7 +1515,7 @@ impl DataFrame { } pub fn _select_impl_unchecked(&self, cols: &[PlSmallStr]) -> PolarsResult { - let selected = self.select_series_impl(cols)?; + let selected = self.select_columns_impl(cols)?; Ok(unsafe { DataFrame::new_no_checks(selected) }) } @@ -1499,16 +1552,16 @@ impl DataFrame { if check_duplicates { ensure_names_unique(cols, |s| s.as_str())?; } - let selected = self.select_series_impl_with_schema(cols, schema)?; + let selected = self.select_columns_impl_with_schema(cols, schema)?; Ok(unsafe { DataFrame::new_no_checks(selected) }) } /// A non generic implementation to reduce compiler bloat. 
- fn select_series_impl_with_schema( + fn select_columns_impl_with_schema( &self, cols: &[PlSmallStr], schema: &Schema, - ) -> PolarsResult> { + ) -> PolarsResult> { cols.iter() .map(|name| { let index = schema.try_get_full(name.as_str())?.0; @@ -1528,7 +1581,7 @@ impl DataFrame { fn select_physical_impl(&self, cols: &[PlSmallStr]) -> PolarsResult { ensure_names_unique(cols, |s| s.as_str())?; - let selected = self.select_series_physical_impl(cols)?; + let selected = self.select_columns_physical_impl(cols)?; Ok(unsafe { DataFrame::new_no_checks(selected) }) } @@ -1541,15 +1594,15 @@ impl DataFrame { /// let df: DataFrame = df!("Name" => ["Methane", "Ethane", "Propane"], /// "Carbon" => [1, 2, 3], /// "Hydrogen" => [4, 6, 8])?; - /// let sv: Vec = df.select_series(["Carbon", "Hydrogen"])?; + /// let sv: Vec = df.select_columns(["Carbon", "Hydrogen"])?; /// /// assert_eq!(df["Carbon"], sv[0]); /// assert_eq!(df["Hydrogen"], sv[1]); /// # Ok::<(), PolarsError>(()) /// ``` - pub fn select_series(&self, selection: impl IntoVec) -> PolarsResult> { + pub fn select_columns(&self, selection: impl IntoVec) -> PolarsResult> { let cols = selection.into_vec(); - self.select_series_impl(&cols) + self.select_columns_impl(&cols) } fn _names_to_idx_map(&self) -> PlHashMap<&str, usize> { @@ -1561,7 +1614,7 @@ impl DataFrame { } /// A non generic implementation to reduce compiler bloat. - fn select_series_physical_impl(&self, cols: &[PlSmallStr]) -> PolarsResult> { + fn select_columns_physical_impl(&self, cols: &[PlSmallStr]) -> PolarsResult> { let selected = if cols.len() > 1 && self.columns.len() > 10 { let name_to_idx = self._names_to_idx_map(); cols.iter() @@ -1569,19 +1622,12 @@ impl DataFrame { let idx = *name_to_idx .get(name.as_str()) .ok_or_else(|| polars_err!(col_not_found = name))?; - Ok(self - .select_at_idx(idx) - .unwrap() - .to_physical_repr() - .into_owned()) + Ok(self.select_at_idx(idx).unwrap().to_physical_repr()) }) .collect::>>()? 
} else { cols.iter() - .map(|c| { - self.column(c.as_str()) - .map(|s| s.to_physical_repr().into_owned()) - }) + .map(|c| self.column(c.as_str()).map(|s| s.to_physical_repr())) .collect::>>()? }; @@ -1589,7 +1635,7 @@ impl DataFrame { } /// A non generic implementation to reduce compiler bloat. - fn select_series_impl(&self, cols: &[PlSmallStr]) -> PolarsResult> { + fn select_columns_impl(&self, cols: &[PlSmallStr]) -> PolarsResult> { let selected = if cols.len() > 1 && self.columns.len() > 10 { // we hash, because there are user that having millions of columns. // # https://github.com/pola-rs/polars/issues/1023 @@ -1615,7 +1661,7 @@ impl DataFrame { /// Select a mutable series by name. /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.* /// For this reason the method is not public - fn select_mut(&mut self, name: &str) -> Option<&mut Series> { + fn select_mut(&mut self, name: &str) -> Option<&mut Column> { let opt_idx = self.get_column_index(name); opt_idx.and_then(|idx| self.select_at_idx_mut(idx)) @@ -1672,7 +1718,10 @@ impl DataFrame { let cols = if allow_threads { POOL.install(|| self._apply_columns_par(&|s| s.take_unchecked(idx))) } else { - self.columns.iter().map(|s| s.take_unchecked(idx)).collect() + self.materialized_column_iter() + .map(|s| s.take_unchecked(idx)) + .map(Column::from) + .collect() }; unsafe { DataFrame::new_no_checks(cols) } } @@ -1685,9 +1734,9 @@ impl DataFrame { let cols = if allow_threads { POOL.install(|| self._apply_columns_par(&|s| s.take_slice_unchecked(idx))) } else { - self.columns - .iter() + self.materialized_column_iter() .map(|s| s.take_slice_unchecked(idx)) + .map(Column::from) .collect() }; unsafe { DataFrame::new_no_checks(cols) } @@ -1715,7 +1764,7 @@ impl DataFrame { ); self.select_mut(column) .ok_or_else(|| polars_err!(col_not_found = column)) - .map(|s| s.rename(name))?; + .map(|c| c.rename(name))?; Ok(self) } @@ -1727,7 +1776,7 @@ impl DataFrame { by: impl IntoVec, 
sort_options: SortMultipleOptions, ) -> PolarsResult<&mut Self> { - let by_column = self.select_series(by)?; + let by_column = self.select_columns(by)?; self.columns = self.sort_impl(by_column, sort_options, None)?.columns; Ok(self) } @@ -1736,7 +1785,7 @@ impl DataFrame { /// This is the dispatch of Self::sort, and exists to reduce compile bloat by monomorphization. pub fn sort_impl( &self, - by_column: Vec, + by_column: Vec, mut sort_options: SortMultipleOptions, slice: Option<(i64, usize)>, ) -> PolarsResult { @@ -1786,7 +1835,7 @@ impl DataFrame { let df = df.as_single_chunk_par(); let mut take = match (by_column.len(), has_struct) { (1, false) => { - let s = &by_column[0]; + let s = &by_column[0].as_materialized_series(); let options = SortOptions { descending: sort_options.descending[0], nulls_last: sort_options.nulls_last[0], @@ -1818,7 +1867,9 @@ impl DataFrame { )? } else { let (first, other) = prepare_arg_sort(by_column, &mut sort_options)?; - first.arg_sort_multiple(&other, &sort_options)? + first + .as_materialized_series() + .arg_sort_multiple(&other, &sort_options)? 
} }, }; @@ -1925,10 +1976,10 @@ impl DataFrame { /// df.replace_column(1, df.select_at_idx(1).unwrap() + 32); /// # Ok::<(), PolarsError>(()) /// ``` - pub fn replace_column( + pub fn replace_column( &mut self, index: usize, - new_column: S, + new_column: C, ) -> PolarsResult<&mut Self> { polars_ensure!( index < self.width(), @@ -1936,7 +1987,7 @@ impl DataFrame { "unable to replace at index {}, the DataFrame has only {} columns", index, self.width(), ); - let mut new_column = new_column.into_series(); + let mut new_column = new_column.into_column(); polars_ensure!( new_column.len() == self.height(), ShapeMismatch: @@ -1954,11 +2005,11 @@ impl DataFrame { /// /// ```rust /// # use polars_core::prelude::*; - /// let s0 = Series::new("foo".into(), ["ham", "spam", "egg"]); - /// let s1 = Series::new("names".into(), ["Jean", "Claude", "van"]); + /// let s0 = Column::new("foo".into(), ["ham", "spam", "egg"]); + /// let s1 = Column::new("names".into(), ["Jean", "Claude", "van"]); /// let mut df = DataFrame::new(vec![s0, s1])?; /// - /// fn str_to_len(str_val: &Series) -> Series { + /// fn str_to_len(str_val: &Column) -> Column { /// str_val.str() /// .unwrap() /// .into_iter() @@ -1966,7 +2017,7 @@ impl DataFrame { /// opt_name.map(|name: &str| name.len() as u32) /// }) /// .collect::() - /// .into_series() + /// .into_column() /// } /// /// // Replace the names column by the length of the names. 
@@ -1988,10 +2039,10 @@ impl DataFrame { /// | "egg" | 3 | /// +--------+-------+ /// ``` - pub fn apply(&mut self, name: &str, f: F) -> PolarsResult<&mut Self> + pub fn apply(&mut self, name: &str, f: F) -> PolarsResult<&mut Self> where - F: FnOnce(&Series) -> S, - S: IntoSeries, + F: FnOnce(&Column) -> C, + C: IntoColumn, { let idx = self.check_name_to_idx(name)?; self.apply_at_idx(idx, f) @@ -2004,12 +2055,12 @@ impl DataFrame { /// /// ```rust /// # use polars_core::prelude::*; - /// let s0 = Series::new("foo".into(), ["ham", "spam", "egg"]); - /// let s1 = Series::new("ascii".into(), [70, 79, 79]); + /// let s0 = Column::new("foo".into(), ["ham", "spam", "egg"]); + /// let s1 = Column::new("ascii".into(), [70, 79, 79]); /// let mut df = DataFrame::new(vec![s0, s1])?; /// /// // Add 32 to get lowercase ascii values - /// df.apply_at_idx(1, |s| s + 32); + /// df.apply_at_idx(1, |s| (s + 32).unwrap()); /// # Ok::<(), PolarsError>(()) /// ``` /// Results in: @@ -2027,10 +2078,10 @@ impl DataFrame { /// | "egg" | 111 | /// +--------+-------+ /// ``` - pub fn apply_at_idx(&mut self, idx: usize, f: F) -> PolarsResult<&mut Self> + pub fn apply_at_idx(&mut self, idx: usize, f: F) -> PolarsResult<&mut Self> where - F: FnOnce(&Series) -> S, - S: IntoSeries, + F: FnOnce(&Column) -> C, + C: IntoColumn, { let df_height = self.height(); let width = self.width(); @@ -2041,7 +2092,7 @@ impl DataFrame { ) })?; let name = col.name().clone(); - let new_col = f(col).into_series(); + let new_col = f(col).into_column(); match new_col.len() { 1 => { let new_col = new_col.new_from_index(0, df_height); @@ -2074,14 +2125,14 @@ impl DataFrame { /// /// ```rust /// # use polars_core::prelude::*; - /// let s0 = Series::new("foo".into(), ["ham", "spam", "egg", "bacon", "quack"]); - /// let s1 = Series::new("values".into(), [1, 2, 3, 4, 5]); + /// let s0 = Column::new("foo".into(), ["ham", "spam", "egg", "bacon", "quack"]); + /// let s1 = Column::new("values".into(), [1, 2, 3, 4, 5]); /// 
let mut df = DataFrame::new(vec![s0, s1])?; /// /// let idx = vec![0, 1, 4]; /// - /// df.try_apply("foo", |s| { - /// s.str()? + /// df.try_apply("foo", |c| { + /// c.str()? /// .scatter_with(idx, |opt_val| opt_val.map(|string| format!("{}-is-modified", string))) /// }); /// # Ok::<(), PolarsError>(()) @@ -2105,10 +2156,10 @@ impl DataFrame { /// | "quack-is-modified" | 5 | /// +---------------------+--------+ /// ``` - pub fn try_apply_at_idx(&mut self, idx: usize, f: F) -> PolarsResult<&mut Self> + pub fn try_apply_at_idx(&mut self, idx: usize, f: F) -> PolarsResult<&mut Self> where - F: FnOnce(&Series) -> PolarsResult, - S: IntoSeries, + F: FnOnce(&Column) -> PolarsResult, + C: IntoColumn, { let width = self.width(); let col = self.columns.get_mut(idx).ok_or_else(|| { @@ -2119,7 +2170,7 @@ impl DataFrame { })?; let name = col.name().clone(); - let _ = mem::replace(col, f(col).map(|s| s.into_series())?); + let _ = mem::replace(col, f(col).map(|c| c.into_column())?); // make sure the name remains the same after applying the closure unsafe { @@ -2138,16 +2189,16 @@ impl DataFrame { /// /// ```rust /// # use polars_core::prelude::*; - /// let s0 = Series::new("foo".into(), ["ham", "spam", "egg", "bacon", "quack"]); - /// let s1 = Series::new("values".into(), [1, 2, 3, 4, 5]); + /// let s0 = Column::new("foo".into(), ["ham", "spam", "egg", "bacon", "quack"]); + /// let s1 = Column::new("values".into(), [1, 2, 3, 4, 5]); /// let mut df = DataFrame::new(vec![s0, s1])?; /// /// // create a mask - /// let values = df.column("values")?; + /// let values = df.column("values")?.as_materialized_series(); /// let mask = values.lt_eq(1)? | values.gt_eq(5_i32)?; /// - /// df.try_apply("foo", |s| { - /// s.str()? + /// df.try_apply("foo", |c| { + /// c.str()? 
/// .set(&mask, Some("not_within_bounds")) /// }); /// # Ok::<(), PolarsError>(()) @@ -2171,13 +2222,13 @@ impl DataFrame { /// | "not_within_bounds" | 5 | /// +---------------------+--------+ /// ``` - pub fn try_apply(&mut self, column: &str, f: F) -> PolarsResult<&mut Self> + pub fn try_apply(&mut self, column: &str, f: F) -> PolarsResult<&mut Self> where - F: FnOnce(&Series) -> PolarsResult, - S: IntoSeries, + F: FnOnce(&Series) -> PolarsResult, + C: IntoColumn, { let idx = self.try_get_column_index(column)?; - self.try_apply_at_idx(idx, f) + self.try_apply_at_idx(idx, |c| f(c.as_materialized_series())) } /// Slice the [`DataFrame`] along the rows. @@ -2243,6 +2294,7 @@ impl DataFrame { if offset == 0 && length == self.height() { return self.clone(); } + // @scalar-opt let columns = self._apply_columns_par(&|s| s.slice(offset, length)); unsafe { DataFrame::new_no_checks(columns) } } @@ -2252,6 +2304,7 @@ impl DataFrame { if offset == 0 && length == self.height() { return self.clone(); } + // @scalar-opt let columns = self._apply_columns(&|s| { let mut out = s.slice(offset, length); out.shrink_to_fit(); @@ -2298,7 +2351,7 @@ impl DataFrame { let col = self .columns .iter() - .map(|s| s.head(length)) + .map(|c| c.head(length)) .collect::>(); unsafe { DataFrame::new_no_checks(col) } } @@ -2338,7 +2391,7 @@ impl DataFrame { let col = self .columns .iter() - .map(|s| s.tail(length)) + .map(|c| c.tail(length)) .collect::>(); unsafe { DataFrame::new_no_checks(col) } } @@ -2385,7 +2438,10 @@ impl DataFrame { /// as well. 
pub fn iter_chunks_physical(&self) -> PhysRecordBatchIter<'_> { PhysRecordBatchIter { - iters: self.columns.iter().map(|s| s.chunks().iter()).collect(), + iters: self + .materialized_column_iter() + .map(|s| s.chunks().iter()) + .collect(), } } @@ -2427,14 +2483,19 @@ impl DataFrame { match self.columns.len() { 0 => Ok(None), - 1 => Ok(Some(self.columns[0].clone())), - 2 => min_fn(&self.columns[0], &self.columns[1]).map(Some), + 1 => Ok(Some( + self.columns[0].clone().as_materialized_series().clone(), + )), + 2 => min_fn( + self.columns[0].as_materialized_series(), + self.columns[1].as_materialized_series(), + ) + .map(Some), _ => { // the try_reduce_with is a bit slower in parallelism, // but I don't think it matters here as we parallelize over columns, not over elements POOL.install(|| { - self.columns - .par_iter() + self.par_materialized_column_iter() .map(|s| Ok(Cow::Borrowed(s))) .try_reduce_with(|l, r| min_fn(&l, &r).map(Cow::Owned)) // we can unwrap the option, because we are certain there is a column @@ -2453,14 +2514,17 @@ impl DataFrame { match self.columns.len() { 0 => Ok(None), - 1 => Ok(Some(self.columns[0].clone())), - 2 => max_fn(&self.columns[0], &self.columns[1]).map(Some), + 1 => Ok(Some(self.columns[0].as_materialized_series().clone())), + 2 => max_fn( + self.columns[0].as_materialized_series(), + self.columns[1].as_materialized_series(), + ) + .map(Some), _ => { // the try_reduce_with is a bit slower in parallelism, // but I don't think it matters here as we parallelize over columns, not over elements POOL.install(|| { - self.columns - .par_iter() + self.par_materialized_column_iter() .map(|s| Ok(Cow::Borrowed(s))) .try_reduce_with(|l, r| max_fn(&l, &r).map(Cow::Owned)) // we can unwrap the option, because we are certain there is a column @@ -2494,8 +2558,7 @@ impl DataFrame { }; let non_null_cols = self - .columns - .iter() + .materialized_column_iter() .filter(|x| x.dtype() != &DataType::Null) .collect::>(); @@ -2505,7 +2568,7 @@ impl 
DataFrame { Ok(None) } else { // all columns are null dtype, so result is null dtype - Ok(Some(self.columns[0].clone())) + Ok(Some(self.columns[0].as_materialized_series().clone())) } }, 1 => Ok(Some(apply_null_strategy( @@ -2545,9 +2608,11 @@ impl DataFrame { 0 => Ok(None), 1 => Ok(Some(match self.columns[0].dtype() { dt if dt != &DataType::Float32 && (dt.is_numeric() || dt == &DataType::Boolean) => { - self.columns[0].cast(&DataType::Float64)? + self.columns[0] + .as_materialized_series() + .cast(&DataType::Float64)? }, - _ => self.columns[0].clone(), + _ => self.columns[0].as_materialized_series().clone(), })), _ => { let columns = self @@ -2565,8 +2630,7 @@ impl DataFrame { let null_count = || { numeric_df - .columns - .par_iter() + .par_materialized_column_iter() .map(|s| { s.is_null() .cast_with_options(&DataType::UInt32, CastOptions::NonStrict) @@ -2817,7 +2881,7 @@ impl DataFrame { let cols = self .columns .iter() - .map(|s| Series::new(s.name().clone(), [s.null_count() as IdxSize])) + .map(|c| Column::new(c.name().clone(), [c.null_count() as IdxSize])) .collect(); unsafe { Self::new_no_checks(cols) } } @@ -2983,7 +3047,7 @@ impl DataFrame { for s in &self.columns { if cols.contains(s.name()) { let ca = s.struct_()?.clone(); - new_cols.extend_from_slice(&ca.fields_as_series()); + new_cols.extend(ca.fields_as_series().into_iter().map(Column::from)); count += 1; } else { new_cols.push(s.clone()) @@ -3004,7 +3068,7 @@ impl DataFrame { } pub struct RecordBatchIter<'a> { - columns: &'a Vec, + columns: &'a Vec, idx: usize, n_chunks: usize, compat_level: CompatLevel, @@ -3023,11 +3087,13 @@ impl<'a> Iterator for RecordBatchIter<'a> { let iter = self .columns .par_iter() + .map(Column::as_materialized_series) .map(|s| s.to_arrow(self.idx, self.compat_level)); POOL.install(|| iter.collect()) } else { self.columns .iter() + .map(Column::as_materialized_series) .map(|s| s.to_arrow(self.idx, self.compat_level)) .collect() }; @@ -3073,14 +3139,14 @@ impl Default for 
DataFrame { } } -impl From for Vec { +impl From for Vec { fn from(df: DataFrame) -> Self { df.columns } } // utility to test if we can vstack/extend the columns -fn ensure_can_extend(left: &Series, right: &Series) -> PolarsResult<()> { +fn ensure_can_extend(left: &Column, right: &Column) -> PolarsResult<()> { polars_ensure!( left.name() == right.name(), ShapeMismatch: "unable to vstack, column names don't match: {:?} and {:?}", @@ -3094,8 +3160,8 @@ mod test { use super::*; fn create_frame() -> DataFrame { - let s0 = Series::new("days".into(), [0, 1, 2].as_ref()); - let s1 = Series::new("temp".into(), [22.1, 19.9, 7.].as_ref()); + let s0 = Column::new("days".into(), [0, 1, 2].as_ref()); + let s1 = Column::new("temp".into(), [22.1, 19.9, 7.].as_ref()); DataFrame::new(vec![s0, s1]).unwrap() } @@ -3115,7 +3181,16 @@ mod test { #[cfg_attr(miri, ignore)] fn test_select() { let df = create_frame(); - assert_eq!(df.column("days").unwrap().equal(1).unwrap().sum(), Some(1)); + assert_eq!( + df.column("days") + .unwrap() + .as_series() + .unwrap() + .equal(1) + .unwrap() + .sum(), + Some(1) + ); } #[test] @@ -3123,13 +3198,25 @@ mod test { fn test_filter_broadcast_on_string_col() { let col_name = "some_col"; let v = vec!["test".to_string()]; - let s0 = Series::new(PlSmallStr::from_str(col_name), v); + let s0 = Column::new(PlSmallStr::from_str(col_name), v); let mut df = DataFrame::new(vec![s0]).unwrap(); df = df - .filter(&df.column(col_name).unwrap().equal("").unwrap()) + .filter( + &df.column(col_name) + .unwrap() + .as_materialized_series() + .equal("") + .unwrap(), + ) .unwrap(); - assert_eq!(df.column(col_name).unwrap().n_chunks(), 1); + assert_eq!( + df.column(col_name) + .unwrap() + .as_materialized_series() + .n_chunks(), + 1 + ); } #[test] @@ -3235,9 +3322,9 @@ mod test { #[cfg(feature = "zip_with")] #[cfg_attr(miri, ignore)] fn test_horizontal_agg() { - let a = Series::new("a".into(), [1, 2, 6]); - let b = Series::new("b".into(), [Some(1), None, None]); - let c = 
Series::new("c".into(), [Some(4), None, Some(3)]); + let a = Column::new("a".into(), [1, 2, 6]); + let b = Column::new("b".into(), [Some(1), None, None]); + let c = Column::new("c".into(), [Some(4), None, Some(3)]); let df = DataFrame::new(vec![a, b, c]).unwrap(); assert_eq!( diff --git a/crates/polars-core/src/frame/row/av_buffer.rs b/crates/polars-core/src/frame/row/av_buffer.rs index 608d6ec820af..f46332021ef1 100644 --- a/crates/polars-core/src/frame/row/av_buffer.rs +++ b/crates/polars-core/src/frame/row/av_buffer.rs @@ -624,7 +624,7 @@ impl<'a> AnyValueBufferTrusted<'a> { s }) .collect::>(); - StructChunked::from_series(PlSmallStr::EMPTY, &v) + StructChunked::from_series(PlSmallStr::EMPTY, v.iter()) .unwrap() .into_series() }, diff --git a/crates/polars-core/src/frame/row/dataframe.rs b/crates/polars-core/src/frame/row/dataframe.rs index 4a40a9ed6d6f..1d11dcd9ecc0 100644 --- a/crates/polars-core/src/frame/row/dataframe.rs +++ b/crates/polars-core/src/frame/row/dataframe.rs @@ -4,8 +4,7 @@ impl DataFrame { /// Get a row from a [`DataFrame`]. Use of this is discouraged as it will likely be slow. pub fn get_row(&self, idx: usize) -> PolarsResult { let values = self - .columns - .iter() + .materialized_column_iter() .map(|s| s.get(idx)) .collect::>>()?; Ok(Row(values)) @@ -15,7 +14,7 @@ impl DataFrame { /// The caller is responsible to make sure that the row has at least the capacity for the number /// of columns in the [`DataFrame`] pub fn get_row_amortized<'a>(&'a self, idx: usize, row: &mut Row<'a>) -> PolarsResult<()> { - for (s, any_val) in self.columns.iter().zip(&mut row.0) { + for (s, any_val) in self.materialized_column_iter().zip(&mut row.0) { *any_val = s.get(idx)?; } Ok(()) @@ -29,8 +28,7 @@ impl DataFrame { /// Does not do any bounds checking. 
#[inline] pub unsafe fn get_row_amortized_unchecked<'a>(&'a self, idx: usize, row: &mut Row<'a>) { - self.columns - .iter() + self.materialized_column_iter() .zip(&mut row.0) .for_each(|(s, any_val)| { *any_val = s.get_unchecked(idx); @@ -75,14 +73,14 @@ impl DataFrame { .into_iter() .zip(schema.iter_names()) .map(|(b, name)| { - let mut s = b.into_series(); + let mut c = b.into_series().into_column(); // if the schema adds a column not in the rows, we // fill it with nulls - if s.is_empty() { - Series::full_null(name.clone(), expected_len, s.dtype()) + if c.is_empty() { + Column::full_null(name.clone(), expected_len, c.dtype()) } else { - s.rename(name.clone()); - s + c.rename(name.clone()); + c } }) .collect(); @@ -117,14 +115,14 @@ impl DataFrame { .into_iter() .zip(schema.iter_names()) .map(|(b, name)| { - let mut s = b.into_series(); + let mut c = b.into_series().into_column(); // if the schema adds a column not in the rows, we // fill it with nulls - if s.is_empty() { - Series::full_null(name.clone(), expected_len, s.dtype()) + if c.is_empty() { + Column::full_null(name.clone(), expected_len, c.dtype()) } else { - s.rename(name.clone()); - s + c.rename(name.clone()); + c } }) .collect(); diff --git a/crates/polars-core/src/frame/row/mod.rs b/crates/polars-core/src/frame/row/mod.rs index 44e445b0874e..87904e6f98cb 100644 --- a/crates/polars-core/src/frame/row/mod.rs +++ b/crates/polars-core/src/frame/row/mod.rs @@ -68,7 +68,7 @@ impl DataFrame { let width = self.width(); let size = width * self.height(); let mut buf = vec![AnyValue::Null; size]; - for (col_i, s) in self.columns.iter().enumerate() { + for (col_i, s) in self.materialized_column_iter().enumerate() { match s.dtype() { #[cfg(feature = "object")] DataType::Object(_, _) => { diff --git a/crates/polars-core/src/frame/row/transpose.rs b/crates/polars-core/src/frame/row/transpose.rs index 1984a085116f..0f41bb2749d5 100644 --- a/crates/polars-core/src/frame/row/transpose.rs +++ 
b/crates/polars-core/src/frame/row/transpose.rs @@ -15,15 +15,15 @@ impl DataFrame { let new_height = self.width(); // Allocate space for the transposed columns, putting the "row names" first if needed let mut cols_t = match keep_names_as { - None => Vec::::with_capacity(new_width), + None => Vec::::with_capacity(new_width), Some(name) => { - let mut tmp = Vec::::with_capacity(new_width + 1); + let mut tmp = Vec::::with_capacity(new_width + 1); tmp.push( StringChunked::from_iter_values( name, self.get_column_names_owned().into_iter(), ) - .into(), + .into_column(), ); tmp }, @@ -60,8 +60,7 @@ impl DataFrame { .collect::>(); let columns = self - .columns - .iter() + .materialized_column_iter() // first cast to supertype before casting to physical to ensure units are correct .map(|s| s.cast(dtype).unwrap().cast(&phys_dtype).unwrap()) .collect::>(); @@ -81,7 +80,7 @@ impl DataFrame { // SAFETY: we are casting back to the supertype let mut s = unsafe { buf.into_series().cast_unchecked(dtype).unwrap() }; s.rename(name.clone()); - s + s.into() })); }, }; @@ -183,9 +182,9 @@ unsafe fn add_value( // This just fills a pre-allocated mutable series vector, which may have a name column. // Nothing is returned and the actual DataFrame is constructed above. 
pub(super) fn numeric_transpose( - cols: &[Series], + cols: &[Column], names_out: &[PlSmallStr], - cols_t: &mut Vec, + cols_t: &mut Vec, ) where T: PolarsNumericType, //S: AsRef, @@ -211,43 +210,46 @@ pub(super) fn numeric_transpose( let validity_buf_ptr = &mut validity_buf as *mut Vec> as usize; POOL.install(|| { - cols.iter().enumerate().for_each(|(row_idx, s)| { - let s = s.cast(&T::get_dtype()).unwrap(); - let ca = s.unpack::().unwrap(); + cols.iter() + .map(Column::as_materialized_series) + .enumerate() + .for_each(|(row_idx, s)| { + let s = s.cast(&T::get_dtype()).unwrap(); + let ca = s.unpack::().unwrap(); - // SAFETY: - // we access in parallel, but every access is unique, so we don't break aliasing rules - // we also ensured we allocated enough memory, so we never reallocate and thus - // the pointers remain valid. - if has_nulls { - for (col_idx, opt_v) in ca.iter().enumerate() { - match opt_v { - None => unsafe { - let column = (*(validity_buf_ptr as *mut Vec>)) + // SAFETY: + // we access in parallel, but every access is unique, so we don't break aliasing rules + // we also ensured we allocated enough memory, so we never reallocate and thus + // the pointers remain valid. + if has_nulls { + for (col_idx, opt_v) in ca.iter().enumerate() { + match opt_v { + None => unsafe { + let column = (*(validity_buf_ptr as *mut Vec>)) + .get_unchecked_mut(col_idx); + let el_ptr = column.as_mut_ptr(); + *el_ptr.add(row_idx) = false; + // we must initialize this memory otherwise downstream code + // might access uninitialized memory when the masked out values + // are changed. 
+ add_value(values_buf_ptr, col_idx, row_idx, T::Native::default()); + }, + Some(v) => unsafe { + add_value(values_buf_ptr, col_idx, row_idx, v); + }, + } + } + } else { + for (col_idx, v) in ca.into_no_null_iter().enumerate() { + unsafe { + let column = (*(values_buf_ptr as *mut Vec>)) .get_unchecked_mut(col_idx); let el_ptr = column.as_mut_ptr(); - *el_ptr.add(row_idx) = false; - // we must initialize this memory otherwise downstream code - // might access uninitialized memory when the masked out values - // are changed. - add_value(values_buf_ptr, col_idx, row_idx, T::Native::default()); - }, - Some(v) => unsafe { - add_value(values_buf_ptr, col_idx, row_idx, v); - }, - } - } - } else { - for (col_idx, v) in ca.into_no_null_iter().enumerate() { - unsafe { - let column = (*(values_buf_ptr as *mut Vec>)) - .get_unchecked_mut(col_idx); - let el_ptr = column.as_mut_ptr(); - *el_ptr.add(row_idx) = v; + *el_ptr.add(row_idx) = v; + } } } - } - }) + }) }); let par_iter = values_buf @@ -277,7 +279,7 @@ pub(super) fn numeric_transpose( values.into(), validity, ); - ChunkedArray::with_chunk(name.clone(), arr).into_series() + ChunkedArray::with_chunk(name.clone(), arr).into_column() }); POOL.install(|| cols_t.par_extend(par_iter)); } diff --git a/crates/polars-core/src/frame/top_k.rs b/crates/polars-core/src/frame/top_k.rs index af3351d79fba..dd610a2383d4 100644 --- a/crates/polars-core/src/frame/top_k.rs +++ b/crates/polars-core/src/frame/top_k.rs @@ -5,7 +5,7 @@ impl DataFrame { pub(crate) fn bottom_k_impl( &self, k: usize, - by_column: Vec, + by_column: Vec, mut sort_options: SortMultipleOptions, ) -> PolarsResult { let first_descending = sort_options.descending[0]; diff --git a/crates/polars-core/src/frame/upstream_traits.rs b/crates/polars-core/src/frame/upstream_traits.rs index e2f28aefdb33..38b346ace652 100644 --- a/crates/polars-core/src/frame/upstream_traits.rs +++ b/crates/polars-core/src/frame/upstream_traits.rs @@ -7,13 +7,23 @@ impl FromIterator for DataFrame { 
/// /// Panics if Series have different lengths. fn from_iter>(iter: T) -> Self { + let v = iter.into_iter().map(Column::from).collect(); + DataFrame::new(v).expect("could not create DataFrame from iterator") + } +} + +impl FromIterator for DataFrame { + /// # Panics + /// + /// Panics if Column have different lengths. + fn from_iter>(iter: T) -> Self { let v = iter.into_iter().collect(); DataFrame::new(v).expect("could not create DataFrame from iterator") } } impl Index for DataFrame { - type Output = Series; + type Output = Column; fn index(&self, index: usize) -> &Self::Output { &self.columns[index] @@ -23,7 +33,7 @@ impl Index for DataFrame { macro_rules! impl_ranges { ($range_type:ty) => { impl Index<$range_type> for DataFrame { - type Output = [Series]; + type Output = [Column]; fn index(&self, index: $range_type) -> &Self::Output { &self.columns[index] @@ -41,7 +51,7 @@ impl_ranges!(RangeFull); // we don't implement Borrow or AsRef as upstream crates may add impl of trait for usize. 
impl Index<&str> for DataFrame { - type Output = Series; + type Output = Column; fn index(&self, index: &str) -> &Self::Output { let idx = self.check_name_to_idx(index).unwrap(); diff --git a/crates/polars-core/src/functions.rs b/crates/polars-core/src/functions.rs index 57cbee3a01dc..50ce5d14e491 100644 --- a/crates/polars-core/src/functions.rs +++ b/crates/polars-core/src/functions.rs @@ -35,7 +35,7 @@ pub fn concat_df_diagonal(dfs: &[DataFrame]) -> PolarsResult { for (name, dtype) in &schema { match df.column(name.as_str()).ok() { Some(s) => columns.push(s.clone()), - None => columns.push(Series::full_null(name.clone(), height, dtype)), + None => columns.push(Column::full_null(name.clone(), height, dtype)), } } unsafe { DataFrame::new_no_checks(columns) } diff --git a/crates/polars-core/src/hashing/vector_hasher.rs b/crates/polars-core/src/hashing/vector_hasher.rs index 277c1c009ba0..7dfb07c64d58 100644 --- a/crates/polars-core/src/hashing/vector_hasher.rs +++ b/crates/polars-core/src/hashing/vector_hasher.rs @@ -450,7 +450,7 @@ pub fn _df_rows_to_hashes_threaded_vertical( .map(|df| { let hb = hasher_builder.clone(); let mut hashes = vec![]; - series_to_hashes(df.get_columns(), Some(hb), &mut hashes)?; + columns_to_hashes(df.get_columns(), Some(hb), &mut hashes)?; Ok(UInt64Chunked::from_vec(PlSmallStr::EMPTY, hashes)) }) .collect::>>() @@ -458,8 +458,8 @@ pub fn _df_rows_to_hashes_threaded_vertical( Ok((hashes, hasher_builder)) } -pub(crate) fn series_to_hashes( - keys: &[Series], +pub(crate) fn columns_to_hashes( + keys: &[Column], build_hasher: Option, hashes: &mut Vec, ) -> PolarsResult { diff --git a/crates/polars-core/src/lib.rs b/crates/polars-core/src/lib.rs index 117f462619dc..a7e74b230410 100644 --- a/crates/polars-core/src/lib.rs +++ b/crates/polars-core/src/lib.rs @@ -69,3 +69,8 @@ pub static POOL: Lazy = Lazy::new(|| polars_utils::was // utility for the tests to ensure a single thread can execute pub static SINGLE_LOCK: Lazy> = Lazy::new(|| 
Mutex::new(())); + +/// Default length for a `.head()` call +pub(crate) const HEAD_DEFAULT_LENGTH: usize = 10; +/// Default length for a `.tail()` call +pub(crate) const TAIL_DEFAULT_LENGTH: usize = 10; diff --git a/crates/polars-core/src/prelude.rs b/crates/polars-core/src/prelude.rs index 996c9b83c5c5..bd1ade2d9b90 100644 --- a/crates/polars-core/src/prelude.rs +++ b/crates/polars-core/src/prelude.rs @@ -40,6 +40,7 @@ pub use crate::datatypes::{ArrayCollectIterExt, *}; pub use crate::error::{ polars_bail, polars_ensure, polars_err, polars_warn, PolarsError, PolarsResult, }; +pub use crate::frame::column::{Column, IntoColumn}; pub use crate::frame::explode::UnpivotArgsIR; #[cfg(feature = "algorithm_group_by")] pub(crate) use crate::frame::group_by::aggregations::*; diff --git a/crates/polars-core/src/scalar/from.rs b/crates/polars-core/src/scalar/from.rs new file mode 100644 index 000000000000..35345b2a6527 --- /dev/null +++ b/crates/polars-core/src/scalar/from.rs @@ -0,0 +1,27 @@ +use super::{AnyValue, DataType, Scalar}; + +macro_rules! impl_from { + ($(($t:ty, $av:ident, $dt:ident))+) => { + $( + impl From<$t> for Scalar { + #[inline] + fn from(v: $t) -> Self { + Self::new(DataType::$dt, AnyValue::$av(v)) + } + } + )+ + } +} + +impl_from! 
{ + (i8, Int8, Int8) + (i16, Int16, Int16) + (i32, Int32, Int32) + (i64, Int64, Int64) + (u8, UInt8, UInt8) + (u16, UInt16, UInt16) + (u32, UInt32, UInt32) + (u64, UInt64, UInt64) + (f32, Float32, Float32) + (f64, Float64, Float64) +} diff --git a/crates/polars-core/src/scalar/mod.rs b/crates/polars-core/src/scalar/mod.rs index 3220e3468999..3e456837e534 100644 --- a/crates/polars-core/src/scalar/mod.rs +++ b/crates/polars-core/src/scalar/mod.rs @@ -1,3 +1,4 @@ +mod from; pub mod reduce; use polars_utils::pl_str::PlSmallStr; @@ -5,7 +6,7 @@ use polars_utils::pl_str::PlSmallStr; use serde::{Deserialize, Serialize}; use crate::datatypes::{AnyValue, DataType}; -use crate::prelude::Series; +use crate::prelude::{Column, Series}; #[derive(Clone, Debug, PartialEq)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -45,6 +46,11 @@ impl Scalar { Series::from_any_values_and_dtype(name, &[self.as_any_value()], &self.dtype, true).unwrap() } + /// Turn a scalar into a column with `length=1`. 
+ pub fn into_column(self, name: PlSmallStr) -> Column { + Column::new_scalar(name, self, 1) + } + #[inline(always)] pub fn dtype(&self) -> &DataType { &self.dtype diff --git a/crates/polars-core/src/serde/df.rs b/crates/polars-core/src/serde/df.rs index 31d1934504f5..52d6a0ee6eae 100644 --- a/crates/polars-core/src/serde/df.rs +++ b/crates/polars-core/src/serde/df.rs @@ -2,7 +2,7 @@ use polars_error::PolarsError; use serde::de::Error; use serde::*; -use crate::prelude::{DataFrame, Series}; +use crate::prelude::{Column, DataFrame}; // utility to ensure we serde to a struct // { @@ -12,12 +12,12 @@ use crate::prelude::{DataFrame, Series}; // and is backwards compatible #[derive(Deserialize)] struct Util { - columns: Vec, + columns: Vec, } #[derive(Serialize)] struct UtilBorrowed<'a> { - columns: &'a [Series], + columns: &'a [Column], } impl<'de> Deserialize<'de> for DataFrame { diff --git a/crates/polars-core/src/serde/mod.rs b/crates/polars-core/src/serde/mod.rs index 86fbf5c52007..d355f959fd15 100644 --- a/crates/polars-core/src/serde/mod.rs +++ b/crates/polars-core/src/serde/mod.rs @@ -42,9 +42,9 @@ mod test { let s1 = Series::new("foo".into(), &[1, 2, 3]); let s2 = Series::new("bar".into(), &[Some(true), None, Some(false)]); let s3 = Series::new("string".into(), &["mouse", "elephant", "dog"]); - let s_list = Series::new("list".into(), &[s1.clone(), s1.clone(), s1.clone()]); + let s_list = Column::new("list".into(), &[s1.clone(), s1.clone(), s1.clone()]); - DataFrame::new(vec![s1, s2, s3, s_list]).unwrap() + DataFrame::new(vec![s1.into(), s2.into(), s3.into(), s_list]).unwrap() } #[test] @@ -89,7 +89,7 @@ mod test { #[test] fn test_serde_binary_series_owned_bincode() { - let s1 = Series::new( + let s1 = Column::new( "foo".into(), &[ vec![1u8, 2u8, 3u8], @@ -142,7 +142,7 @@ mod test { let s = Series::from_any_values_and_dtype("item".into(), &[row_1, row_2, row_3], &dtype, false) .unwrap(); - let df = DataFrame::new(vec![s]).unwrap(); + let df = 
DataFrame::new(vec![s.into()]).unwrap(); let df_str = serde_json::to_string(&df).unwrap(); let out = serde_json::from_str::(&df_str).unwrap(); diff --git a/crates/polars-core/src/serde/series.rs b/crates/polars-core/src/serde/series.rs index 3506a0e9cc89..0ef07e702374 100644 --- a/crates/polars-core/src/serde/series.rs +++ b/crates/polars-core/src/serde/series.rs @@ -277,7 +277,7 @@ impl<'de> Deserialize<'de> for Series { #[cfg(feature = "dtype-struct")] DataType::Struct(_) => { let values: Vec = map.next_value()?; - let ca = StructChunked::from_series(name.clone(), &values).unwrap(); + let ca = StructChunked::from_series(name.clone(), values.iter()).unwrap(); let mut s = ca.into_series(); s.rename(name); Ok(s) diff --git a/crates/polars-core/src/series/any_value.rs b/crates/polars-core/src/series/any_value.rs index aaa4bc753443..30fba0a9cb14 100644 --- a/crates/polars-core/src/series/any_value.rs +++ b/crates/polars-core/src/series/any_value.rs @@ -743,7 +743,7 @@ fn any_values_to_struct( series_fields.push(s) } - let mut out = StructChunked::from_series(PlSmallStr::EMPTY, &series_fields)?; + let mut out = StructChunked::from_series(PlSmallStr::EMPTY, series_fields.iter())?; if has_outer_validity { let mut validity = MutableBitmap::new(); validity.extend_constant(values.len(), true); diff --git a/crates/polars-core/src/series/implementations/binary.rs b/crates/polars-core/src/series/implementations/binary.rs index 8cdf326302d1..7c5af2b9ccc7 100644 --- a/crates/polars-core/src/series/implementations/binary.rs +++ b/crates/polars-core/src/series/implementations/binary.rs @@ -88,7 +88,7 @@ impl private::PrivateSeries for SeriesWrap { fn arg_sort_multiple( &self, - by: &[Series], + by: &[Column], options: &SortMultipleOptions, ) -> PolarsResult { self.0.arg_sort_multiple(by, options) diff --git a/crates/polars-core/src/series/implementations/binary_offset.rs b/crates/polars-core/src/series/implementations/binary_offset.rs index 9ff8cd6704d0..481b5c5bf47e 100644 --- 
a/crates/polars-core/src/series/implementations/binary_offset.rs +++ b/crates/polars-core/src/series/implementations/binary_offset.rs @@ -54,7 +54,7 @@ impl private::PrivateSeries for SeriesWrap { fn arg_sort_multiple( &self, - by: &[Series], + by: &[Column], options: &SortMultipleOptions, ) -> PolarsResult { self.0.arg_sort_multiple(by, options) diff --git a/crates/polars-core/src/series/implementations/boolean.rs b/crates/polars-core/src/series/implementations/boolean.rs index aae8a5837af8..30c78b95943d 100644 --- a/crates/polars-core/src/series/implementations/boolean.rs +++ b/crates/polars-core/src/series/implementations/boolean.rs @@ -91,7 +91,7 @@ impl private::PrivateSeries for SeriesWrap { fn arg_sort_multiple( &self, - by: &[Series], + by: &[Column], options: &SortMultipleOptions, ) -> PolarsResult { self.0.arg_sort_multiple(by, options) diff --git a/crates/polars-core/src/series/implementations/categorical.rs b/crates/polars-core/src/series/implementations/categorical.rs index 497ff5267d88..4e7b6efe04a2 100644 --- a/crates/polars-core/src/series/implementations/categorical.rs +++ b/crates/polars-core/src/series/implementations/categorical.rs @@ -117,7 +117,7 @@ impl private::PrivateSeries for SeriesWrap { fn arg_sort_multiple( &self, - by: &[Series], + by: &[Column], options: &SortMultipleOptions, ) -> PolarsResult { self.0.arg_sort_multiple(by, options) diff --git a/crates/polars-core/src/series/implementations/date.rs b/crates/polars-core/src/series/implementations/date.rs index 834449e73992..479478a94530 100644 --- a/crates/polars-core/src/series/implementations/date.rs +++ b/crates/polars-core/src/series/implementations/date.rs @@ -132,7 +132,7 @@ impl private::PrivateSeries for SeriesWrap { fn arg_sort_multiple( &self, - by: &[Series], + by: &[Column], options: &SortMultipleOptions, ) -> PolarsResult { self.0.deref().arg_sort_multiple(by, options) diff --git a/crates/polars-core/src/series/implementations/datetime.rs 
b/crates/polars-core/src/series/implementations/datetime.rs index a6a5f111d541..b91df29a0a38 100644 --- a/crates/polars-core/src/series/implementations/datetime.rs +++ b/crates/polars-core/src/series/implementations/datetime.rs @@ -130,7 +130,7 @@ impl private::PrivateSeries for SeriesWrap { fn arg_sort_multiple( &self, - by: &[Series], + by: &[Column], options: &SortMultipleOptions, ) -> PolarsResult { self.0.deref().arg_sort_multiple(by, options) diff --git a/crates/polars-core/src/series/implementations/duration.rs b/crates/polars-core/src/series/implementations/duration.rs index 73d2e4f730fb..13b121aee0ca 100644 --- a/crates/polars-core/src/series/implementations/duration.rs +++ b/crates/polars-core/src/series/implementations/duration.rs @@ -244,7 +244,7 @@ impl private::PrivateSeries for SeriesWrap { fn arg_sort_multiple( &self, - by: &[Series], + by: &[Column], options: &SortMultipleOptions, ) -> PolarsResult { self.0.deref().arg_sort_multiple(by, options) diff --git a/crates/polars-core/src/series/implementations/floats.rs b/crates/polars-core/src/series/implementations/floats.rs index cc52d73cdc60..de349c2a22f5 100644 --- a/crates/polars-core/src/series/implementations/floats.rs +++ b/crates/polars-core/src/series/implementations/floats.rs @@ -148,7 +148,7 @@ macro_rules! impl_dyn_series { fn arg_sort_multiple( &self, - by: &[Series], + by: &[Column], options: &SortMultipleOptions, ) -> PolarsResult { self.0.arg_sort_multiple(by, options) diff --git a/crates/polars-core/src/series/implementations/mod.rs b/crates/polars-core/src/series/implementations/mod.rs index 3e4e41395b0b..4116df5a42fa 100644 --- a/crates/polars-core/src/series/implementations/mod.rs +++ b/crates/polars-core/src/series/implementations/mod.rs @@ -221,7 +221,7 @@ macro_rules! 
impl_dyn_series { fn arg_sort_multiple( &self, - by: &[Series], + by: &[Column], options: &SortMultipleOptions, ) -> PolarsResult { self.0.arg_sort_multiple(by, options) diff --git a/crates/polars-core/src/series/implementations/string.rs b/crates/polars-core/src/series/implementations/string.rs index c8d85825e84b..8b64afcd9895 100644 --- a/crates/polars-core/src/series/implementations/string.rs +++ b/crates/polars-core/src/series/implementations/string.rs @@ -87,7 +87,7 @@ impl private::PrivateSeries for SeriesWrap { fn arg_sort_multiple( &self, - by: &[Series], + by: &[Column], options: &SortMultipleOptions, ) -> PolarsResult { self.0.arg_sort_multiple(by, options) diff --git a/crates/polars-core/src/series/implementations/time.rs b/crates/polars-core/src/series/implementations/time.rs index 3808f7d977af..ed810d34b3f4 100644 --- a/crates/polars-core/src/series/implementations/time.rs +++ b/crates/polars-core/src/series/implementations/time.rs @@ -107,7 +107,7 @@ impl private::PrivateSeries for SeriesWrap { fn arg_sort_multiple( &self, - by: &[Series], + by: &[Column], options: &SortMultipleOptions, ) -> PolarsResult { self.0.deref().arg_sort_multiple(by, options) diff --git a/crates/polars-core/src/series/mod.rs b/crates/polars-core/src/series/mod.rs index a629a8fd1c5c..274b1e310282 100644 --- a/crates/polars-core/src/series/mod.rs +++ b/crates/polars-core/src/series/mod.rs @@ -1,6 +1,7 @@ //! Type agnostic columnar data structure. pub use crate::prelude::ChunkCompare; use crate::prelude::*; +use crate::{HEAD_DEFAULT_LENGTH, TAIL_DEFAULT_LENGTH}; pub mod amortized_iter; mod any_value; @@ -255,7 +256,7 @@ impl Series { pub fn into_frame(self) -> DataFrame { // SAFETY: A single-column dataframe cannot have length mismatches or duplicate names - unsafe { DataFrame::new_no_checks(vec![self]) } + unsafe { DataFrame::new_no_checks(vec![self.into()]) } } /// Rename series. 
@@ -463,6 +464,7 @@ impl Series { /// Cast from physical to logical types without any checks on the validity of the cast. /// /// # Safety + /// /// This can lead to invalid memory access in downstream code. pub unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult { match self.dtype() { @@ -628,7 +630,8 @@ impl Series { .iter() .map(|s| s.to_physical_repr().into_owned()) .collect(); - let mut ca = StructChunked::from_series(self.name().clone(), &fields).unwrap(); + let mut ca = + StructChunked::from_series(self.name().clone(), fields.iter()).unwrap(); if arr.null_count() > 0 { ca.zip_outer_validity(arr); @@ -799,35 +802,18 @@ impl Series { // used for formatting pub fn str_value(&self, index: usize) -> PolarsResult> { - let out = match self.0.get(index)? { - AnyValue::String(s) => Cow::Borrowed(s), - AnyValue::Null => Cow::Borrowed("null"), - #[cfg(feature = "dtype-categorical")] - AnyValue::Categorical(idx, rev, arr) | AnyValue::Enum(idx, rev, arr) => { - if arr.is_null() { - Cow::Borrowed(rev.get(idx)) - } else { - unsafe { Cow::Borrowed(arr.deref_unchecked().value(idx as usize)) } - } - }, - av => Cow::Owned(format!("{av}")), - }; - Ok(out) + Ok(self.0.get(index)?.str_value()) } /// Get the head of the Series. pub fn head(&self, length: Option) -> Series { - match length { - Some(len) => self.slice(0, std::cmp::min(len, self.len())), - None => self.slice(0, std::cmp::min(10, self.len())), - } + let len = length.unwrap_or(HEAD_DEFAULT_LENGTH); + self.slice(0, std::cmp::min(len, self.len())) } /// Get the tail of the Series. 
pub fn tail(&self, length: Option) -> Series { - let len = match length { - Some(len) => std::cmp::min(len, self.len()), - None => std::cmp::min(10, self.len()), - }; + let len = length.unwrap_or(TAIL_DEFAULT_LENGTH); + let len = std::cmp::min(len, self.len()); self.slice(-(len as i64), len) } diff --git a/crates/polars-core/src/series/ops/null.rs b/crates/polars-core/src/series/ops/null.rs index ee33c309687e..edff23e5d31f 100644 --- a/crates/polars-core/src/series/ops/null.rs +++ b/crates/polars-core/src/series/ops/null.rs @@ -55,7 +55,7 @@ impl Series { .iter() .map(|fld| Series::full_null(fld.name().clone(), size, fld.dtype())) .collect::>(); - let ca = StructChunked::from_series(name, &fields).unwrap(); + let ca = StructChunked::from_series(name, fields.iter()).unwrap(); if !fields.is_empty() { ca.with_outer_validity(Some(Bitmap::new_zeroed(size))) diff --git a/crates/polars-core/src/series/series_trait.rs b/crates/polars-core/src/series/series_trait.rs index b5b60c5eff33..2804c5ce1840 100644 --- a/crates/polars-core/src/series/series_trait.rs +++ b/crates/polars-core/src/series/series_trait.rs @@ -105,10 +105,17 @@ pub(crate) mod private { ) -> PolarsResult<()> { polars_bail!(opq = vec_hash_combine, self._dtype()); } + + /// # Safety + /// + /// Does no bounds checks, groups must be correct. #[cfg(feature = "algorithm_group_by")] unsafe fn agg_min(&self, groups: &GroupsProxy) -> Series { Series::full_null(self._field().name().clone(), groups.len(), self._dtype()) } + /// # Safety + /// + /// Does no bounds checks, groups must be correct. 
#[cfg(feature = "algorithm_group_by")] unsafe fn agg_max(&self, groups: &GroupsProxy) -> Series { Series::full_null(self._field().name().clone(), groups.len(), self._dtype()) @@ -119,14 +126,23 @@ pub(crate) mod private { unsafe fn agg_sum(&self, groups: &GroupsProxy) -> Series { Series::full_null(self._field().name().clone(), groups.len(), self._dtype()) } + /// # Safety + /// + /// Does no bounds checks, groups must be correct. #[cfg(feature = "algorithm_group_by")] unsafe fn agg_std(&self, groups: &GroupsProxy, _ddof: u8) -> Series { Series::full_null(self._field().name().clone(), groups.len(), self._dtype()) } + /// # Safety + /// + /// Does no bounds checks, groups must be correct. #[cfg(feature = "algorithm_group_by")] unsafe fn agg_var(&self, groups: &GroupsProxy, _ddof: u8) -> Series { Series::full_null(self._field().name().clone(), groups.len(), self._dtype()) } + /// # Safety + /// + /// Does no bounds checks, groups must be correct. #[cfg(feature = "algorithm_group_by")] unsafe fn agg_list(&self, groups: &GroupsProxy) -> Series { Series::full_null(self._field().name().clone(), groups.len(), self._dtype()) @@ -163,7 +179,7 @@ pub(crate) mod private { #[allow(unused_variables)] fn arg_sort_multiple( &self, - by: &[Series], + by: &[Column], _options: &SortMultipleOptions, ) -> PolarsResult { polars_bail!(opq = arg_sort_multiple, self._dtype()); diff --git a/crates/polars-core/src/testing.rs b/crates/polars-core/src/testing.rs index bf056b5f7769..f227f2bfe861 100644 --- a/crates/polars-core/src/testing.rs +++ b/crates/polars-core/src/testing.rs @@ -199,8 +199,8 @@ mod test { #[test] fn test_df_equal() { - let a = Series::new("a".into(), [1, 2, 3].as_ref()); - let b = Series::new("b".into(), [1, 2, 3].as_ref()); + let a = Column::new("a".into(), [1, 2, 3].as_ref()); + let b = Column::new("b".into(), [1, 2, 3].as_ref()); let df1 = DataFrame::new(vec![a, b]).unwrap(); assert!(df1.equals(&df1)) diff --git a/crates/polars-core/src/tests.rs 
b/crates/polars-core/src/tests.rs index e8a8111225b7..b1c042e80a4b 100644 --- a/crates/polars-core/src/tests.rs +++ b/crates/polars-core/src/tests.rs @@ -4,9 +4,9 @@ use crate::prelude::*; fn test_initial_empty_sort() -> PolarsResult<()> { // https://github.com/pola-rs/polars/issues/1396 let data = vec![1.3; 42]; - let mut series = Series::new("data".into(), Vec::::new()); - let series2 = Series::new("data2".into(), data.clone()); - let series3 = Series::new("data3".into(), data); + let mut series = Column::new("data".into(), Vec::::new()); + let series2 = Column::new("data2".into(), data.clone()); + let series3 = Column::new("data3".into(), data); let df = DataFrame::new(vec![series2, series3])?; for column in df.get_columns().iter() { diff --git a/crates/polars-core/src/utils/flatten.rs b/crates/polars-core/src/utils/flatten.rs index 52b1c69ea6d9..b96ce61dab82 100644 --- a/crates/polars-core/src/utils/flatten.rs +++ b/crates/polars-core/src/utils/flatten.rs @@ -15,7 +15,7 @@ pub fn flatten_df_iter(df: &DataFrame) -> impl Iterator + '_ { Series::from_chunks_and_dtype_unchecked(s.name().clone(), vec![arr], s.dtype()) }; out.set_sorted_flag(s.is_sorted_flag()); - out + Column::from(out) }) .collect(); let df = unsafe { DataFrame::new_no_checks(columns) }; diff --git a/crates/polars-core/src/utils/mod.rs b/crates/polars-core/src/utils/mod.rs index a516626e1abb..4773a9dd0d20 100644 --- a/crates/polars-core/src/utils/mod.rs +++ b/crates/polars-core/src/utils/mod.rs @@ -141,7 +141,8 @@ impl Container for DataFrame { } fn chunk_lengths(&self) -> impl Iterator { - self.get_columns()[0].chunk_lengths() + // @scalar-correctness? + self.columns[0].as_materialized_series().chunk_lengths() } } @@ -684,7 +685,7 @@ macro_rules! apply_method_physical_numeric { macro_rules! df { ($($col_name:expr => $slice:expr), + $(,)?) 
=> { $crate::prelude::DataFrame::new(vec![ - $(<$crate::prelude::Series as $crate::prelude::NamedFrom::<_, _>>::new($col_name.into(), $slice),)+ + $($crate::prelude::Column::from(<$crate::prelude::Series as $crate::prelude::NamedFrom::<_, _>>::new($col_name.into(), $slice)),)+ ]) } } @@ -1135,10 +1136,10 @@ pub fn coalesce_nulls<'a, T: PolarsDataType>( } } -pub fn coalesce_nulls_series(a: &Series, b: &Series) -> (Series, Series) { +pub fn coalesce_nulls_columns(a: &Column, b: &Column) -> (Column, Column) { if a.null_count() > 0 || b.null_count() > 0 { - let mut a = a.rechunk(); - let mut b = b.rechunk(); + let mut a = a.as_materialized_series().rechunk(); + let mut b = b.as_materialized_series().rechunk(); for (arr_a, arr_b) in unsafe { a.chunks_mut().iter_mut().zip(b.chunks_mut()) } { let validity = match (arr_a.validity(), arr_b.validity()) { (None, Some(b)) => Some(b.clone()), @@ -1151,7 +1152,7 @@ pub fn coalesce_nulls_series(a: &Series, b: &Series) -> (Series, Series) { } a.compute_len(); b.compute_len(); - (a, b) + (a.into(), b.into()) } else { (a.clone(), b.clone()) } diff --git a/crates/polars-expr/src/expressions/aggregation.rs b/crates/polars-expr/src/expressions/aggregation.rs index 297c77b19e00..cdac9a46610a 100644 --- a/crates/polars-expr/src/expressions/aggregation.rs +++ b/crates/polars-expr/src/expressions/aggregation.rs @@ -502,9 +502,11 @@ impl PartitionedAggregation for AggregationExpr { }; let mut count_s = series.agg_valid_count(groups); count_s.rename(PlSmallStr::from_static("__POLARS_COUNT")); - Ok(StructChunked::from_series(new_name, &[agg_s, count_s]) - .unwrap() - .into_series()) + Ok( + StructChunked::from_series(new_name, [agg_s, count_s].iter()) + .unwrap() + .into_series(), + ) } }, GroupByMethod::Implode => { diff --git a/crates/polars-expr/src/expressions/apply.rs b/crates/polars-expr/src/expressions/apply.rs index a5ea16d0f22f..0eeb8555071b 100644 --- a/crates/polars-expr/src/expressions/apply.rs +++ 
b/crates/polars-expr/src/expressions/apply.rs @@ -15,7 +15,7 @@ use crate::expressions::{ pub struct ApplyExpr { inputs: Vec>, - function: SpecialEq>, + function: SpecialEq>, expr: Expr, collect_groups: ApplyOptions, function_returns_scalar: bool, @@ -33,7 +33,7 @@ impl ApplyExpr { #[allow(clippy::too_many_arguments)] pub(crate) fn new( inputs: Vec>, - function: SpecialEq>, + function: SpecialEq>, expr: Expr, options: FunctionOptions, allow_threading: bool, @@ -67,7 +67,7 @@ impl ApplyExpr { pub(crate) fn new_minimal( inputs: Vec>, - function: SpecialEq>, + function: SpecialEq>, expr: Expr, collect_groups: ApplyOptions, ) -> Self { @@ -129,13 +129,13 @@ impl ApplyExpr { } } - /// Evaluates and flattens `Option` to `Series`. - fn eval_and_flatten(&self, inputs: &mut [Series]) -> PolarsResult { + /// Evaluates and flattens `Option` to `Column`. + fn eval_and_flatten(&self, inputs: &mut [Column]) -> PolarsResult { if let Some(out) = self.function.call_udf(inputs)? { Ok(out) } else { let field = self.to_field(self.input_schema.as_ref().unwrap()).unwrap(); - Ok(Series::full_null(field.name().clone(), 1, field.dtype())) + Ok(Column::full_null(field.name().clone(), 1, field.dtype())) } } fn apply_single_group_aware<'a>( @@ -157,10 +157,10 @@ impl ApplyExpr { // Create input for the function to determine the output dtype, see #3946. let agg = agg.list().unwrap(); let input_dtype = agg.inner_dtype(); - let input = Series::full_null(PlSmallStr::EMPTY, 0, input_dtype); + let input = Column::full_null(PlSmallStr::EMPTY, 0, input_dtype); let output = self.eval_and_flatten(&mut [input])?; - let ca = ListChunked::full(name, &output, 0); + let ca = ListChunked::full(name, output.as_materialized_series(), 0); return self.finish_apply_groups(ac, ca); } @@ -170,7 +170,10 @@ impl ApplyExpr { if self.pass_name_to_apply { s.rename(name.clone()); } - self.function.call_udf(&mut [s]) + Ok(self + .function + .call_udf(&mut [Column::from(s)])? 
+ .map(|c| c.as_materialized_series().clone())) }, }; @@ -215,16 +218,27 @@ impl ApplyExpr { let (s, aggregated) = match ac.agg_state() { AggState::AggregatedList(s) => { let ca = s.list().unwrap(); - let out = ca.apply_to_inner(&|s| self.eval_and_flatten(&mut [s]))?; + let out = ca.apply_to_inner(&|s| { + self.eval_and_flatten(&mut [s.into()]) + .map(|c| c.as_materialized_series().clone()) + })?; (out.into_series(), true) }, AggState::NotAggregated(s) => { - let (out, aggregated) = (self.eval_and_flatten(&mut [s.clone()])?, false); + let (out, aggregated) = ( + self.eval_and_flatten(&mut [s.clone().into()])? + .as_materialized_series() + .clone(), + false, + ); check_map_output_len(s.len(), out.len(), &self.expr)?; (out, aggregated) }, agg_state => { - ac.with_agg_state(agg_state.try_map(|s| self.eval_and_flatten(&mut [s.clone()]))?); + ac.with_agg_state(agg_state.try_map(|s| { + self.eval_and_flatten(&mut [s.clone().into()]) + .map(|c| c.as_materialized_series().clone()) + })?); return Ok(ac); }, }; @@ -282,10 +296,12 @@ impl ApplyExpr { for iter in &mut iters { match iter.next().unwrap() { None => return Ok(None), - Some(s) => container.push(s.deep_clone()), + Some(s) => container.push(s.deep_clone().into()), } } - self.function.call_udf(&mut container) + self.function + .call_udf(&mut container) + .map(|r| r.map(|c| c.as_materialized_series().clone())) }) .collect::>()? .with_name(field.name.clone()); @@ -326,17 +342,27 @@ impl PhysicalExpr for ApplyExpr { self.inputs .par_iter() .map(f) + .map(|v| v.map(Column::from)) .collect::>>() }) } else { - self.inputs.iter().map(f).collect::>>() + self.inputs + .iter() + .map(f) + .map(|v| v.map(Column::from)) + .collect::>>() }?; if self.allow_rename { self.eval_and_flatten(&mut inputs) + .map(|c| c.as_materialized_series().clone()) } else { let in_name = inputs[0].name().clone(); - Ok(self.eval_and_flatten(&mut inputs)?.with_name(in_name)) + Ok(self + .eval_and_flatten(&mut inputs)? 
+ .as_materialized_series() + .clone() + .with_name(in_name)) } } @@ -357,7 +383,10 @@ impl PhysicalExpr for ApplyExpr { match self.collect_groups { ApplyOptions::ApplyList => { - let s = self.eval_and_flatten(&mut [ac.aggregated()])?; + let s = self + .eval_and_flatten(&mut [ac.aggregated().into()])? + .as_materialized_series() + .clone(); ac.with_series(s, true, Some(&self.expr))?; Ok(ac) }, @@ -369,8 +398,14 @@ impl PhysicalExpr for ApplyExpr { match self.collect_groups { ApplyOptions::ApplyList => { - let mut s = acs.iter_mut().map(|ac| ac.aggregated()).collect::>(); - let s = self.eval_and_flatten(&mut s)?; + let mut s = acs + .iter_mut() + .map(|ac| ac.aggregated().into()) + .collect::>(); + let s = self + .eval_and_flatten(&mut s)? + .as_materialized_series() + .clone(); // take the first aggregation context that as that is the input series let mut ac = acs.swap_remove(0); ac.with_update_groups(UpdateGroups::WithGroupsLen); @@ -438,7 +473,7 @@ impl PhysicalExpr for ApplyExpr { fn apply_multiple_elementwise<'a>( mut acs: Vec>, - function: &dyn SeriesUdf, + function: &dyn ColumnsUdf, expr: &Expr, check_lengths: bool, ) -> PolarsResult> { @@ -450,14 +485,18 @@ fn apply_multiple_elementwise<'a>( let other = acs[1..] .iter() - .map(|ac| ac.flat_naive().into_owned()) + .map(|ac| ac.flat_naive().into_owned().into()) .collect::>(); let out = ca.apply_to_inner(&|s| { let mut args = Vec::with_capacity(other.len() + 1); - args.push(s); + args.push(s.into()); args.extend_from_slice(&other); - Ok(function.call_udf(&mut args)?.unwrap()) + Ok(function + .call_udf(&mut args)? + .unwrap() + .as_materialized_series() + .clone()) })?; let mut ac = acs.swap_remove(0); ac.with_series(out.into_series(), true, None)?; @@ -479,10 +518,15 @@ fn apply_multiple_elementwise<'a>( ac.flat_naive().into_owned() }) + .map(Column::from) .collect::>(); let input_len = s[0].len(); - let s = function.call_udf(&mut s)?.unwrap(); + let s = function + .call_udf(&mut s)? 
+ .unwrap() + .as_materialized_series() + .clone(); if check_lengths { check_map_output_len(input_len, s.len(), expr)?; } @@ -661,13 +705,18 @@ impl PartitionedAggregation for ApplyExpr { state: &ExecutionState, ) -> PolarsResult { let a = self.inputs[0].as_partitioned_aggregator().unwrap(); - let s = a.evaluate_partitioned(df, groups, state)?; + let s = a.evaluate_partitioned(df, groups, state)?.into(); if self.allow_rename { self.eval_and_flatten(&mut [s]) + .map(|c| c.as_materialized_series().clone()) } else { let in_name = s.name().clone(); - Ok(self.eval_and_flatten(&mut [s])?.with_name(in_name)) + Ok(self + .eval_and_flatten(&mut [s])? + .as_materialized_series() + .clone() + .with_name(in_name)) } } diff --git a/crates/polars-expr/src/expressions/column.rs b/crates/polars-expr/src/expressions/column.rs index 74a20dcdb0ba..6bac214f140c 100644 --- a/crates/polars-expr/src/expressions/column.rs +++ b/crates/polars-expr/src/expressions/column.rs @@ -33,7 +33,7 @@ impl ColumnExpr { for df in state.ext_contexts.as_ref() { let out = df.column(&self.name); if out.is_ok() { - return out.cloned(); + return out.map(Column::as_materialized_series).cloned(); } } Err(e) @@ -75,7 +75,9 @@ impl ColumnExpr { // in release we fallback to linear search #[allow(unreachable_code)] { - df.column(&self.name).cloned() + df.column(&self.name) + .map(Column::as_materialized_series) + .cloned() } } else { Ok(out.clone()) @@ -98,7 +100,9 @@ impl ColumnExpr { } // in release we fallback to linear search #[allow(unreachable_code)] - df.column(&self.name).cloned() + df.column(&self.name) + .map(Column::as_materialized_series) + .cloned() } fn process_from_state_schema( @@ -110,7 +114,9 @@ impl ColumnExpr { match schema.get_full(&self.name) { None => self.process_by_linear_search(df, state, true), Some((idx, _, _)) => match df.get_columns().get(idx) { - Some(out) => self.process_by_idx(out, state, schema, df, false), + Some(out) => { + self.process_by_idx(out.as_materialized_series(), 
state, schema, df, false) + }, None => self.process_by_linear_search(df, state, true), }, } @@ -125,6 +131,7 @@ impl ColumnExpr { .iter() .find(|s| s.name() == &self.name) .unwrap() + .as_materialized_series() .clone()) } } @@ -142,7 +149,13 @@ impl PhysicalExpr for ColumnExpr { // check if the schema was correct // if not do O(n) search match df.get_columns().get(idx) { - Some(out) => self.process_by_idx(out, state, schema, df, true), + Some(out) => self.process_by_idx( + out.as_materialized_series(), + state, + schema, + df, + true, + ), None => { // partitioned group_by special case if let Some(schema) = state.get_schema() { diff --git a/crates/polars-expr/src/expressions/sortby.rs b/crates/polars-expr/src/expressions/sortby.rs index 71825c971329..00ace093856e 100644 --- a/crates/polars-expr/src/expressions/sortby.rs +++ b/crates/polars-expr/src/expressions/sortby.rs @@ -152,6 +152,7 @@ fn sort_by_groups_multiple_by( let groups = sort_by_s .iter() .map(|s| unsafe { s.take_slice_unchecked(idx) }) + .map(Column::from) .collect::>(); let options = SortMultipleOptions { @@ -161,13 +162,17 @@ fn sort_by_groups_multiple_by( maintain_order, }; - let sorted_idx = groups[0].arg_sort_multiple(&groups[1..], &options).unwrap(); + let sorted_idx = groups[0] + .as_materialized_series() + .arg_sort_multiple(&groups[1..], &options) + .unwrap(); map_sorted_indices_to_group_idx(&sorted_idx, idx) }, GroupsIndicator::Slice([first, len]) => { let groups = sort_by_s .iter() .map(|s| s.slice(first as i64, len as usize)) + .map(Column::from) .collect::>(); let options = SortMultipleOptions { @@ -176,7 +181,10 @@ fn sort_by_groups_multiple_by( multithreaded, maintain_order, }; - let sorted_idx = groups[0].arg_sort_multiple(&groups[1..], &options).unwrap(); + let sorted_idx = groups[0] + .as_materialized_series() + .arg_sort_multiple(&groups[1..], &options) + .unwrap(); map_sorted_indices_to_group_slice(&sorted_idx, first) }, }; @@ -208,11 +216,13 @@ impl PhysicalExpr for SortByExpr { 
.by .iter() .map(|e| { - e.evaluate(df, state).map(|s| match s.dtype() { - #[cfg(feature = "dtype-categorical")] - DataType::Categorical(_, _) | DataType::Enum(_, _) => s, - _ => s.to_physical_repr().into_owned(), - }) + e.evaluate(df, state) + .map(|s| match s.dtype() { + #[cfg(feature = "dtype-categorical")] + DataType::Categorical(_, _) | DataType::Enum(_, _) => s, + _ => s.to_physical_repr().into_owned(), + }) + .map(Column::from) }) .collect::>>()?; @@ -231,7 +241,9 @@ impl PhysicalExpr for SortByExpr { ); } - s_sort_by[0].arg_sort_multiple(&s_sort_by[1..], &options) + s_sort_by[0] + .as_materialized_series() + .arg_sort_multiple(&s_sort_by[1..], &options) }; POOL.install(|| rayon::join(series_f, sorted_idx_f)) }; diff --git a/crates/polars-expr/src/expressions/window.rs b/crates/polars-expr/src/expressions/window.rs index 47ea0847507c..5a455cf5932b 100644 --- a/crates/polars-expr/src/expressions/window.rs +++ b/crates/polars-expr/src/expressions/window.rs @@ -127,7 +127,7 @@ impl WindowExpr { out_column: Series, flattened: Series, mut ac: AggregationContext, - group_by_columns: &[Series], + group_by_columns: &[Column], gb: GroupBy, state: &ExecutionState, cache_key: &str, @@ -412,7 +412,7 @@ impl PhysicalExpr for WindowExpr { let group_by_columns = self .group_by .iter() - .map(|e| e.evaluate(df, state)) + .map(|e| e.evaluate(df, state).map(Column::from)) .collect::>>()?; // if the keys are sorted @@ -584,7 +584,12 @@ impl PhysicalExpr for WindowExpr { let right = &keys[0]; PolarsResult::Ok( group_by_columns[0] - .hash_join_left(right, JoinValidation::ManyToMany, true) + .as_materialized_series() + .hash_join_left( + right.as_materialized_series(), + JoinValidation::ManyToMany, + true, + ) .unwrap() .1, ) diff --git a/crates/polars-expr/src/planner.rs b/crates/polars-expr/src/planner.rs index e578b8da9679..3f942cf55d59 100644 --- a/crates/polars-expr/src/planner.rs +++ b/crates/polars-expr/src/planner.rs @@ -566,9 +566,9 @@ fn create_physical_expr_inner( }, 
Explode(expr) => { let input = create_physical_expr_inner(*expr, ctxt, expr_arena, schema, state)?; - let function = - SpecialEq::new(Arc::new(move |s: &mut [Series]| s[0].explode().map(Some)) - as Arc); + let function = SpecialEq::new(Arc::new( + move |c: &mut [polars_core::frame::column::Column]| c[0].explode().map(Some), + ) as Arc); Ok(Arc::new(ApplyExpr::new_minimal( vec![input], function, diff --git a/crates/polars-expr/src/state/node_timer.rs b/crates/polars-expr/src/state/node_timer.rs index 8102aa8fcf83..48aa65e12c17 100644 --- a/crates/polars-expr/src/state/node_timer.rs +++ b/crates/polars-expr/src/state/node_timer.rs @@ -42,7 +42,7 @@ impl NodeTimer { polars_ensure!(!ticks.is_empty(), ComputeError: "no data to time"); let start = ticks[0].0; ticks.push((self.query_start, start)); - let nodes_s = Series::new(PlSmallStr::from_static("node"), nodes); + let nodes_s = Column::new(PlSmallStr::from_static("node"), nodes); let start: NoNull = ticks .iter() .map(|(start, _)| (start.duration_since(self.query_start)).as_micros() as u64) @@ -57,7 +57,7 @@ impl NodeTimer { let mut end = end.into_inner(); end.rename(PlSmallStr::from_static("end")); - let columns = vec![nodes_s, start.into_series(), end.into_series()]; + let columns = vec![nodes_s, start.into_column(), end.into_column()]; let df = unsafe { DataFrame::new_no_checks(columns) }; df.sort(vec!["start"], SortMultipleOptions::default()) } diff --git a/crates/polars-ffi/src/version_0.rs b/crates/polars-ffi/src/version_0.rs index 0fc29055f66d..3cffd4425045 100644 --- a/crates/polars-ffi/src/version_0.rs +++ b/crates/polars-ffi/src/version_0.rs @@ -1,4 +1,4 @@ -use polars_core::prelude::CompatLevel; +use polars_core::prelude::{Column, CompatLevel}; use super::*; @@ -53,6 +53,10 @@ unsafe extern "C" fn c_release_series_export(e: *mut SeriesExport) { e.release = None; } +pub fn export_column(c: &Column) -> SeriesExport { + export_series(c.as_materialized_series()) +} + pub fn export_series(s: &Series) -> 
SeriesExport { let field = ArrowField::new( s.name().clone(), diff --git a/crates/polars-io/src/csv/read/read_impl.rs b/crates/polars-io/src/csv/read/read_impl.rs index 048df2dc2fb2..bfe4e45fd286 100644 --- a/crates/polars-io/src/csv/read/read_impl.rs +++ b/crates/polars-io/src/csv/read/read_impl.rs @@ -34,22 +34,22 @@ pub(crate) fn cast_columns( parallel: bool, ignore_errors: bool, ) -> PolarsResult<()> { - let cast_fn = |s: &Series, fld: &Field| { - let out = match (s.dtype(), fld.dtype()) { + let cast_fn = |c: &Column, fld: &Field| { + let out = match (c.dtype(), fld.dtype()) { #[cfg(feature = "temporal")] - (DataType::String, DataType::Date) => s + (DataType::String, DataType::Date) => c .str() .unwrap() .as_date(None, false) - .map(|ca| ca.into_series()), + .map(|ca| ca.into_column()), #[cfg(feature = "temporal")] - (DataType::String, DataType::Time) => s + (DataType::String, DataType::Time) => c .str() .unwrap() .as_time(None, false) - .map(|ca| ca.into_series()), + .map(|ca| ca.into_column()), #[cfg(feature = "temporal")] - (DataType::String, DataType::Datetime(tu, _)) => s + (DataType::String, DataType::Datetime(tu, _)) => c .str() .unwrap() .as_datetime( @@ -60,11 +60,11 @@ pub(crate) fn cast_columns( None, &StringChunked::from_iter(std::iter::once("raise")), ) - .map(|ca| ca.into_series()), - (_, dt) => s.cast(dt), + .map(|ca| ca.into_column()), + (_, dt) => c.cast(dt), }?; - if !ignore_errors && s.null_count() != out.null_count() { - handle_casting_failures(s, &out)?; + if !ignore_errors && c.null_count() != out.null_count() { + handle_casting_failures(c.as_materialized_series(), out.as_materialized_series())?; } Ok(out) }; @@ -554,7 +554,7 @@ impl<'a> CoreReader<'a> { let columns = buffers .into_iter() - .map(|buf| buf.into_series()) + .map(|buf| buf.into_series().map(Column::from)) .collect::>()?; let mut local_df = unsafe { DataFrame::new_no_checks(columns) }; let current_row_count = local_df.height() as IdxSize; @@ -659,7 +659,7 @@ impl<'a> 
CoreReader<'a> { let columns = buffers .into_iter() - .map(|buf| buf.into_series()) + .map(|buf| buf.into_series().map(Column::from)) .collect::>()?; unsafe { DataFrame::new_no_checks(columns) } }; @@ -766,7 +766,7 @@ fn read_chunk( let columns = buffers .into_iter() - .map(|buf| buf.into_series()) + .map(|buf| buf.into_series().map(Column::from)) .collect::>()?; Ok(unsafe { DataFrame::new_no_checks(columns) }) } diff --git a/crates/polars-io/src/csv/read/reader.rs b/crates/polars-io/src/csv/read/reader.rs index 49fb576fff8a..857b223bae0b 100644 --- a/crates/polars-io/src/csv/read/reader.rs +++ b/crates/polars-io/src/csv/read/reader.rs @@ -325,22 +325,22 @@ fn parse_dates(mut df: DataFrame, fixed_schema: &Schema) -> DataFrame { let cols = unsafe { std::mem::take(df.get_columns_mut()) } .into_par_iter() - .map(|s| { - match s.dtype() { + .map(|c| { + match c.dtype() { DataType::String => { - let ca = s.str().unwrap(); + let ca = c.str().unwrap(); // don't change columns that are in the fixed schema. 
- if fixed_schema.index_of(s.name()).is_some() { - return s; + if fixed_schema.index_of(c.name()).is_some() { + return c; } #[cfg(feature = "dtype-time")] if let Ok(ca) = ca.as_time(None, false) { - return ca.into_series(); + return ca.into_column(); } - s + c }, - _ => s, + _ => c, } }); let cols = POOL.install(|| cols.collect::>()); diff --git a/crates/polars-io/src/csv/write/write_impl.rs b/crates/polars-io/src/csv/write/write_impl.rs index a3f72b56161f..faeb8d0e449d 100644 --- a/crates/polars-io/src/csv/write/write_impl.rs +++ b/crates/polars-io/src/csv/write/write_impl.rs @@ -140,7 +140,7 @@ pub(crate) fn write( // the bck thinks the lifetime is bounded to write_buffer_pool, but at the time we return // the vectors the buffer pool, the series have already been removed from the buffers // in other words, the lifetime does not leave this scope - let cols = unsafe { std::mem::transmute::<&[Series], &[Series]>(cols) }; + let cols = unsafe { std::mem::transmute::<&[Column], &[Column]>(cols) }; let mut write_buffer = write_buffer_pool.get(); if df.is_empty() { @@ -154,7 +154,7 @@ pub(crate) fn write( .enumerate() .map(|(i, col)| { serializer_for( - &*col.chunks()[0], + &*col.as_materialized_series().chunks()[0], options, col.dtype(), datetime_formats[i], @@ -165,7 +165,7 @@ pub(crate) fn write( } else { debug_assert_eq!(serializers_vec.len(), cols.len()); for (col_iter, col) in std::iter::zip(&mut serializers_vec, cols) { - col_iter.update_array(&*col.chunks()[0]); + col_iter.update_array(&*col.as_materialized_series().chunks()[0]); } } diff --git a/crates/polars-io/src/hive.rs b/crates/polars-io/src/hive.rs index 17ace26d6be7..77e65647fa56 100644 --- a/crates/polars-io/src/hive.rs +++ b/crates/polars-io/src/hive.rs @@ -22,7 +22,9 @@ pub(crate) fn materialize_hive_partitions( return; } - let hive_columns_iter = hive_columns.iter().map(|s| s.new_from_index(0, num_rows)); + let hive_columns_iter = hive_columns + .iter() + .map(|s| s.new_from_index(0, 
num_rows).into()); if reader_schema.index_of(hive_columns[0].name()).is_none() || df.width() == 0 { // Fast-path - all hive columns are at the end diff --git a/crates/polars-io/src/ipc/ipc_file.rs b/crates/polars-io/src/ipc/ipc_file.rs index feaea44f5417..64598ca8c848 100644 --- a/crates/polars-io/src/ipc/ipc_file.rs +++ b/crates/polars-io/src/ipc/ipc_file.rs @@ -12,8 +12,8 @@ //! use std::io::Cursor; //! //! -//! let s0 = Series::new("days".into(), &[0, 1, 2, 3, 4]); -//! let s1 = Series::new("temp".into(), &[22.1, 19.9, 7., 2., 3.]); +//! let s0 = Column::new("days".into(), &[0, 1, 2, 3, 4]); +//! let s1 = Column::new("temp".into(), &[22.1, 19.9, 7., 2., 3.]); //! let mut df = DataFrame::new(vec![s0, s1]).unwrap(); //! //! // Create an in memory file handler. diff --git a/crates/polars-io/src/ipc/ipc_stream.rs b/crates/polars-io/src/ipc/ipc_stream.rs index 545f19168f9f..6b16579ac93d 100644 --- a/crates/polars-io/src/ipc/ipc_stream.rs +++ b/crates/polars-io/src/ipc/ipc_stream.rs @@ -13,9 +13,9 @@ //! use std::io::Cursor; //! //! -//! let s0 = Series::new("days".into(), &[0, 1, 2, 3, 4]); -//! let s1 = Series::new("temp".into(), &[22.1, 19.9, 7., 2., 3.]); -//! let mut df = DataFrame::new(vec![s0, s1]).unwrap(); +//! let c0 = Column::new("days".into(), &[0, 1, 2, 3, 4]); +//! let c1 = Column::new("temp".into(), &[22.1, 19.9, 7., 2., 3.]); +//! let mut df = DataFrame::new(vec![c0, c1]).unwrap(); //! //! // Create an in memory file handler. //! 
// Vec: Read + Write diff --git a/crates/polars-io/src/ndjson/core.rs b/crates/polars-io/src/ndjson/core.rs index c3754f9403d1..a72b4ccf7038 100644 --- a/crates/polars-io/src/ndjson/core.rs +++ b/crates/polars-io/src/ndjson/core.rs @@ -309,7 +309,7 @@ impl<'a> CoreJsonReader<'a> { let mut local_df = DataFrame::new( buffers .into_values() - .map(|buf| buf.into_series()) + .map(|buf| buf.into_series().into_column()) .collect::<_>(), )?; diff --git a/crates/polars-io/src/parquet/read/read_impl.rs b/crates/polars-io/src/parquet/read/read_impl.rs index 63fb51464038..693dcc1ea31c 100644 --- a/crates/polars-io/src/parquet/read/read_impl.rs +++ b/crates/polars-io/src/parquet/read/read_impl.rs @@ -156,12 +156,14 @@ fn rg_to_dfs( if let Some(row_index) = row_index { let placeholder = NullChunkedBuilder::new(PlSmallStr::from_static("__PL_TMP"), slice.1).finish(); - return Ok(vec![DataFrame::new(vec![placeholder.into_series()])? - .with_row_index( - row_index.name.clone(), - Some(row_index.offset + IdxSize::try_from(slice.0).unwrap()), - )? - .select(std::iter::once(row_index.name))?]); + return Ok(vec![DataFrame::new(vec![placeholder + .into_series() + .into_column()])? + .with_row_index( + row_index.name.clone(), + Some(row_index.offset + IdxSize::try_from(slice.0).unwrap()), + )? + .select(std::iter::once(row_index.name))?]); } } @@ -322,6 +324,7 @@ fn rg_to_dfs_prefiltered( .collect::>(); column_idx_to_series(col_idx, field_md.as_slice(), None, schema, store) + .map(Column::from) }) .collect::>>()?; @@ -428,9 +431,9 @@ fn rg_to_dfs_prefiltered( debug_assert_eq!(array.len(), filter_mask.set_bits()); - Ok(array) + Ok(array.into_column()) }) - .collect::>>()?; + .collect::>>()?; let mut rearranged_schema = df.schema(); rearranged_schema.merge(Schema::from_arrow_schema(schema.as_ref())); @@ -516,6 +519,7 @@ fn rg_to_dfs_optionally_par_over_columns( schema, store, ) + .map(Column::from) }) .collect::>>() })? 
@@ -533,6 +537,7 @@ fn rg_to_dfs_optionally_par_over_columns( schema, store, ) + .map(Column::from) }) .collect::>>()? }; @@ -633,6 +638,7 @@ fn rg_to_dfs_par_over_rg( schema, store, ) + .map(Column::from) }) .collect::>>()?; diff --git a/crates/polars-io/src/shared.rs b/crates/polars-io/src/shared.rs index 7fbb5eb96e7f..4babd4f65bd5 100644 --- a/crates/polars-io/src/shared.rs +++ b/crates/polars-io/src/shared.rs @@ -98,7 +98,10 @@ pub(crate) fn finish_reader( // Create an empty dataframe with the correct data types let empty_cols = arrow_schema .iter_values() - .map(|fld| Series::try_from((fld.name.clone(), new_empty_array(fld.dtype.clone())))) + .map(|fld| { + Series::try_from((fld.name.clone(), new_empty_array(fld.dtype.clone()))) + .map(Column::from) + }) .collect::>()?; DataFrame::new(empty_cols)? } else { diff --git a/crates/polars-io/src/utils/other.rs b/crates/polars-io/src/utils/other.rs index 8999ecb657d4..45300d80d319 100644 --- a/crates/polars-io/src/utils/other.rs +++ b/crates/polars-io/src/utils/other.rs @@ -87,7 +87,7 @@ pub(crate) fn update_row_counts(dfs: &mut [(DataFrame, IdxSize)], offset: IdxSiz let mut previous = dfs[0].1 + offset; for (df, n_read) in &mut dfs[1..] { if let Some(s) = unsafe { df.get_columns_mut() }.get_mut(0) { - *s = &*s + previous; + *s = (&*s + previous).unwrap(); } previous += *n_read; } @@ -103,7 +103,7 @@ pub(crate) fn update_row_counts2(dfs: &mut [DataFrame], offset: IdxSize) { for df in &mut dfs[1..] 
{ let n_read = df.height() as IdxSize; if let Some(s) = unsafe { df.get_columns_mut() }.get_mut(0) { - *s = &*s + previous; + *s = (&*s + previous).unwrap(); } previous += n_read; } @@ -122,7 +122,7 @@ pub(crate) fn update_row_counts3(dfs: &mut [DataFrame], heights: &[IdxSize], off let n_read = heights[i]; if let Some(s) = unsafe { df.get_columns_mut() }.get_mut(0) { - *s = &*s + previous; + *s = (&*s + previous).unwrap(); } previous += n_read; @@ -203,6 +203,7 @@ pub(crate) fn chunk_df_for_writing( // See: #16403 if !df.get_columns().is_empty() && df.get_columns()[0] + .as_materialized_series() .chunk_lengths() .take(5) .all(|len| len < row_group_size) diff --git a/crates/polars-lazy/src/dsl/eval.rs b/crates/polars-lazy/src/dsl/eval.rs index 574c2b336407..dcb4853f0671 100644 --- a/crates/polars-lazy/src/dsl/eval.rs +++ b/crates/polars-lazy/src/dsl/eval.rs @@ -45,12 +45,12 @@ pub trait ExprEvalExtension: IntoExpr + Sized { fn cumulative_eval(self, expr: Expr, min_periods: usize, parallel: bool) -> Expr { let this = self.into_expr(); let expr2 = expr.clone(); - let func = move |mut s: Series| { - let name = s.name().clone(); - s.rename(PlSmallStr::EMPTY); + let func = move |mut c: Column| { + let name = c.name().clone(); + c.rename(PlSmallStr::EMPTY); // Ensure we get the new schema. 
- let output_field = eval_field_to_dtype(s.field().as_ref(), &expr, false); + let output_field = eval_field_to_dtype(c.field().as_ref(), &expr, false); let expr = expr.clone(); let mut arena = Arena::with_capacity(10); @@ -65,7 +65,7 @@ pub trait ExprEvalExtension: IntoExpr + Sized { let state = ExecutionState::new(); - let finish = |out: Series| { + let finish = |out: Column| { polars_ensure!( out.len() <= 1, ComputeError: @@ -76,13 +76,13 @@ pub trait ExprEvalExtension: IntoExpr + Sized { }; let avs = if parallel { - (1..s.len() + 1) + (1..c.len() + 1) .into_par_iter() .map(|len| { - let s = s.slice(0, len); + let s = c.slice(0, len); if (len - s.null_count()) >= min_periods { - let df = s.into_frame(); - let out = phys_expr.evaluate(&df, &state)?; + let df = s.into_frame(); + let out = phys_expr.evaluate(&df, &state)?.into_column(); finish(out) } else { Ok(AnyValue::Null) @@ -91,13 +91,13 @@ pub trait ExprEvalExtension: IntoExpr + Sized { .collect::>>()? } else { let mut df_container = DataFrame::empty(); - (1..s.len() + 1) + (1..c.len() + 1) .map(|len| { - let s = s.slice(0, len); - if (len - s.null_count()) >= min_periods { + let c = c.slice(0, len); + if (len - c.null_count()) >= min_periods { unsafe { - df_container.get_columns_mut().push(s); - let out = phys_expr.evaluate(&df_container, &state)?; + df_container.get_columns_mut().push(c.into_column()); + let out = phys_expr.evaluate(&df_container, &state)?.into_column(); df_container.get_columns_mut().clear(); finish(out) } @@ -107,12 +107,12 @@ pub trait ExprEvalExtension: IntoExpr + Sized { }) .collect::>>()?
}; - let s = Series::new(name, avs); + let c = Column::new(name, avs); - if s.dtype() != output_field.dtype() { - s.cast(output_field.dtype()).map(Some) + if c.dtype() != output_field.dtype() { + c.cast(output_field.dtype()).map(Some) } else { - Ok(Some(s)) + Ok(Some(c)) } }; diff --git a/crates/polars-lazy/src/dsl/list.rs b/crates/polars-lazy/src/dsl/list.rs index fb1594196e41..4dae2529bc14 100644 --- a/crates/polars-lazy/src/dsl/list.rs +++ b/crates/polars-lazy/src/dsl/list.rs @@ -44,12 +44,12 @@ fn offsets_to_groups(offsets: &[i64]) -> Option { } fn run_per_sublist( - s: Series, + s: Column, lst: &ListChunked, expr: &Expr, parallel: bool, output_field: Field, -) -> PolarsResult> { +) -> PolarsResult> { let phys_expr = prepare_expression_for_context( PlSmallStr::EMPTY, expr, @@ -86,7 +86,7 @@ fn run_per_sublist( lst.into_iter() .map(|s| { s.and_then(|s| unsafe { - df_container.get_columns_mut().push(s); + df_container.get_columns_mut().push(s.into_column()); let out = phys_expr.evaluate(&df_container, &state); df_container.get_columns_mut().clear(); match out { @@ -107,9 +107,9 @@ fn run_per_sublist( ca.rename(s.name().clone()); if ca.dtype() != output_field.dtype() { - ca.cast(output_field.dtype()).map(Some) + ca.cast(output_field.dtype()).map(Column::from).map(Some) } else { - Ok(Some(ca.into_series())) + Ok(Some(ca.into_column())) } } @@ -117,7 +117,7 @@ fn run_on_group_by_engine( name: PlSmallStr, lst: &ListChunked, expr: &Expr, -) -> PolarsResult> { +) -> PolarsResult> { let lst = lst.rechunk(); let arr = lst.downcast_iter().next().unwrap(); let groups = offsets_to_groups(arr.offsets()).unwrap(); @@ -142,7 +142,7 @@ fn run_on_group_by_engine( }, _ => ac.aggregated(), }; - Ok(Some(out.with_name(name))) + Ok(Some(out.with_name(name).into_column())) } pub trait ListNameSpaceExtension: IntoListNameSpace + Sized { @@ -151,7 +151,7 @@ pub trait ListNameSpaceExtension: IntoListNameSpace + Sized { let this = self.into_list_name_space(); let expr2 = expr.clone(); - 
let func = move |s: Series| { + let func = move |c: Column| { for e in expr.into_iter() { match e { #[cfg(feature = "dtype-categorical")] @@ -173,19 +173,19 @@ pub trait ListNameSpaceExtension: IntoListNameSpace + Sized { _ => {}, } } - let lst = s.list()?.clone(); + let lst = c.list()?.clone(); // # fast returns // ensure we get the new schema let output_field = eval_field_to_dtype(lst.ref_field(), &expr, true); if lst.is_empty() { - return Ok(Some(Series::new_empty( - s.name().clone(), + return Ok(Some(Column::new_empty( + c.name().clone(), output_field.dtype(), ))); } if lst.null_count() == lst.len() { - return Ok(Some(s.cast(output_field.dtype())?)); + return Ok(Some(c.cast(output_field.dtype())?.into_column())); } let fits_idx_size = lst.get_values_size() <= (IdxSize::MAX as usize); @@ -195,10 +195,10 @@ pub trait ListNameSpaceExtension: IntoListNameSpace + Sized { expr.into_iter().any(|e| matches!(e, Expr::AnonymousFunction { options, .. } if options.fmt_str == MAP_LIST_NAME)) }; - if fits_idx_size && s.null_count() == 0 && !is_user_apply() { - run_on_group_by_engine(s.name().clone(), &lst, &expr) + if fits_idx_size && c.null_count() == 0 && !is_user_apply() { + run_on_group_by_engine(c.name().clone(), &lst, &expr) } else { - run_per_sublist(s, &lst, &expr, parallel, output_field) + run_per_sublist(c, &lst, &expr, parallel, output_field) } }; diff --git a/crates/polars-lazy/src/lib.rs b/crates/polars-lazy/src/lib.rs index 005a09186ba2..3059384a1c8c 100644 --- a/crates/polars-lazy/src/lib.rs +++ b/crates/polars-lazy/src/lib.rs @@ -61,7 +61,7 @@ //! assert!(new.column("new_column") //! .unwrap() //! .equals( -//! &Series::new("new_column".into(), &[50, 40, 30, 20, 10]) +//! &Column::new("new_column".into(), &[50, 40, 30, 20, 10]) //! ) //! ); //! ``` @@ -94,7 +94,7 @@ //! assert!(new.column("new_column") //! .unwrap() //! .equals( -//! &Series::new("new_column".into(), &[100, 100, 3, 4, 5]) +//! &Column::new("new_column".into(), &[100, 100, 3, 4, 5]) //! ) //! 
); //! ``` @@ -147,7 +147,7 @@ //! col("column_a") //! // apply a custom closure Series => Result //! .map(|_s| { -//! Ok(Some(Series::new("".into(), &[6.0f32, 6.0, 6.0, 6.0, 6.0]))) +//! Ok(Some(Column::new("".into(), &[6.0f32, 6.0, 6.0, 6.0, 6.0]))) //! }, //! // return type of the closure //! GetOutput::from_type(DataType::Float64)).alias("new_column") diff --git a/crates/polars-lazy/src/tests/aggregations.rs b/crates/polars-lazy/src/tests/aggregations.rs index 54387451a8b7..7bb21eb5bcd6 100644 --- a/crates/polars-lazy/src/tests/aggregations.rs +++ b/crates/polars-lazy/src/tests/aggregations.rs @@ -31,7 +31,7 @@ fn test_agg_exprs() -> PolarsResult<()> { .lazy() .group_by_stable([col("cars")]) .agg([(lit(1) - col("A")) - .map(|s| Ok(Some(&s * 2)), GetOutput::same_type()) + .map(|s| Ok(Some((&s * 2)?)), GetOutput::same_type()) .alias("foo")]) .collect()?; let ca = out.column("foo")?.list()?; @@ -63,12 +63,12 @@ fn test_agg_unique_first() -> PolarsResult<()> { .collect()?; let a = out.column("v_first").unwrap(); - let a = a.sum::().unwrap(); + let a = a.as_materialized_series().sum::().unwrap(); // can be both because unique does not guarantee order assert!(a == 10 || a == 11); let a = out.column("true_first").unwrap(); - let a = a.sum::().unwrap(); + let a = a.as_materialized_series().sum::().unwrap(); // can be both because unique does not guarantee order assert_eq!(a, 10); diff --git a/crates/polars-lazy/src/tests/arity.rs b/crates/polars-lazy/src/tests/arity.rs index c6f7b4381b53..740678af0af4 100644 --- a/crates/polars-lazy/src/tests/arity.rs +++ b/crates/polars-lazy/src/tests/arity.rs @@ -72,5 +72,12 @@ fn test_lazy_ternary() { ) .collect() .unwrap(); - assert_eq!(43, df.column("new").unwrap().sum::().unwrap()); + assert_eq!( + 43, + df.column("new") + .unwrap() + .as_materialized_series() + .sum::() + .unwrap() + ); } diff --git a/crates/polars-lazy/src/tests/optimization_checks.rs b/crates/polars-lazy/src/tests/optimization_checks.rs index 
e01ad342f061..4a99413d48cc 100644 --- a/crates/polars-lazy/src/tests/optimization_checks.rs +++ b/crates/polars-lazy/src/tests/optimization_checks.rs @@ -310,7 +310,7 @@ pub fn test_predicate_block_cast() -> PolarsResult<()> { let s = out.column("value").unwrap(); assert_eq!( s, - &Series::new(PlSmallStr::from_static("value"), [1.0f32, 2.0]) + &Column::new(PlSmallStr::from_static("value"), [1.0f32, 2.0]) ); } @@ -325,7 +325,7 @@ fn test_lazy_filter_and_rename() { .lazy() .rename(["a"], ["x"]) .filter(col("x").map( - |s: Series| Ok(Some(s.gt(3)?.into_series())), + |s: Column| Ok(Some(s.as_materialized_series().gt(3)?.into_column())), GetOutput::from_type(DataType::Boolean), )) .select([col("x")]); @@ -338,7 +338,7 @@ fn test_lazy_filter_and_rename() { // now we check if the column is rename or added when we don't select let lf = df.lazy().rename(["a"], ["x"]).filter(col("x").map( - |s: Series| Ok(Some(s.gt(3)?.into_series())), + |s: Column| Ok(Some(s.as_materialized_series().gt(3)?.into_column())), GetOutput::from_type(DataType::Boolean), )); // the rename function should not interfere with the predicate pushdown diff --git a/crates/polars-lazy/src/tests/predicate_queries.rs b/crates/polars-lazy/src/tests/predicate_queries.rs index 855d9463f814..71d24d1207e1 100644 --- a/crates/polars-lazy/src/tests/predicate_queries.rs +++ b/crates/polars-lazy/src/tests/predicate_queries.rs @@ -72,7 +72,7 @@ fn test_pass_unrelated_apply() -> PolarsResult<()> { let q = df .lazy() .with_column(col("A").map( - |s| Ok(Some(s.is_null().into_series())), + |s| Ok(Some(s.is_null().into_column())), GetOutput::from_type(DataType::Boolean), )) .filter(col("B").gt(lit(10i32))); diff --git a/crates/polars-lazy/src/tests/projection_queries.rs b/crates/polars-lazy/src/tests/projection_queries.rs index b2cff519c05a..d1594a461a86 100644 --- a/crates/polars-lazy/src/tests/projection_queries.rs +++ b/crates/polars-lazy/src/tests/projection_queries.rs @@ -130,7 +130,7 @@ fn 
concat_str_regex_expansion() -> PolarsResult<()> { let s = out.column("concatenated")?; assert_eq!( s, - &Series::new("concatenated".into(), ["a--;;", ";b--;", ";;c--"]) + &Column::new("concatenated".into(), ["a--;;", ";b--;", ";;c--"]) ); Ok(()) diff --git a/crates/polars-lazy/src/tests/queries.rs b/crates/polars-lazy/src/tests/queries.rs index d1566cbb8680..ff4894b99857 100644 --- a/crates/polars-lazy/src/tests/queries.rs +++ b/crates/polars-lazy/src/tests/queries.rs @@ -88,7 +88,7 @@ fn test_lazy_udf() { let df = get_df(); let new = df .lazy() - .select([col("sepal_width").map(|s| Ok(Some(s * 200.0)), GetOutput::same_type())]) + .select([col("sepal_width").map(|s| Ok(Some((s * 200.0)?)), GetOutput::same_type())]) .collect() .unwrap(); assert_eq!( @@ -219,7 +219,7 @@ fn test_lazy_ternary_and_predicates() { let length = new.column("sepal_length").unwrap(); assert_eq!( length, - &Series::new("sepal_length".into(), &[5.1f64, 5.0, 5.4]) + &Column::new("sepal_length".into(), &[5.1f64, 5.0, 5.4]) ); assert_eq!(new.shape(), (3, 6)); } @@ -232,7 +232,14 @@ fn test_lazy_binary_ops() { .select([col("a").eq(lit(2)).alias("foo")]) .collect() .unwrap(); - assert_eq!(new.column("foo").unwrap().sum::().unwrap(), 1); + assert_eq!( + new.column("foo") + .unwrap() + .as_materialized_series() + .sum::() + .unwrap(), + 1 + ); } #[test] @@ -240,7 +247,7 @@ fn test_lazy_query_2() { let df = load_df(); let ldf = df .lazy() - .with_column(col("a").map(|s| Ok(Some(s * 2)), GetOutput::same_type())) + .with_column(col("a").map(|s| Ok(Some((s * 2)?)), GetOutput::same_type())) .filter(col("a").lt(lit(2))) .select([col("b"), col("a")]); @@ -277,7 +284,7 @@ fn test_lazy_query_4() -> PolarsResult<()> { col("day").alias("day"), col("cumcases") .apply( - |s: Series| (&s - &(s.shift(1))).map(Some), + |s: Column| (&s - &(s.shift(1))).map(Some), GetOutput::same_type(), ) .alias("diff_cases"), @@ -414,7 +421,7 @@ fn test_lazy_query_9() -> PolarsResult<()> { fn test_lazy_query_10() { use 
polars_core::export::chrono::Duration as ChronoDuration; let date = NaiveDate::from_ymd_opt(2021, 3, 5).unwrap(); - let x: Series = DatetimeChunked::from_naive_datetime( + let x = DatetimeChunked::from_naive_datetime( "x".into(), [ NaiveDateTime::new(date, NaiveTime::from_hms_opt(12, 0, 0).unwrap()), @@ -423,8 +430,8 @@ fn test_lazy_query_10() { ], TimeUnit::Nanoseconds, ) - .into(); - let y: Series = DatetimeChunked::from_naive_datetime( + .into_column(); + let y = DatetimeChunked::from_naive_datetime( "y".into(), [ NaiveDateTime::new(date, NaiveTime::from_hms_opt(11, 0, 0).unwrap()), @@ -433,14 +440,14 @@ fn test_lazy_query_10() { ], TimeUnit::Nanoseconds, ) - .into(); + .into_column(); let df = DataFrame::new(vec![x, y]).unwrap(); let out = df .lazy() .select(&[(col("x") - col("y")).alias("z")]) .collect() .unwrap(); - let z: Series = DurationChunked::from_duration( + let z = DurationChunked::from_duration( "z".into(), [ ChronoDuration::try_hours(1).unwrap(), @@ -449,9 +456,9 @@ fn test_lazy_query_10() { ], TimeUnit::Nanoseconds, ) - .into(); + .into_column(); assert!(out.column("z").unwrap().equals(&z)); - let x: Series = DatetimeChunked::from_naive_datetime( + let x = DatetimeChunked::from_naive_datetime( "x".into(), [ NaiveDateTime::new(date, NaiveTime::from_hms_opt(2, 0, 0).unwrap()), @@ -460,8 +467,8 @@ fn test_lazy_query_10() { ], TimeUnit::Milliseconds, ) - .into(); - let y: Series = DatetimeChunked::from_naive_datetime( + .into_column(); + let y = DatetimeChunked::from_naive_datetime( "y".into(), [ NaiveDateTime::new(date, NaiveTime::from_hms_opt(1, 0, 0).unwrap()), @@ -470,7 +477,7 @@ fn test_lazy_query_10() { ], TimeUnit::Nanoseconds, ) - .into(); + .into_column(); let df = DataFrame::new(vec![x, y]).unwrap(); let out = df .lazy() @@ -501,8 +508,9 @@ fn test_lazy_query_7() { ]; let data = vec![Some(1.), Some(2.), Some(3.), Some(4.), None, None]; let df = DataFrame::new(vec![ - DatetimeChunked::from_naive_datetime("date".into(), dates, 
TimeUnit::Nanoseconds).into(), - Series::new("data".into(), data), + DatetimeChunked::from_naive_datetime("date".into(), dates, TimeUnit::Nanoseconds) + .into_column(), + Column::new("data".into(), data), ]) .unwrap(); // this tests if predicate pushdown not interferes with the shift data. @@ -516,14 +524,20 @@ fn test_lazy_query_7() { )))) .collect() .unwrap(); - let a = out.column("shifted").unwrap().sum::().unwrap() - 7.0; + let a = out + .column("shifted") + .unwrap() + .as_materialized_series() + .sum::() + .unwrap() + - 7.0; assert!(a < 0.01 && a > -0.01); } #[test] fn test_lazy_shift_and_fill_all() { let data = &[1, 2, 3]; - let df = DataFrame::new(vec![Series::new("data".into(), data)]).unwrap(); + let df = DataFrame::new(vec![Column::new("data".into(), data)]).unwrap(); let out = df .lazy() .with_column(col("data").shift(lit(1)).fill_null(lit(0)).alias("output")) @@ -714,7 +728,7 @@ fn test_lazy_group_by_apply() { df.lazy() .group_by([col("fruits")]) .agg([col("cars").apply( - |s: Series| Ok(Some(Series::new("".into(), &[s.len() as u32]))), + |s: Column| Ok(Some(Column::new("".into(), &[s.len() as u32]))), GetOutput::from_type(DataType::UInt32), )]) .collect() @@ -1120,7 +1134,7 @@ fn test_filter_lit() { // see https://github.com/pola-rs/polars/issues/790 // failed due to broadcasting filters and splitting threads. 
let iter = (0..100).map(|i| ('A'..='Z').nth(i % 26).unwrap().to_string()); - let a = Series::from_iter(iter); + let a = Series::from_iter(iter).into_column(); let df = DataFrame::new([a].into()).unwrap(); let out = df.lazy().filter(lit(true)).collect().unwrap(); @@ -1471,10 +1485,10 @@ fn test_singleton_broadcast() -> PolarsResult<()> { #[test] fn test_list_in_select_context() -> PolarsResult<()> { - let s = Series::new("a".into(), &[1, 2, 3]); + let s = Column::new("a".into(), &[1, 2, 3]); let mut builder = get_list_builder(s.dtype(), s.len(), 1, s.name().clone()).unwrap(); - builder.append_series(&s).unwrap(); - let expected = builder.finish().into_series(); + builder.append_series(s.as_materialized_series()).unwrap(); + let expected = builder.finish().into_column(); let df = DataFrame::new(vec![s])?; @@ -1549,8 +1563,8 @@ fn test_round_after_agg() -> PolarsResult<()> { #[test] #[cfg(feature = "dtype-date")] fn test_fill_nan() -> PolarsResult<()> { - let s0 = Series::new("date".into(), &[1, 2, 3]).cast(&DataType::Date)?; - let s1 = Series::new("float".into(), &[Some(1.0), Some(f32::NAN), Some(3.0)]); + let s0 = Column::new("date".into(), &[1, 2, 3]).cast(&DataType::Date)?; + let s1 = Column::new("float".into(), &[Some(1.0), Some(f32::NAN), Some(3.0)]); let df = DataFrame::new(vec![s0, s1])?; let out = df.lazy().fill_nan(Null {}.lit()).collect()?; diff --git a/crates/polars-mem-engine/src/executors/group_by.rs b/crates/polars-mem-engine/src/executors/group_by.rs index 8542f9fbb338..1ae612f64d67 100644 --- a/crates/polars-mem-engine/src/executors/group_by.rs +++ b/crates/polars-mem-engine/src/executors/group_by.rs @@ -56,7 +56,7 @@ impl GroupByExec { #[allow(clippy::too_many_arguments)] pub(super) fn group_by_helper( mut df: DataFrame, - keys: Vec, + keys: Vec, aggs: &[Arc], apply: Option>, state: &ExecutionState, @@ -88,9 +88,8 @@ pub(super) fn group_by_helper( rayon::join(get_columns, get_agg) }); - let agg_columns = agg_columns?; - 
columns.extend_from_slice(&agg_columns); + columns.extend(agg_columns?.into_iter().map(Column::from)); DataFrame::new(columns) } @@ -99,7 +98,7 @@ impl GroupByExec { let keys = self .keys .iter() - .map(|e| e.evaluate(&df, state)) + .map(|e| e.evaluate(&df, state).map(Column::from)) .collect::>()?; group_by_helper( df, diff --git a/crates/polars-mem-engine/src/executors/group_by_dynamic.rs b/crates/polars-mem-engine/src/executors/group_by_dynamic.rs index 5fe6dca17015..b5f98666d281 100644 --- a/crates/polars-mem-engine/src/executors/group_by_dynamic.rs +++ b/crates/polars-mem-engine/src/executors/group_by_dynamic.rs @@ -25,7 +25,7 @@ impl GroupByDynamicExec { let keys = self .keys .iter() - .map(|e| e.evaluate(&df, state)) + .map(|e| e.evaluate(&df, state).map(Column::from)) .collect::>>()?; let (mut time_key, mut keys, groups) = df.group_by_dynamic(keys, &self.options)?; @@ -63,7 +63,7 @@ impl GroupByDynamicExec { let mut columns = Vec::with_capacity(agg_columns.len() + 1 + keys.len()); columns.extend_from_slice(&keys); columns.push(time_key); - columns.extend_from_slice(&agg_columns); + columns.extend(agg_columns.into_iter().map(Column::from)); DataFrame::new(columns) } diff --git a/crates/polars-mem-engine/src/executors/group_by_partitioned.rs b/crates/polars-mem-engine/src/executors/group_by_partitioned.rs index ec4a691eb547..83c6ec2e5bda 100644 --- a/crates/polars-mem-engine/src/executors/group_by_partitioned.rs +++ b/crates/polars-mem-engine/src/executors/group_by_partitioned.rs @@ -48,7 +48,7 @@ impl PartitionGroupByExec { } } - fn keys(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult> { + fn keys(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult> { compute_keys(&self.phys_keys, df, state) } } @@ -57,8 +57,10 @@ fn compute_keys( keys: &[Arc], df: &DataFrame, state: &ExecutionState, -) -> PolarsResult> { - keys.iter().map(|s| s.evaluate(df, state)).collect() +) -> PolarsResult> { + keys.iter() + .map(|s| s.evaluate(df, 
state).map(Column::from)) + .collect() } fn run_partitions( @@ -67,7 +69,7 @@ fn run_partitions( state: &ExecutionState, n_threads: usize, maintain_order: bool, -) -> PolarsResult<(Vec, Vec>)> { +) -> PolarsResult<(Vec, Vec>)> { // We do a partitioned group_by. // Meaning that we first do the group_by operation arbitrarily // split on several threads. Than the final result we apply the same group_by again. @@ -102,7 +104,8 @@ fn run_partitions( } } else { agg - }) + } + .into_column()) }) .collect::>>()?; @@ -115,7 +118,7 @@ fn run_partitions( }) } -fn estimate_unique_count(keys: &[Series], mut sample_size: usize) -> PolarsResult { +fn estimate_unique_count(keys: &[Column], mut sample_size: usize) -> PolarsResult { // https://stats.stackexchange.com/a/19090/147321 // estimated unique size // u + ui / m (s - m) @@ -144,13 +147,14 @@ fn estimate_unique_count(keys: &[Series], mut sample_size: usize) -> PolarsResul // not that sampling without replacement is very very expensive. don't do that. let s = keys[0].sample_n(sample_size, true, false, None).unwrap(); // fast multi-threaded way to get unique. - let groups = s.group_tuples(true, false)?; + let groups = s.as_materialized_series().group_tuples(true, false)?; Ok(finish(&groups)) } else { let offset = (keys[0].len() / 2) as i64; let keys = keys .iter() .map(|s| s.slice(offset, sample_size)) + .map(Column::from) .collect::>(); let df = unsafe { DataFrame::new_no_checks(keys) }; let names = df.get_column_names().into_iter().cloned(); @@ -168,7 +172,7 @@ const PARTITION_LIMIT: usize = 1000; // Checks if we should run normal or default aggregation // by sampling data. 
fn can_run_partitioned( - keys: &[Series], + keys: &[Column], original_df: &DataFrame, state: &ExecutionState, from_partitioned_ds: bool, @@ -327,7 +331,13 @@ impl PartitionGroupByExec { .zip(&df.get_columns()[self.phys_keys.len()..]) .map(|(expr, partitioned_s)| { let agg_expr = expr.as_partitioned_aggregator().unwrap(); - agg_expr.finalize(partitioned_s.clone(), groups, state) + agg_expr + .finalize( + partitioned_s.as_materialized_series().clone(), + groups, + state, + ) + .map(Column::from) }) .collect(); diff --git a/crates/polars-mem-engine/src/executors/group_by_rolling.rs b/crates/polars-mem-engine/src/executors/group_by_rolling.rs index 810365b25bc6..8ad2352572a0 100644 --- a/crates/polars-mem-engine/src/executors/group_by_rolling.rs +++ b/crates/polars-mem-engine/src/executors/group_by_rolling.rs @@ -13,7 +13,7 @@ pub(crate) struct GroupByRollingExec { } #[cfg(feature = "dynamic_group_by")] -unsafe fn update_keys(keys: &mut [Series], groups: &GroupsProxy) { +unsafe fn update_keys(keys: &mut [Column], groups: &GroupsProxy) { match groups { GroupsProxy::Idx(groups) => { let first = groups.first(); @@ -21,7 +21,7 @@ unsafe fn update_keys(keys: &mut [Series], groups: &GroupsProxy) { // can be empty, but we still want to know the first value // of that group for key in keys.iter_mut() { - *key = key.take_unchecked_from_slice(first); + *key = key.take_slice_unchecked(first); } }, GroupsProxy::Slice { groups, .. 
} => { @@ -48,7 +48,7 @@ impl GroupByRollingExec { let keys = self .keys .iter() - .map(|e| e.evaluate(&df, state)) + .map(|e| e.evaluate(&df, state).map(Column::from)) .collect::>>()?; let (mut time_key, mut keys, groups) = df.rolling(keys, &self.options)?; @@ -85,7 +85,7 @@ impl GroupByRollingExec { let mut columns = Vec::with_capacity(agg_columns.len() + 1 + keys.len()); columns.extend_from_slice(&keys); columns.push(time_key); - columns.extend_from_slice(&agg_columns); + columns.extend(agg_columns.into_iter().map(Column::from)); DataFrame::new(columns) } diff --git a/crates/polars-mem-engine/src/executors/projection_utils.rs b/crates/polars-mem-engine/src/executors/projection_utils.rs index bd0e189f0b14..8287c923969a 100644 --- a/crates/polars-mem-engine/src/executors/projection_utils.rs +++ b/crates/polars-mem-engine/src/executors/projection_utils.rs @@ -334,6 +334,9 @@ pub(super) fn check_expand_literals( .collect::>()? } + // @scalar-opt + let selected_columns = selected_columns.into_iter().map(Column::from).collect(); + let df = unsafe { DataFrame::new_no_checks(selected_columns) }; // a literal could be projected to a zero length dataframe. 
diff --git a/crates/polars-mem-engine/src/executors/scan/python_scan.rs b/crates/polars-mem-engine/src/executors/scan/python_scan.rs index 270c52ea963c..067895ed593f 100644 --- a/crates/polars-mem-engine/src/executors/scan/python_scan.rs +++ b/crates/polars-mem-engine/src/executors/scan/python_scan.rs @@ -24,7 +24,7 @@ fn python_df_to_rust(py: Python, df: Bound) -> PolarsResult { let (ptr, len, cap) = raw_parts; unsafe { Ok(DataFrame::new_no_checks(Vec::from_raw_parts( - ptr as *mut Series, + ptr as *mut Column, len, cap, ))) diff --git a/crates/polars-mem-engine/src/executors/sort.rs b/crates/polars-mem-engine/src/executors/sort.rs index 23374abea7ac..a50e38af2750 100644 --- a/crates/polars-mem-engine/src/executors/sort.rs +++ b/crates/polars-mem-engine/src/executors/sort.rs @@ -25,7 +25,7 @@ impl SortExec { .iter() .enumerate() .map(|(i, e)| { - let mut s = e.evaluate(&df, state)?; + let mut s = e.evaluate(&df, state)?.into_column(); // Polars core will try to set the sorted columns as sorted. // This should only be done with simple col("foo") expressions, // therefore we rename more complex expressions so that diff --git a/crates/polars-mem-engine/src/executors/stack.rs b/crates/polars-mem-engine/src/executors/stack.rs index 440fbdd619ca..43c884b1f563 100644 --- a/crates/polars-mem-engine/src/executors/stack.rs +++ b/crates/polars-mem-engine/src/executors/stack.rs @@ -37,7 +37,7 @@ impl StackExec { self.options.run_parallel, )?; // We don't have to do a broadcast check as cse is not allowed to hit this. - df._add_columns(res, schema)?; + df._add_series(res, schema)?; Ok(df) }); @@ -64,7 +64,7 @@ impl StackExec { // new, unique column names. It is immediately // followed by a projection which pulls out the // possibly mismatching column lengths. 
- unsafe { df.get_columns_mut().extend(res) }; + unsafe { df.get_columns_mut() }.extend(res.into_iter().map(Column::from)); } else { let height = df.height(); @@ -86,7 +86,7 @@ impl StackExec { ); } } - df._add_columns(res, schema)?; + df._add_series(res, schema)?; } df }; diff --git a/crates/polars-ops/src/chunked_array/array/to_struct.rs b/crates/polars-ops/src/chunked_array/array/to_struct.rs index b79a9ffcfe9f..b00dbbf4d43b 100644 --- a/crates/polars-ops/src/chunked_array/array/to_struct.rs +++ b/crates/polars-ops/src/chunked_array/array/to_struct.rs @@ -40,7 +40,7 @@ pub trait ToStruct: AsArray { .collect::>>() })?; - StructChunked::from_series(ca.name().clone(), &fields) + StructChunked::from_series(ca.name().clone(), fields.iter()) } } diff --git a/crates/polars-ops/src/chunked_array/hist.rs b/crates/polars-ops/src/chunked_array/hist.rs index 8d7781745531..ca906d12851c 100644 --- a/crates/polars-ops/src/chunked_array/hist.rs +++ b/crates/polars-ops/src/chunked_array/hist.rs @@ -136,7 +136,7 @@ where let out = fields.pop().unwrap(); out.with_name(ca.name().clone()) } else { - StructChunked::from_series(ca.name().clone(), &fields) + StructChunked::from_series(ca.name().clone(), fields.iter()) .unwrap() .into_series() } diff --git a/crates/polars-ops/src/chunked_array/list/namespace.rs b/crates/polars-ops/src/chunked_array/list/namespace.rs index 0c7a0975488c..e16ac5da4453 100644 --- a/crates/polars-ops/src/chunked_array/list/namespace.rs +++ b/crates/polars-ops/src/chunked_array/list/namespace.rs @@ -31,7 +31,7 @@ pub(super) fn has_inner_nulls(ca: &ListChunked) -> bool { } fn cast_rhs( - other: &mut [Series], + other: &mut [Column], inner_type: &DataType, dtype: &DataType, length: usize, @@ -294,7 +294,7 @@ pub trait ListNameSpaceImpl: AsList { ca.try_apply_amortized(|s| diff(s.as_ref(), n, null_behavior)) } - fn lst_shift(&self, periods: &Series) -> PolarsResult { + fn lst_shift(&self, periods: &Column) -> PolarsResult { let ca = self.as_list(); let periods_s 
= periods.cast(&DataType::Int64)?; let periods = periods_s.i64()?; @@ -584,7 +584,7 @@ pub trait ListNameSpaceImpl: AsList { out.map(|ok| self.same_type(ok)) } - fn lst_concat(&self, other: &[Series]) -> PolarsResult { + fn lst_concat(&self, other: &[Column]) -> PolarsResult { let ca = self.as_list(); let other_len = other.len(); let length = ca.len(); diff --git a/crates/polars-ops/src/chunked_array/list/to_struct.rs b/crates/polars-ops/src/chunked_array/list/to_struct.rs index 73798163ed48..cdd245bce8b7 100644 --- a/crates/polars-ops/src/chunked_array/list/to_struct.rs +++ b/crates/polars-ops/src/chunked_array/list/to_struct.rs @@ -80,7 +80,7 @@ pub trait ToStruct: AsList { .collect::>>() })?; - StructChunked::from_series(ca.name().clone(), &fields) + StructChunked::from_series(ca.name().clone(), fields.iter()) } } diff --git a/crates/polars-ops/src/chunked_array/strings/extract.rs b/crates/polars-ops/src/chunked_array/strings/extract.rs index 35f38e40d61d..cb26d66f7aff 100644 --- a/crates/polars-ops/src/chunked_array/strings/extract.rs +++ b/crates/polars-ops/src/chunked_array/strings/extract.rs @@ -50,7 +50,7 @@ pub(super) fn extract_groups( if n_fields == 1 { return StructChunked::from_series( ca.name().clone(), - &[Series::new_null(ca.name().clone(), ca.len())], + [Series::new_null(ca.name().clone(), ca.len())].iter(), ) .map(|ca| ca.into_series()); } diff --git a/crates/polars-ops/src/chunked_array/strings/json_path.rs b/crates/polars-ops/src/chunked_array/strings/json_path.rs index a25ce1937332..fe8783530d6a 100644 --- a/crates/polars-ops/src/chunked_array/strings/json_path.rs +++ b/crates/polars-ops/src/chunked_array/strings/json_path.rs @@ -204,10 +204,11 @@ mod tests { let expected_series = StructChunked::from_series( "".into(), - &[ + [ Series::new("a".into(), &[None, Some(1), Some(2), None]), Series::new("b".into(), &[None, Some("hello"), Some("goodbye"), None]), - ], + ] + .iter(), ) .unwrap() 
.with_outer_validity_chunked(BooleanChunked::new("".into(), [false, true, true, false])) diff --git a/crates/polars-ops/src/chunked_array/strings/namespace.rs b/crates/polars-ops/src/chunked_array/strings/namespace.rs index 1f2899764e4f..07c8fc600fbd 100644 --- a/crates/polars-ops/src/chunked_array/strings/namespace.rs +++ b/crates/polars-ops/src/chunked_array/strings/namespace.rs @@ -418,7 +418,7 @@ pub trait StringNameSpaceImpl: AsString { Ok(builder.finish()) } - fn strip_chars(&self, pat: &Series) -> PolarsResult { + fn strip_chars(&self, pat: &Column) -> PolarsResult { let ca = self.as_string(); if pat.dtype() == &DataType::Null { Ok(unary_elementwise(ca, |opt_s| opt_s.map(|s| s.trim()))) @@ -427,7 +427,7 @@ pub trait StringNameSpaceImpl: AsString { } } - fn strip_chars_start(&self, pat: &Series) -> PolarsResult { + fn strip_chars_start(&self, pat: &Column) -> PolarsResult { let ca = self.as_string(); if pat.dtype() == &DataType::Null { return Ok(unary_elementwise(ca, |opt_s| opt_s.map(|s| s.trim_start()))); @@ -436,7 +436,7 @@ pub trait StringNameSpaceImpl: AsString { } } - fn strip_chars_end(&self, pat: &Series) -> PolarsResult { + fn strip_chars_end(&self, pat: &Column) -> PolarsResult { let ca = self.as_string(); if pat.dtype() == &DataType::Null { return Ok(unary_elementwise(ca, |opt_s| opt_s.map(|s| s.trim_end()))); @@ -609,7 +609,7 @@ pub trait StringNameSpaceImpl: AsString { /// /// Determines a substring starting from `offset` and with length `length` of each of the elements in `array`. /// `offset` can be negative, in which case the start counts from the end of the string. - fn str_slice(&self, offset: &Series, length: &Series) -> PolarsResult { + fn str_slice(&self, offset: &Column, length: &Column) -> PolarsResult { let ca = self.as_string(); let offset = offset.cast(&DataType::Int64)?; // We strict cast, otherwise negative value will be treated as a valid length. 
@@ -623,7 +623,7 @@ pub trait StringNameSpaceImpl: AsString { /// Determines a substring starting at the beginning of the string up to offset `n` of each /// element in `array`. `n` can be negative, in which case the slice ends `n` characters from /// the end of the string. - fn str_head(&self, n: &Series) -> PolarsResult { + fn str_head(&self, n: &Column) -> PolarsResult { let ca = self.as_string(); let n = n.strict_cast(&DataType::Int64)?; @@ -634,7 +634,7 @@ pub trait StringNameSpaceImpl: AsString { /// /// Determines a substring starting at offset `n` of each element in `array`. `n` can be /// negative, in which case the slice begins `n` characters from the start of the string. - fn str_tail(&self, n: &Series) -> PolarsResult { + fn str_tail(&self, n: &Column) -> PolarsResult { let ca = self.as_string(); let n = n.strict_cast(&DataType::Int64)?; diff --git a/crates/polars-ops/src/chunked_array/strings/split.rs b/crates/polars-ops/src/chunked_array/strings/split.rs index d86e0efac2ae..31c15a70cb08 100644 --- a/crates/polars-ops/src/chunked_array/strings/split.rs +++ b/crates/polars-ops/src/chunked_array/strings/split.rs @@ -149,7 +149,7 @@ where }) .collect::>(); - StructChunked::from_series(ca.name().clone(), &fields) + StructChunked::from_series(ca.name().clone(), fields.iter()) } pub fn split_helper<'a, F, I>(ca: &'a StringChunked, by: &'a StringChunked, op: F) -> ListChunked diff --git a/crates/polars-ops/src/chunked_array/top_k.rs b/crates/polars-ops/src/chunked_array/top_k.rs index 9772a5593be0..9caf861b6cd9 100644 --- a/crates/polars-ops/src/chunked_array/top_k.rs +++ b/crates/polars-ops/src/chunked_array/top_k.rs @@ -145,8 +145,8 @@ fn top_k_binary_impl( ChunkedArray::with_chunk_like(ca, arr) } -pub fn top_k(s: &[Series], descending: bool) -> PolarsResult { - fn extract_target_and_k(s: &[Series]) -> PolarsResult<(usize, &Series)> { +pub fn top_k(s: &[Column], descending: bool) -> PolarsResult { + fn extract_target_and_k(s: &[Column]) -> 
PolarsResult<(usize, &Column)> { let k_s = &s[1]; polars_ensure!( k_s.len() == 1, @@ -197,20 +197,20 @@ pub fn top_k(s: &[Series], descending: bool) -> PolarsResult { let s = src.to_physical_repr(); match s.dtype() { - DataType::Boolean => Ok(top_k_bool_impl(s.bool().unwrap(), k, descending).into_series()), + DataType::Boolean => Ok(top_k_bool_impl(s.bool().unwrap(), k, descending).into_column()), DataType::String => { let ca = top_k_binary_impl(&s.str().unwrap().as_binary(), k, descending); let ca = unsafe { ca.to_string_unchecked() }; - Ok(ca.into_series()) + Ok(ca.into_column()) }, - DataType::Binary => Ok(top_k_binary_impl(s.binary().unwrap(), k, descending).into_series()), + DataType::Binary => Ok(top_k_binary_impl(s.binary().unwrap(), k, descending).into_column()), #[cfg(feature = "dtype-decimal")] DataType::Decimal(_, _) => { let src = src.decimal().unwrap(); let ca = top_k_num_impl(src, k, descending); let mut lca = DecimalChunked::new_logical(ca); lca.2 = Some(DataType::Decimal(src.precision(), Some(src.scale()))); - Ok(lca.into_series()) + Ok(lca.into_column()) }, DataType::Null => Ok(src.slice(0, k)), #[cfg(feature = "dtype-struct")] @@ -221,7 +221,7 @@ pub fn top_k(s: &[Series], descending: bool) -> PolarsResult { _dt => { macro_rules! 
dispatch { ($ca:expr) => {{ - top_k_num_impl($ca, k, descending).into_series() + top_k_num_impl($ca, k, descending).into_column() }}; } unsafe { downcast_as_macro_arg_physical!(&s, dispatch).cast_unchecked(origin_dtype) } @@ -229,9 +229,9 @@ pub fn top_k(s: &[Series], descending: bool) -> PolarsResult { } } -pub fn top_k_by(s: &[Series], descending: Vec) -> PolarsResult { +pub fn top_k_by(s: &[Column], descending: Vec) -> PolarsResult { /// Return (k, src, by) - fn extract_parameters(s: &[Series]) -> PolarsResult<(usize, &Series, &[Series])> { + fn extract_parameters(s: &[Column]) -> PolarsResult<(usize, &Column, &[Column])> { let k_s = &s[1]; polars_ensure!( @@ -271,10 +271,10 @@ pub fn top_k_by(s: &[Series], descending: Vec) -> PolarsResult { fn top_k_by_impl( k: usize, - src: &Series, - by: &[Series], + src: &Column, + by: &[Column], descending: Vec, -) -> PolarsResult { +) -> PolarsResult { if src.is_empty() { return Ok(src.clone()); } @@ -289,6 +289,9 @@ fn top_k_by_impl( let idx = _arg_bottom_k(k, by, &mut sort_options)?; - let result = unsafe { src.take_unchecked(&idx.into_inner()) }; - Ok(result) + let result = unsafe { + src.as_materialized_series() + .take_unchecked(&idx.into_inner()) + }; + Ok(result.into()) } diff --git a/crates/polars-ops/src/frame/join/asof/groups.rs b/crates/polars-ops/src/frame/join/asof/groups.rs index 81b05a4b752d..9332b10e392b 100644 --- a/crates/polars-ops/src/frame/join/asof/groups.rs +++ b/crates/polars-ops/src/frame/join/asof/groups.rs @@ -398,8 +398,8 @@ where F: Sync + for<'a> Fn(T::Physical<'a>, T::Physical<'a>) -> bool, { let out = if left_by.width() == 1 { - let left_by_s = left_by.get_columns()[0].to_physical_repr().into_owned(); - let right_by_s = right_by.get_columns()[0].to_physical_repr().into_owned(); + let left_by_s = left_by.get_columns()[0].to_physical_repr(); + let right_by_s = right_by.get_columns()[0].to_physical_repr(); let left_dtype = left_by_s.dtype(); let right_dtype = right_by_s.dtype(); 
polars_ensure!(left_dtype == right_dtype, @@ -418,8 +418,8 @@ where }, x if x.is_float() => { with_match_physical_float_polars_type!(left_by_s.dtype(), |$T| { - let left_by: &ChunkedArray<$T> = left_by_s.as_ref().as_ref().as_ref(); - let right_by: &ChunkedArray<$T> = right_by_s.as_ref().as_ref().as_ref(); + let left_by: &ChunkedArray<$T> = left_by_s.as_materialized_series().as_ref().as_ref().as_ref(); + let right_by: &ChunkedArray<$T> = right_by_s.as_materialized_series().as_ref().as_ref().as_ref(); asof_join_by_numeric::( left_by, right_by, left_asof, right_asof, filter, )? @@ -648,8 +648,8 @@ pub trait AsofJoinBy: IntoDf { { #[cfg(feature = "dtype-categorical")] _check_categorical_src(l.dtype(), r.dtype())?; - *l = l.to_physical_repr().into_owned(); - *r = r.to_physical_repr().into_owned(); + *l = l.to_physical_repr(); + *r = r.to_physical_repr(); } } @@ -707,8 +707,8 @@ pub trait AsofJoinBy: IntoDf { let self_df = self.to_df(); let left_by = left_by.into_iter().map(|s| s.as_ref().into()).collect(); let right_by = right_by.into_iter().map(|s| s.as_ref().into()).collect(); - let left_key = self_df.column(left_on)?; - let right_key = other.column(right_on)?; + let left_key = self_df.column(left_on)?.as_materialized_series(); + let right_key = other.column(right_on)?.as_materialized_series(); self_df._join_asof_by( other, left_key, right_key, left_by, right_by, strategy, tolerance, None, None, true, ) diff --git a/crates/polars-ops/src/frame/join/general.rs b/crates/polars-ops/src/frame/join/general.rs index 5840b853425c..1420d7b66062 100644 --- a/crates/polars-ops/src/frame/join/general.rs +++ b/crates/polars-ops/src/frame/join/general.rs @@ -1,7 +1,7 @@ use polars_utils::format_pl_smallstr; use super::*; -use crate::series::coalesce_series; +use crate::series::coalesce_columns; pub fn _join_suffix_name(name: &str, suffix: &str) -> PlSmallStr { format_pl_smallstr!("{name}{suffix}") @@ -83,7 +83,7 @@ pub fn _coalesce_full_join( let l = columns[pos_l].clone(); let r 
= columns[pos_r].clone(); - columns[pos_l] = coalesce_series(&[l, r]).unwrap(); + columns[pos_l] = coalesce_columns(&[l, r]).unwrap(); to_remove.push(pos_r); } // sort in reverse order, so the indexes remain correct if we remove. diff --git a/crates/polars-ops/src/frame/join/merge_sorted.rs b/crates/polars-ops/src/frame/join/merge_sorted.rs index a9f02c2904cd..a180b293ca0f 100644 --- a/crates/polars-ops/src/frame/join/merge_sorted.rs +++ b/crates/polars-ops/src/frame/join/merge_sorted.rs @@ -36,7 +36,11 @@ pub fn _merge_sorted_dfs( let lhs_phys = lhs.to_physical_repr(); let rhs_phys = rhs.to_physical_repr(); - let out = merge_series(&lhs_phys, &rhs_phys, &merge_indicator)?; + let out = Column::from(merge_series( + lhs_phys.as_materialized_series(), + rhs_phys.as_materialized_series(), + &merge_indicator, + )?); let mut out = out.cast(lhs.dtype()).unwrap(); out.rename(lhs.name().clone()); Ok(out) @@ -81,7 +85,7 @@ fn merge_series(lhs: &Series, rhs: &Series, merge_indicator: &[bool]) -> PolarsR .zip(rhs.fields_as_series()) .map(|(lhs, rhs)| merge_series(lhs, &rhs, merge_indicator)) .collect::>>()?; - StructChunked::from_series(PlSmallStr::EMPTY, &new_fields) + StructChunked::from_series(PlSmallStr::EMPTY, new_fields.iter()) .unwrap() .into_series() }, diff --git a/crates/polars-ops/src/frame/join/mod.rs b/crates/polars-ops/src/frame/join/mod.rs index 89507ac216c5..81f4fe54e7e4 100644 --- a/crates/polars-ops/src/frame/join/mod.rs +++ b/crates/polars-ops/src/frame/join/mod.rs @@ -93,8 +93,18 @@ pub trait DataFrameJoinOps: IntoDf { args: JoinArgs, ) -> PolarsResult { let df_left = self.to_df(); - let selected_left = df_left.select_series(left_on)?; - let selected_right = other.select_series(right_on)?; + let selected_left = df_left.select_columns(left_on)?; + let selected_right = other.select_columns(right_on)?; + + let selected_left = selected_left + .into_iter() + .map(Column::take_materialized_series) + .collect::>(); + let selected_right = selected_right + 
.into_iter() + .map(Column::take_materialized_series) + .collect::>(); + self._join_impl(other, selected_left, selected_right, args, true, false) } @@ -537,7 +547,19 @@ pub fn private_left_join_multiple_keys( b: &DataFrame, join_nulls: bool, ) -> PolarsResult { - let a = prepare_keys_multiple(a.get_columns(), join_nulls)?.into_series(); - let b = prepare_keys_multiple(b.get_columns(), join_nulls)?.into_series(); + // @scalar-opt + let a_cols = a + .get_columns() + .iter() + .map(|c| c.as_materialized_series().clone()) + .collect::>(); + let b_cols = b + .get_columns() + .iter() + .map(|c| c.as_materialized_series().clone()) + .collect::>(); + + let a = prepare_keys_multiple(&a_cols, join_nulls)?.into_series(); + let b = prepare_keys_multiple(&b_cols, join_nulls)?.into_series(); sort_or_hash_left(&a, &b, false, JoinValidation::ManyToMany, join_nulls) } diff --git a/crates/polars-ops/src/frame/mod.rs b/crates/polars-ops/src/frame/mod.rs index 5691919c8861..539d4e0cebc1 100644 --- a/crates/polars-ops/src/frame/mod.rs +++ b/crates/polars-ops/src/frame/mod.rs @@ -106,7 +106,7 @@ pub trait DataFrameOps: IntoDf { df.get_columns() .par_iter() .map(|s| match set.contains(s.name().as_str()) { - true => s.to_dummies(separator, drop_first), + true => s.as_materialized_series().to_dummies(separator, drop_first), false => Ok(s.clone().into_frame()), }) .collect::>>() diff --git a/crates/polars-ops/src/frame/pivot/mod.rs b/crates/polars-ops/src/frame/pivot/mod.rs index d909b580f87b..15753a7c49a7 100644 --- a/crates/polars-ops/src/frame/pivot/mod.rs +++ b/crates/polars-ops/src/frame/pivot/mod.rs @@ -232,7 +232,8 @@ fn pivot_impl( polars_bail!(ComputeError: "cannot use column name {column} that \ already exists in the DataFrame. 
Please rename it prior to calling `pivot`.") } - let columns_struct = StructChunked::from_series(column.clone(), fields) + // @scalar-opt + let columns_struct = StructChunked::from_columns(column.clone(), fields) .unwrap() .into_series(); let mut binding = pivot_df.clone(); @@ -306,13 +307,13 @@ fn pivot_impl_single_column( First => value_col.agg_first(&groups), Mean => value_col.agg_mean(&groups), Median => value_col.agg_median(&groups), - Count => groups.group_count().into_series(), + Count => groups.group_count().into_column(), Expr(ref expr) => { let name = expr.root_name()?.clone(); let mut value_col = value_col.clone(); value_col.rename(name); let tmp_df = value_col.into_frame(); - let mut aggregated = expr.evaluate(&tmp_df, &groups)?; + let mut aggregated = Column::from(expr.evaluate(&tmp_df, &groups)?); aggregated.rename(value_col_name.clone()); aggregated }, @@ -354,7 +355,7 @@ fn pivot_impl_single_column( n_cols, &row_locations, &col_locations, - &value_agg_phys, + value_agg_phys.as_materialized_series(), logical_type, &headers, ) diff --git a/crates/polars-ops/src/frame/pivot/positioning.rs b/crates/polars-ops/src/frame/pivot/positioning.rs index 51761df873b5..0e0de1083c5b 100644 --- a/crates/polars-ops/src/frame/pivot/positioning.rs +++ b/crates/polars-ops/src/frame/pivot/positioning.rs @@ -16,7 +16,7 @@ pub(super) fn position_aggregates( value_agg_phys: &Series, logical_type: &DataType, headers: &StringChunked, -) -> Vec { +) -> Vec { let mut buf = vec![AnyValue::Null; n_rows * n_cols]; let start_ptr = buf.as_mut_ptr() as usize; @@ -93,7 +93,7 @@ pub(super) fn position_aggregates( }, _ => Series::from_any_values_and_dtype(name, avs, &phys_type, false).unwrap(), }; - unsafe { out.cast_unchecked(logical_type).unwrap() } + unsafe { out.cast_unchecked(logical_type).unwrap() }.into() }) .collect::>() }) @@ -107,7 +107,7 @@ pub(super) fn position_aggregates_numeric( value_agg_phys: &ChunkedArray, logical_type: &DataType, headers: &StringChunked, -) -> Vec +) 
-> Vec where T: PolarsNumericType, ChunkedArray: IntoSeries, @@ -172,7 +172,7 @@ where .map(PlSmallStr::from_str) .unwrap_or_else(|| PlSmallStr::from_static("null")); let out = ChunkedArray::::from_slice_options(name, opt_values).into_series(); - unsafe { out.cast_unchecked(logical_type).unwrap() } + unsafe { out.cast_unchecked(logical_type).unwrap() }.into() }) .collect::>() }) @@ -231,7 +231,7 @@ pub(super) fn compute_col_idx( pivot_df: &DataFrame, column: &str, groups: &GroupsProxy, -) -> PolarsResult<(Vec, Series)> { +) -> PolarsResult<(Vec, Column)> { let column_s = pivot_df.column(column)?; let column_agg = unsafe { column_s.agg_first(groups) }; let column_agg_physical = column_agg.to_physical_repr(); @@ -251,11 +251,17 @@ pub(super) fn compute_col_idx( compute_col_idx_numeric(&ca) }, T::Float64 => { - let ca: &ChunkedArray = column_agg_physical.as_ref().as_ref().as_ref(); + let ca: &ChunkedArray = column_agg_physical + .as_materialized_series() + .as_ref() + .as_ref(); compute_col_idx_numeric(ca) }, T::Float32 => { - let ca: &ChunkedArray = column_agg_physical.as_ref().as_ref().as_ref(); + let ca: &ChunkedArray = column_agg_physical + .as_materialized_series() + .as_ref() + .as_ref(); compute_col_idx_numeric(ca) }, T::Struct(_) => { @@ -280,6 +286,7 @@ pub(super) fn compute_col_idx( let mut col_to_idx = PlHashMap::with_capacity(HASHMAP_INIT_SIZE); let mut idx = 0 as IdxSize; column_agg_physical + .as_materialized_series() .phys_iter() .map(|v| { let idx = *col_to_idx.entry(v).or_insert_with(|| { @@ -301,7 +308,7 @@ fn compute_row_index<'a, T>( index_agg_physical: &'a ChunkedArray, count: usize, logical_type: &DataType, -) -> (Vec, usize, Option>) +) -> (Vec, usize, Option>) where T: PolarsDataType, T::Physical<'a>: TotalHash + TotalEq + Copy + ToTotalOrd, @@ -337,7 +344,7 @@ where .into_series(); s.rename(index[0].clone()); let s = restore_logical_type(&s, logical_type); - Some(vec![s]) + Some(vec![s.into()]) }, _ => None, }; @@ -350,7 +357,7 @@ fn 
compute_row_index_struct( index_agg: &Series, index_agg_physical: &BinaryOffsetChunked, count: usize, -) -> (Vec, usize, Option>) { +) -> (Vec, usize, Option>) { let mut row_to_idx = PlIndexMap::with_capacity_and_hasher(HASHMAP_INIT_SIZE, Default::default()); let mut idx = 0 as IdxSize; @@ -382,7 +389,7 @@ fn compute_row_index_struct( // 0 and `index_agg.len() - 1`. let mut s = unsafe { index_agg.take_slice_unchecked(&unique_indices) }; s.rename(index[0].clone()); - Some(vec![s]) + Some(vec![s.into()]) }, _ => None, }; @@ -396,7 +403,7 @@ pub(super) fn compute_row_idx( index: &[PlSmallStr], groups: &GroupsProxy, count: usize, -) -> PolarsResult<(Vec, usize, Option>)> { +) -> PolarsResult<(Vec, usize, Option>)> { let (row_locations, n_rows, row_index) = if index.len() == 1 { let index_s = pivot_df.column(&index[0])?; let index_agg = unsafe { index_s.agg_first(groups) }; @@ -417,11 +424,17 @@ pub(super) fn compute_row_idx( compute_row_index(index, &ca, count, index_s.dtype()) }, T::Float64 => { - let ca: &ChunkedArray = index_agg_physical.as_ref().as_ref().as_ref(); + let ca: &ChunkedArray = index_agg_physical + .as_materialized_series() + .as_ref() + .as_ref(); compute_row_index(index, ca, count, index_s.dtype()) }, T::Float32 => { - let ca: &ChunkedArray = index_agg_physical.as_ref().as_ref().as_ref(); + let ca: &ChunkedArray = index_agg_physical + .as_materialized_series() + .as_ref() + .as_ref(); compute_row_index(index, ca, count, index_s.dtype()) }, T::Boolean => { @@ -431,7 +444,7 @@ pub(super) fn compute_row_idx( T::Struct(_) => { let ca = index_agg_physical.struct_().unwrap(); let ca = ca.get_row_encoded(Default::default())?; - compute_row_index_struct(index, &index_agg, &ca, count) + compute_row_index_struct(index, index_agg.as_materialized_series(), &ca, count) }, T::String => { let ca = index_agg_physical.str().unwrap(); @@ -442,6 +455,7 @@ pub(super) fn compute_row_idx( PlIndexMap::with_capacity_and_hasher(HASHMAP_INIT_SIZE, Default::default()); let mut 
idx = 0 as IdxSize; let row_locations = index_agg_physical + .as_materialized_series() .phys_iter() .map(|v| { let idx = *row_to_idx.entry(v).or_insert_with(|| { @@ -460,7 +474,7 @@ pub(super) fn compute_row_idx( row_to_idx.into_iter().map(|(k, _)| k).collect::>(), ); let s = restore_logical_type(&s, index_s.dtype()); - Some(vec![s]) + Some(vec![Column::from(s)]) }, _ => None, }; @@ -472,7 +486,7 @@ pub(super) fn compute_row_idx( let binding = pivot_df.select(index.iter().cloned())?; let fields = binding.get_columns(); let index_struct_series = - StructChunked::from_series(PlSmallStr::from_static("placeholder"), fields)? + StructChunked::from_columns(PlSmallStr::from_static("placeholder"), fields)? .into_series(); let index_agg = unsafe { index_struct_series.agg_first(groups) }; let index_agg_physical = index_agg.to_physical_repr(); @@ -486,7 +500,8 @@ pub(super) fn compute_row_idx( polars_ensure!(ca.null_count() == 0, InvalidOperation: "outer nullability in struct pivot not yet supported"); - Ok(ca.fields_as_series()) + // @scalar-opt + Ok(ca.fields_as_series().into_iter().map(Column::from).collect()) }).transpose()?; (row_locations, n_rows, row_index) }; diff --git a/crates/polars-ops/src/frame/pivot/unpivot.rs b/crates/polars-ops/src/frame/pivot/unpivot.rs index a9255bdede0e..89c38e88c37b 100644 --- a/crates/polars-ops/src/frame/pivot/unpivot.rs +++ b/crates/polars-ops/src/frame/pivot/unpivot.rs @@ -1,6 +1,7 @@ use arrow::array::{MutableArray, MutablePlString}; use arrow::legacy::kernels::concatenate::concatenate_owned_unchecked; use polars_core::datatypes::{DataType, PlSmallStr}; +use polars_core::frame::column::Column; use polars_core::frame::DataFrame; use polars_core::prelude::{IntoVec, Series, UnpivotArgsIR}; use polars_core::utils::try_get_supertype; @@ -96,8 +97,8 @@ pub trait UnpivotDF: IntoDf { if self_.get_columns().is_empty() { return DataFrame::new(vec![ - Series::new_empty(variable_name, &DataType::String), - Series::new_empty(value_name, 
&DataType::Null), + Column::new_empty(variable_name, &DataType::String), + Column::new_empty(value_name, &DataType::Null), ]); } @@ -107,8 +108,8 @@ pub trait UnpivotDF: IntoDf { if on.is_empty() { // return empty frame if there are no columns available to use as value vars if index.len() == self_.width() { - let variable_col = Series::new_empty(variable_name, &DataType::String); - let value_col = Series::new_empty(value_name, &DataType::Null); + let variable_col = Column::new_empty(variable_name, &DataType::String); + let value_col = Column::new_empty(value_name, &DataType::Null); let mut out = self_.select(index).unwrap().clear().take_columns(); out.push(variable_col); @@ -167,13 +168,14 @@ pub trait UnpivotDF: IntoDf { let value_col = col.cast(&st).map_err( |_| polars_err!(InvalidOperation: "'unpivot' not supported for dtype: {}", col.dtype()), )?; - values.extend_from_slice(value_col.chunks()) + values.extend_from_slice(value_col.as_materialized_series().chunks()) } let values_arr = concatenate_owned_unchecked(&values)?; // SAFETY: // The give dtype is correct let values = - unsafe { Series::from_chunks_and_dtype_unchecked(value_name, vec![values_arr], &st) }; + unsafe { Series::from_chunks_and_dtype_unchecked(value_name, vec![values_arr], &st) } + .into(); let variable_col = variable_col.as_box(); // SAFETY: @@ -184,7 +186,8 @@ pub trait UnpivotDF: IntoDf { vec![variable_col], &DataType::String, ) - }; + } + .into(); ids.hstack_mut(&[variables, values])?; diff --git a/crates/polars-ops/src/series/ops/cut.rs b/crates/polars-ops/src/series/ops/cut.rs index cba643cf98e9..52cc2ee5a67a 100644 --- a/crates/polars-ops/src/series/ops/cut.rs +++ b/crates/polars-ops/src/series/ops/cut.rs @@ -57,13 +57,13 @@ fn map_cats( }, }); - let outvals = vec![ + let outvals = [ brk_vals.finish().into_series(), bld.finish() ._with_fast_unique(label_has_value.iter().all(bool::clone)) .into_series(), ]; - Ok(StructChunked::from_series(out_name, &outvals)?.into_series()) + 
Ok(StructChunked::from_series(out_name, outvals.iter())?.into_series()) } else { Ok(bld .drain_iter_and_finish(s_iter.map(|opt| { diff --git a/crates/polars-ops/src/series/ops/duration.rs b/crates/polars-ops/src/series/ops/duration.rs index 1d5868260e64..bdfb17114459 100644 --- a/crates/polars-ops/src/series/ops/duration.rs +++ b/crates/polars-ops/src/series/ops/duration.rs @@ -1,11 +1,11 @@ use arrow::temporal_conversions::{MICROSECONDS, MILLISECONDS, NANOSECONDS, SECONDS_IN_DAY}; use polars_core::datatypes::{AnyValue, DataType, TimeUnit}; -use polars_core::prelude::Series; +use polars_core::prelude::Column; use polars_error::PolarsResult; -pub fn impl_duration(s: &[Series], time_unit: TimeUnit) -> PolarsResult { +pub fn impl_duration(s: &[Column], time_unit: TimeUnit) -> PolarsResult { if s.iter().any(|s| s.is_empty()) { - return Ok(Series::new_empty( + return Ok(Column::new_empty( s[0].name().clone(), &DataType::Duration(time_unit), )); @@ -21,8 +21,8 @@ pub fn impl_duration(s: &[Series], time_unit: TimeUnit) -> PolarsResult let mut microseconds = s[6].cast(&DataType::Int64).unwrap(); let mut nanoseconds = s[7].cast(&DataType::Int64).unwrap(); - let is_scalar = |s: &Series| s.len() == 1; - let is_zero_scalar = |s: &Series| is_scalar(s) && s.get(0).unwrap() == AnyValue::Int64(0); + let is_scalar = |s: &Column| s.len() == 1; + let is_zero_scalar = |s: &Column| is_scalar(s) && s.get(0).unwrap() == AnyValue::Int64(0); // Process subseconds let max_len = s.iter().map(|s| s.len()).max().unwrap(); @@ -35,7 +35,7 @@ pub fn impl_duration(s: &[Series], time_unit: TimeUnit) -> PolarsResult microseconds = (microseconds + (nanoseconds.wrapping_trunc_div_scalar(1_000)))?; } if !is_zero_scalar(&milliseconds) { - microseconds = (microseconds + (milliseconds * 1_000))?; + microseconds = (microseconds + (milliseconds * 1_000)?)?; } microseconds }, @@ -44,10 +44,10 @@ pub fn impl_duration(s: &[Series], time_unit: TimeUnit) -> PolarsResult nanoseconds = 
nanoseconds.new_from_index(0, max_len); } if !is_zero_scalar(&microseconds) { - nanoseconds = (nanoseconds + (microseconds * 1_000))?; + nanoseconds = (nanoseconds + (microseconds * 1_000)?)?; } if !is_zero_scalar(&milliseconds) { - nanoseconds = (nanoseconds + (milliseconds * 1_000_000))?; + nanoseconds = (nanoseconds + (milliseconds * 1_000_000)?)?; } nanoseconds }, @@ -72,20 +72,22 @@ pub fn impl_duration(s: &[Series], time_unit: TimeUnit) -> PolarsResult TimeUnit::Milliseconds => MILLISECONDS, }; if !is_zero_scalar(&seconds) { - duration = (duration + seconds * multiplier)?; + duration = ((duration + seconds)? * multiplier)?; } if !is_zero_scalar(&minutes) { - duration = (duration + minutes * (multiplier * 60))?; + duration = ((duration + minutes)? * (multiplier * 60))?; } if !is_zero_scalar(&hours) { - duration = (duration + hours * (multiplier * 60 * 60))?; + duration = ((duration + hours)? * (multiplier * 60 * 60))?; } if !is_zero_scalar(&days) { - duration = (duration + days * (multiplier * SECONDS_IN_DAY))?; + duration = ((duration + days)? * (multiplier * SECONDS_IN_DAY))?; } if !is_zero_scalar(&weeks) { - duration = (duration + weeks * (multiplier * SECONDS_IN_DAY * 7))?; + duration = ((duration + weeks)? 
* (multiplier * SECONDS_IN_DAY * 7))?; } - duration.cast(&DataType::Duration(time_unit)) + duration + .cast(&DataType::Duration(time_unit)) + .map(Column::from) } diff --git a/crates/polars-ops/src/series/ops/fused.rs b/crates/polars-ops/src/series/ops/fused.rs index 16b06f76c479..8132eda7c22a 100644 --- a/crates/polars-ops/src/series/ops/fused.rs +++ b/crates/polars-ops/src/series/ops/fused.rs @@ -41,17 +41,20 @@ fn fma_ca( ChunkedArray::from_chunk_iter(a.name().clone(), chunks) } -pub fn fma_series(a: &Series, b: &Series, c: &Series) -> Series { +pub fn fma_columns(a: &Column, b: &Column, c: &Column) -> Column { if a.len() == b.len() && a.len() == c.len() { with_match_physical_numeric_polars_type!(a.dtype(), |$T| { - let a: &ChunkedArray<$T> = a.as_ref().as_ref().as_ref(); - let b: &ChunkedArray<$T> = b.as_ref().as_ref().as_ref(); - let c: &ChunkedArray<$T> = c.as_ref().as_ref().as_ref(); + let a: &ChunkedArray<$T> = a.as_materialized_series().as_ref().as_ref().as_ref(); + let b: &ChunkedArray<$T> = b.as_materialized_series().as_ref().as_ref().as_ref(); + let c: &ChunkedArray<$T> = c.as_materialized_series().as_ref().as_ref().as_ref(); - fma_ca(a, b, c).into_series() + fma_ca(a, b, c).into_column() }) } else { - (a + &(b * c).unwrap()).unwrap() + (a.as_materialized_series() + + &(b.as_materialized_series() * c.as_materialized_series()).unwrap()) + .unwrap() + .into() } } @@ -92,17 +95,20 @@ fn fsm_ca( ChunkedArray::from_chunk_iter(a.name().clone(), chunks) } -pub fn fsm_series(a: &Series, b: &Series, c: &Series) -> Series { +pub fn fsm_columns(a: &Column, b: &Column, c: &Column) -> Column { if a.len() == b.len() && a.len() == c.len() { with_match_physical_numeric_polars_type!(a.dtype(), |$T| { - let a: &ChunkedArray<$T> = a.as_ref().as_ref().as_ref(); - let b: &ChunkedArray<$T> = b.as_ref().as_ref().as_ref(); - let c: &ChunkedArray<$T> = c.as_ref().as_ref().as_ref(); + let a: &ChunkedArray<$T> = a.as_materialized_series().as_ref().as_ref().as_ref(); + let b: 
&ChunkedArray<$T> = b.as_materialized_series().as_ref().as_ref().as_ref(); + let c: &ChunkedArray<$T> = c.as_materialized_series().as_ref().as_ref().as_ref(); - fsm_ca(a, b, c).into_series() + fsm_ca(a, b, c).into_column() }) } else { - (a - &(b * c).unwrap()).unwrap() + (a.as_materialized_series() + - &(b.as_materialized_series() * c.as_materialized_series()).unwrap()) + .unwrap() + .into() } } @@ -142,16 +148,19 @@ fn fms_ca( ChunkedArray::from_chunk_iter(a.name().clone(), chunks) } -pub fn fms_series(a: &Series, b: &Series, c: &Series) -> Series { +pub fn fms_columns(a: &Column, b: &Column, c: &Column) -> Column { if a.len() == b.len() && a.len() == c.len() { with_match_physical_numeric_polars_type!(a.dtype(), |$T| { - let a: &ChunkedArray<$T> = a.as_ref().as_ref().as_ref(); - let b: &ChunkedArray<$T> = b.as_ref().as_ref().as_ref(); - let c: &ChunkedArray<$T> = c.as_ref().as_ref().as_ref(); + let a: &ChunkedArray<$T> = a.as_materialized_series().as_ref().as_ref().as_ref(); + let b: &ChunkedArray<$T> = b.as_materialized_series().as_ref().as_ref().as_ref(); + let c: &ChunkedArray<$T> = c.as_materialized_series().as_ref().as_ref().as_ref(); - fms_ca(a, b, c).into_series() + fms_ca(a, b, c).into_column() }) } else { - (&(a * b).unwrap() - c).unwrap() + (&(a.as_materialized_series() * b.as_materialized_series()).unwrap() + - c.as_materialized_series()) + .unwrap() + .into() } } diff --git a/crates/polars-ops/src/series/ops/horizontal.rs b/crates/polars-ops/src/series/ops/horizontal.rs index 4412e2aa21d1..53a392f920df 100644 --- a/crates/polars-ops/src/series/ops/horizontal.rs +++ b/crates/polars-ops/src/series/ops/horizontal.rs @@ -1,31 +1,35 @@ use polars_core::frame::NullStrategy; use polars_core::prelude::*; -pub fn max_horizontal(s: &[Series]) -> PolarsResult> { +pub fn max_horizontal(s: &[Column]) -> PolarsResult> { let df = unsafe { DataFrame::new_no_checks(Vec::from(s)) }; df.max_horizontal() + .map(|s| s.map(Column::from)) .map(|opt_s| opt_s.map(|res| 
res.with_name(s[0].name().clone()))) } -pub fn min_horizontal(s: &[Series]) -> PolarsResult> { +pub fn min_horizontal(s: &[Column]) -> PolarsResult> { let df = unsafe { DataFrame::new_no_checks(Vec::from(s)) }; df.min_horizontal() + .map(|s| s.map(Column::from)) .map(|opt_s| opt_s.map(|res| res.with_name(s[0].name().clone()))) } -pub fn sum_horizontal(s: &[Series]) -> PolarsResult> { +pub fn sum_horizontal(s: &[Column]) -> PolarsResult> { let df = unsafe { DataFrame::new_no_checks(Vec::from(s)) }; df.sum_horizontal(NullStrategy::Ignore) + .map(|s| s.map(Column::from)) .map(|opt_s| opt_s.map(|res| res.with_name(s[0].name().clone()))) } -pub fn mean_horizontal(s: &[Series]) -> PolarsResult> { +pub fn mean_horizontal(s: &[Column]) -> PolarsResult> { let df = unsafe { DataFrame::new_no_checks(Vec::from(s)) }; df.mean_horizontal(NullStrategy::Ignore) + .map(|s| s.map(Column::from)) .map(|opt_s| opt_s.map(|res| res.with_name(s[0].name().clone()))) } -pub fn coalesce_series(s: &[Series]) -> PolarsResult { +pub fn coalesce_columns(s: &[Column]) -> PolarsResult { // TODO! this can be faster if we have more than two inputs. polars_ensure!(!s.is_empty(), NoData: "cannot coalesce empty list"); let mut out = s[0].clone(); @@ -34,7 +38,10 @@ pub fn coalesce_series(s: &[Series]) -> PolarsResult { return Ok(out); } else { let mask = out.is_not_null(); - out = out.zip_with_same_type(&mask, s)?; + out = out + .as_materialized_series() + .zip_with_same_type(&mask, s.as_materialized_series())? 
+ .into(); } } Ok(out) diff --git a/crates/polars-ops/src/series/ops/interpolation/interpolate_by.rs b/crates/polars-ops/src/series/ops/interpolation/interpolate_by.rs index 06a8378055da..328d67763cfb 100644 --- a/crates/polars-ops/src/series/ops/interpolation/interpolate_by.rs +++ b/crates/polars-ops/src/series/ops/interpolation/interpolate_by.rs @@ -263,29 +263,29 @@ where } } -pub fn interpolate_by(s: &Series, by: &Series, by_is_sorted: bool) -> PolarsResult { +pub fn interpolate_by(s: &Column, by: &Column, by_is_sorted: bool) -> PolarsResult { polars_ensure!(s.len() == by.len(), InvalidOperation: "`by` column must be the same length as Series ({}), got {}", s.len(), by.len()); fn func( ca: &ChunkedArray, by: &ChunkedArray, is_sorted: bool, - ) -> PolarsResult + ) -> PolarsResult where T: PolarsNumericType, F: PolarsNumericType, - ChunkedArray: IntoSeries, + ChunkedArray: IntoColumn, { if is_sorted { interpolate_impl_by_sorted(ca, by, |y_start, y_end, x, out| unsafe { signed_interp_by_sorted(y_start, y_end, x, out) }) - .map(|x| x.into_series()) + .map(|x| x.into_column()) } else { interpolate_impl_by(ca, by, |y_start, y_end, x, out, sorting_indices| unsafe { signed_interp_by(y_start, y_end, x, out, sorting_indices) }) - .map(|x| x.into_series()) + .map(|x| x.into_column()) } } diff --git a/crates/polars-ops/src/series/ops/not.rs b/crates/polars-ops/src/series/ops/not.rs index 2bb153166254..b6abf1559dce 100644 --- a/crates/polars-ops/src/series/ops/not.rs +++ b/crates/polars-ops/src/series/ops/not.rs @@ -9,8 +9,8 @@ pub fn negate_bitwise(s: &Series) -> PolarsResult { DataType::Boolean => Ok(s.bool().unwrap().not().into_series()), dt if dt.is_integer() => { with_match_physical_integer_polars_type!(dt, |$T| { - let ca: &ChunkedArray<$T> = s.as_any().downcast_ref().unwrap(); - Ok(ca.apply_values(|v| !v).into_series()) + let ca: &ChunkedArray<$T> = s.as_any().downcast_ref().unwrap(); + Ok(ca.apply_values(|v| !v).into_series()) }) }, dt => 
polars_bail!(InvalidOperation: "dtype {:?} not supported in 'not' operation", dt), diff --git a/crates/polars-ops/src/series/ops/replace.rs b/crates/polars-ops/src/series/ops/replace.rs index ff9f8f18760d..f2d8f8128777 100644 --- a/crates/polars-ops/src/series/ops/replace.rs +++ b/crates/polars-ops/src/series/ops/replace.rs @@ -179,7 +179,10 @@ fn replace_by_multiple( }, )?; - let replaced = joined.column("__POLARS_REPLACE_NEW").unwrap(); + let replaced = joined + .column("__POLARS_REPLACE_NEW") + .unwrap() + .as_materialized_series(); if replaced.null_count() == 0 { return Ok(replaced.clone()); @@ -226,7 +229,7 @@ fn replace_by_multiple_strict(s: &Series, old: Series, new: Series) -> PolarsRes .unwrap(); ensure_all_replaced(mask, s, old_has_null, false)?; - Ok(replaced.clone()) + Ok(replaced.as_materialized_series().clone()) } // Build replacer dataframe. @@ -235,11 +238,12 @@ fn create_replacer(mut old: Series, mut new: Series, add_mask: bool) -> PolarsRe new.rename(PlSmallStr::from_static("__POLARS_REPLACE_NEW")); let cols = if add_mask { - let mask = Series::new(PlSmallStr::from_static("__POLARS_REPLACE_MASK"), &[true]) + // @scalar-opt + let mask = Column::new(PlSmallStr::from_static("__POLARS_REPLACE_MASK"), &[true]) .new_from_index(0, new.len()); - vec![old, new, mask] + vec![old.into(), new.into(), mask] } else { - vec![old, new] + vec![old.into(), new.into()] }; let out = unsafe { DataFrame::new_no_checks(cols) }; Ok(out) diff --git a/crates/polars-ops/src/series/ops/rle.rs b/crates/polars-ops/src/series/ops/rle.rs index 8659512673f1..9277913558a2 100644 --- a/crates/polars-ops/src/series/ops/rle.rs +++ b/crates/polars-ops/src/series/ops/rle.rs @@ -2,14 +2,16 @@ use polars_core::prelude::*; use polars_core::series::IsSorted; /// Get the lengths of runs of identical values. 
-pub fn rle(s: &Series) -> PolarsResult { +pub fn rle(s: &Column) -> PolarsResult { let (s1, s2) = (s.slice(0, s.len() - 1), s.slice(1, s.len())); - let s_neq = s1.not_equal_missing(&s2)?; + let s_neq = s1 + .as_materialized_series() + .not_equal_missing(s2.as_materialized_series())?; let n_runs = s_neq.sum().ok_or_else(|| polars_err!(InvalidOperation: "could not evaluate 'rle_id' on series of dtype: {}", s.dtype()))? + 1; let mut lengths = Vec::::with_capacity(n_runs as usize); lengths.push(1); - let mut vals = Series::new_empty(PlSmallStr::from_static("value"), s.dtype()); + let mut vals = Column::new_empty(PlSmallStr::from_static("value"), s.dtype()); let vals = vals.extend(&s.head(Some(1)))?.extend(&s2.filter(&s_neq)?)?; let mut idx = 0; @@ -26,19 +28,21 @@ pub fn rle(s: &Series) -> PolarsResult { } let outvals = vec![ - Series::from_vec(PlSmallStr::from_static("len"), lengths), + Series::from_vec(PlSmallStr::from_static("len"), lengths).into(), vals.to_owned(), ]; - Ok(StructChunked::from_series(s.name().clone(), &outvals)?.into_series()) + Ok(StructChunked::from_columns(s.name().clone(), &outvals)?.into_column()) } /// Similar to `rle`, but maps values to run IDs. 
-pub fn rle_id(s: &Series) -> PolarsResult { - if s.len() == 0 { - return Ok(Series::new_empty(s.name().clone(), &IDX_DTYPE)); +pub fn rle_id(s: &Column) -> PolarsResult { + if s.is_empty() { + return Ok(Column::new_empty(s.name().clone(), &IDX_DTYPE)); } let (s1, s2) = (s.slice(0, s.len() - 1), s.slice(1, s.len())); - let s_neq = s1.not_equal_missing(&s2)?; + let s_neq = s1 + .as_materialized_series() + .not_equal_missing(s2.as_materialized_series())?; let mut out = Vec::::with_capacity(s.len()); let mut last = 0; @@ -52,5 +56,5 @@ pub fn rle_id(s: &Series) -> PolarsResult { } Ok(IdxCa::from_vec(s.name().clone(), out) .with_sorted_flag(IsSorted::Ascending) - .into_series()) + .into_column()) } diff --git a/crates/polars-ops/src/series/ops/to_dummies.rs b/crates/polars-ops/src/series/ops/to_dummies.rs index 3cd9d426ac1d..437f49dad480 100644 --- a/crates/polars-ops/src/series/ops/to_dummies.rs +++ b/crates/polars-ops/src/series/ops/to_dummies.rs @@ -42,7 +42,7 @@ impl ToDummies for Series { dummies_helper_slice(offset, len, self.len(), name) }, }; - ca.into_series() + ca.into_column() }) .collect(); @@ -77,7 +77,7 @@ fn dummies_helper_slice( ChunkedArray::from_vec(name, av) } -fn sort_columns(mut columns: Vec) -> Vec { +fn sort_columns(mut columns: Vec) -> Vec { columns.sort_by(|a, b| a.name().partial_cmp(b.name()).unwrap()); columns } diff --git a/crates/polars-ops/src/series/ops/various.rs b/crates/polars-ops/src/series/ops/various.rs index 9ad21ab617d3..c29fcc431c98 100644 --- a/crates/polars-ops/src/series/ops/various.rs +++ b/crates/polars-ops/src/series/ops/various.rs @@ -27,19 +27,19 @@ pub trait SeriesMethods: SeriesSealed { ); // we need to sort here as well in case of `maintain_order` because duplicates behavior is undefined let groups = s.group_tuples(parallel, sort)?; - let values = unsafe { s.agg_first(&groups) }; + let values = unsafe { s.agg_first(&groups) }.into(); let counts = groups.group_count().with_name(name.clone()); let counts = if normalize { 
let len = s.len() as f64; let counts: Float64Chunked = unary_elementwise_values(&counts, |count| count as f64 / len); - counts.into_series() + counts.into_column() } else { - counts.into_series() + counts.into_column() }; - let cols = vec![values, counts.into_series()]; + let cols = vec![values, counts]; let df = unsafe { DataFrame::new_no_checks(cols) }; if sort { df.sort( @@ -95,7 +95,7 @@ pub trait SeriesMethods: SeriesSealed { if matches!(s.dtype(), DataType::Struct(_)) { let encoded = _get_rows_encoded_ca( PlSmallStr::EMPTY, - &[s.clone()], + &[s.clone().into()], &[options.descending], &[options.nulls_last], )?; diff --git a/crates/polars-pipe/src/executors/operators/projection.rs b/crates/polars-pipe/src/executors/operators/projection.rs index 67141d0c44a7..9ae6dbc5299d 100644 --- a/crates/polars-pipe/src/executors/operators/projection.rs +++ b/crates/polars-pipe/src/executors/operators/projection.rs @@ -1,6 +1,7 @@ use std::sync::Arc; use polars_core::error::PolarsResult; +use polars_core::frame::column::{Column, IntoColumn}; use polars_core::frame::DataFrame; use polars_core::schema::SchemaRef; use polars_plan::prelude::ProjectionOptions; @@ -70,7 +71,7 @@ impl Operator for ProjectionOperator { has_literals |= s.len() == 1; has_empty |= s.len() == 0; - Ok(s) + Ok(s.into_column()) }) .collect::>>()?; @@ -117,7 +118,10 @@ impl Operator for HstackOperator { let projected = self .exprs .iter() - .map(|e| e.evaluate(chunk, &context.execution_state)) + .map(|e| { + e.evaluate(chunk, &context.execution_state) + .map(Column::from) + }) .collect::>>()?; let columns = chunk.data.get_columns()[..width].to_vec(); diff --git a/crates/polars-pipe/src/executors/sinks/group_by/generic/global.rs b/crates/polars-pipe/src/executors/sinks/group_by/generic/global.rs index 4488a6faad82..afa67eb80300 100644 --- a/crates/polars-pipe/src/executors/sinks/group_by/generic/global.rs +++ b/crates/polars-pipe/src/executors/sinks/group_by/generic/global.rs @@ -131,7 +131,7 @@ impl 
GlobalTable { hashes: &[u64], chunk_indexes: &[IdxSize], keys: &BinaryArray, - agg_cols: &[Series], + agg_cols: &[Column], ) { debug_assert_eq!(hashes.len(), chunk_indexes.len()); debug_assert_eq!(hashes.len(), keys.len()); @@ -168,7 +168,14 @@ impl GlobalTable { let keys = payload.keys(); let chunk_indexes = payload.chunk_index(); let agg_cols = payload.cols(); - self.process_partition_impl(&mut hash_map, hashes, chunk_indexes, keys, agg_cols); + + // @scalar-opt + let agg_cols = agg_cols + .iter() + .map(|v| v.clone().into_column()) + .collect::>(); + + self.process_partition_impl(&mut hash_map, hashes, chunk_indexes, keys, &agg_cols); } } } diff --git a/crates/polars-pipe/src/executors/sinks/group_by/generic/hash_table.rs b/crates/polars-pipe/src/executors/sinks/group_by/generic/hash_table.rs index 3e57db331b3e..05947baae209 100644 --- a/crates/polars-pipe/src/executors/sinks/group_by/generic/hash_table.rs +++ b/crates/polars-pipe/src/executors/sinks/group_by/generic/hash_table.rs @@ -268,12 +268,16 @@ impl AggHashTable { unsafe { polars_row::decode::decode_rows(&mut key_rows, &fields, &key_dtypes) }; let mut cols = Vec::with_capacity(self.num_keys + self.agg_constructors.len()); + cols.extend(key_columns.into_iter().map(|arr| { + Series::try_from((PlSmallStr::EMPTY, arr)) + .unwrap() + .into_column() + })); cols.extend( - key_columns + agg_builders .into_iter() - .map(|arr| Series::try_from((PlSmallStr::EMPTY, arr)).unwrap()), + .map(|buf| buf.into_series().into_column()), ); - cols.extend(agg_builders.into_iter().map(|buf| buf.into_series())); physical_agg_to_logical(&mut cols, &self.output_schema); unsafe { DataFrame::new_no_checks(cols) } } diff --git a/crates/polars-pipe/src/executors/sinks/group_by/generic/mod.rs b/crates/polars-pipe/src/executors/sinks/group_by/generic/mod.rs index 55244679e204..e9fa7ba495cd 100644 --- a/crates/polars-pipe/src/executors/sinks/group_by/generic/mod.rs +++ b/crates/polars-pipe/src/executors/sinks/group_by/generic/mod.rs @@ 
-75,23 +75,24 @@ impl SpillPayload { debug_assert_eq!(self.hashes.len(), self.keys.len()); let hashes = - UInt64Chunked::from_vec(PlSmallStr::from_static(HASH_COL), self.hashes).into_series(); + UInt64Chunked::from_vec(PlSmallStr::from_static(HASH_COL), self.hashes).into_column(); let chunk_idx = - IdxCa::from_vec(PlSmallStr::from_static(INDEX_COL), self.chunk_idx).into_series(); + IdxCa::from_vec(PlSmallStr::from_static(INDEX_COL), self.chunk_idx).into_column(); let keys = BinaryOffsetChunked::with_chunk(PlSmallStr::from_static(KEYS_COL), self.keys) - .into_series(); + .into_column(); let mut cols = Vec::with_capacity(self.aggs.len() + 3); cols.push(hashes); cols.push(chunk_idx); cols.push(keys); - cols.extend(self.aggs); + // @scalar-opt + cols.extend(self.aggs.into_iter().map(Column::from)); unsafe { DataFrame::new_no_checks(cols) } } fn spilled_to_columns( spilled: &DataFrame, - ) -> (&[u64], &[IdxSize], &BinaryArray, &[Series]) { + ) -> (&[u64], &[IdxSize], &BinaryArray, &[Column]) { let cols = spilled.get_columns(); let hashes = cols[0].u64().unwrap(); let hashes = hashes.cont_slice().unwrap(); diff --git a/crates/polars-pipe/src/executors/sinks/group_by/mod.rs b/crates/polars-pipe/src/executors/sinks/group_by/mod.rs index 7a999e7e7cc7..b8478dd9eb7e 100644 --- a/crates/polars-pipe/src/executors/sinks/group_by/mod.rs +++ b/crates/polars-pipe/src/executors/sinks/group_by/mod.rs @@ -13,7 +13,7 @@ use polars_core::using_string_cache; pub(crate) use primitive::*; pub(crate) use string::*; -pub(super) fn physical_agg_to_logical(cols: &mut [Series], output_schema: &Schema) { +pub(super) fn physical_agg_to_logical(cols: &mut [Column], output_schema: &Schema) { for (s, (name, dtype)) in cols.iter_mut().zip(output_schema.iter()) { if s.name() != name { s.rename(name.clone()); @@ -32,7 +32,7 @@ pub(super) fn physical_agg_to_logical(cols: &mut [Series], output_schema: &Schem matches!(dt, DataType::Enum(_, _)), *ordering, ) - .into_series() + .into_column() } } else { let 
cats = s.u32().unwrap().clone(); @@ -40,7 +40,7 @@ pub(super) fn physical_agg_to_logical(cols: &mut [Series], output_schema: &Schem // SAFETY, we go from logical to primitive back to logical so the categoricals should still match the global map. *s = unsafe { CategoricalChunked::from_global_indices_unchecked(cats, *ordering) - .into_series() + .into_column() }; } else { // we set the global string cache once we start a streaming pipeline diff --git a/crates/polars-pipe/src/executors/sinks/group_by/primitive/mod.rs b/crates/polars-pipe/src/executors/sinks/group_by/primitive/mod.rs index d20ab9bf2b0d..8442b8a9cd7e 100644 --- a/crates/polars-pipe/src/executors/sinks/group_by/primitive/mod.rs +++ b/crates/polars-pipe/src/executors/sinks/group_by/primitive/mod.rs @@ -205,8 +205,12 @@ where ); let mut cols = Vec::with_capacity(1 + self.number_of_aggs()); - cols.push(key_builder.finish().into_series()); - cols.extend(buffers.into_iter().map(|buf| buf.into_series())); + cols.push(key_builder.finish().into_series().into_column()); + cols.extend( + buffers + .into_iter() + .map(|buf| buf.into_series().into_column()), + ); physical_agg_to_logical(&mut cols, &self.output_schema); Some(unsafe { DataFrame::new_no_checks(cols) }) }) diff --git a/crates/polars-pipe/src/executors/sinks/group_by/string.rs b/crates/polars-pipe/src/executors/sinks/group_by/string.rs index 0a66255f6e7c..d2fec9c16173 100644 --- a/crates/polars-pipe/src/executors/sinks/group_by/string.rs +++ b/crates/polars-pipe/src/executors/sinks/group_by/string.rs @@ -209,8 +209,12 @@ impl StringGroupbySink { ); let mut cols = Vec::with_capacity(1 + self.number_of_aggs()); - cols.push(key_builder.finish().into_series()); - cols.extend(buffers.into_iter().map(|buf| buf.into_series())); + cols.push(key_builder.finish().into_series().into_column()); + cols.extend( + buffers + .into_iter() + .map(|buf| buf.into_series().into_column()), + ); physical_agg_to_logical(&mut cols, &self.output_schema); Some(unsafe { 
DataFrame::new_no_checks(cols) }) }) diff --git a/crates/polars-pipe/src/executors/sinks/joins/generic_probe_outer.rs b/crates/polars-pipe/src/executors/sinks/joins/generic_probe_outer.rs index 0157fe660de5..2ab417ad2096 100644 --- a/crates/polars-pipe/src/executors/sinks/joins/generic_probe_outer.rs +++ b/crates/polars-pipe/src/executors/sinks/joins/generic_probe_outer.rs @@ -268,7 +268,7 @@ impl GenericFullOuterJoinProbe { right_df .get_columns() .iter() - .map(|s| Series::full_null(s.name().clone(), size, s.dtype())) + .map(|s| Column::full_null(s.name().clone(), size, s.dtype())) .collect(), ) }; diff --git a/crates/polars-pipe/src/executors/sinks/sort/ooc.rs b/crates/polars-pipe/src/executors/sinks/sort/ooc.rs index 64acfa30a5db..1c04f67a34a9 100644 --- a/crates/polars-pipe/src/executors/sinks/sort/ooc.rs +++ b/crates/polars-pipe/src/executors/sinks/sort/ooc.rs @@ -168,7 +168,8 @@ pub(super) fn sort_ooc( let df = read_df(&path)?; let sort_col = &df.get_columns()[idx]; - let assigned_parts = det_partitions(sort_col, &samples, descending); + let assigned_parts = + det_partitions(sort_col.as_materialized_series(), &samples, descending); // partition the dataframe into proper buckets let (iter, unique_assigned_parts) = diff --git a/crates/polars-pipe/src/executors/sinks/sort/sink_multiple.rs b/crates/polars-pipe/src/executors/sinks/sort/sink_multiple.rs index 053ccb1f1999..f08609ab9e21 100644 --- a/crates/polars-pipe/src/executors/sinks/sort/sink_multiple.rs +++ b/crates/polars-pipe/src/executors/sinks/sort/sink_multiple.rs @@ -100,7 +100,8 @@ fn finalize_dataframe( let (name, logical_dtype) = schema.get_at_index(sort_idx).unwrap(); assert_eq!(logical_dtype.to_physical(), DataType::from(arr.dtype())); let col = - Series::from_chunks_and_dtype_unchecked(name.clone(), vec![arr], logical_dtype); + Series::from_chunks_and_dtype_unchecked(name.clone(), vec![arr], logical_dtype) + .into_column(); cols.insert(sort_idx, col); } } @@ -205,7 +206,7 @@ impl SortSinkMultiple 
{ for i in self.sort_idx.iter() { let s = &cols[*i]; - let arr = _get_rows_encoded_compat_array(s)?; + let arr = _get_rows_encoded_compat_array(s.as_materialized_series())?; self.sort_column.push(arr); } diff --git a/crates/polars-pipe/src/executors/sources/csv.rs b/crates/polars-pipe/src/executors/sources/csv.rs index f3267ac1e90a..323776deb976 100644 --- a/crates/polars-pipe/src/executors/sources/csv.rs +++ b/crates/polars-pipe/src/executors/sources/csv.rs @@ -220,7 +220,7 @@ impl Source for CsvSource { // gets passed contains the column. for s in unsafe { data_chunk.data.get_columns_mut() } { if s.name() == ca.name() { - *s = ca.slice(0, s.len()).into_series(); + *s = ca.slice(0, s.len()).into_column(); break; } } diff --git a/crates/polars-pipe/src/operators/chunks.rs b/crates/polars-pipe/src/operators/chunks.rs index 1c78a32dde80..c1f63019a611 100644 --- a/crates/polars-pipe/src/operators/chunks.rs +++ b/crates/polars-pipe/src/operators/chunks.rs @@ -14,7 +14,7 @@ impl DataChunk { #[cfg(debug_assertions)] { for c in data.get_columns() { - assert_eq!(c.chunks().len(), 1); + assert_eq!(c.as_materialized_series().chunks().len(), 1); } } Self { chunk_index, data } @@ -138,7 +138,7 @@ mod test { .iter() .enumerate() .map(|(i, length)| { - let series = Series::new("val".into(), vec![i as u64; *length]); + let series = Column::new("val".into(), vec![i as u64; *length]); DataFrame::new(vec![series]).unwrap() }) .collect(); @@ -167,7 +167,13 @@ mod test { } } // Make sure all result DataFrames only have a single chunk. 
- assert_eq!(result_df.get_columns()[0].chunk_lengths().len(), 1); + assert_eq!( + result_df.get_columns()[0] + .as_materialized_series() + .chunk_lengths() + .len(), + 1 + ); } // Make sure the data was preserved: diff --git a/crates/polars-plan/src/dsl/array.rs b/crates/polars-plan/src/dsl/array.rs index 558a7a98a42a..a5b7db2e8437 100644 --- a/crates/polars-plan/src/dsl/array.rs +++ b/crates/polars-plan/src/dsl/array.rs @@ -164,7 +164,7 @@ impl ArrayNameSpace { move |s| { s.array()? .to_struct(name_generator.clone()) - .map(|s| Some(s.into_series())) + .map(|s| Some(s.into_column())) }, GetOutput::map_dtype(move |dt: &DataType| { let DataType::Array(inner, width) = dt else { diff --git a/crates/polars-plan/src/dsl/expr.rs b/crates/polars-plan/src/dsl/expr.rs index a8c48cd17fb8..0bbecd7e1d77 100644 --- a/crates/polars-plan/src/dsl/expr.rs +++ b/crates/polars-plan/src/dsl/expr.rs @@ -153,7 +153,7 @@ pub enum Expr { /// function arguments input: Vec, /// function to apply - function: SpecialEq>, + function: SpecialEq>, /// output dtype of the function output_type: GetOutput, options: FunctionOptions, diff --git a/crates/polars-plan/src/dsl/expr_dyn_fn.rs b/crates/polars-plan/src/dsl/expr_dyn_fn.rs index 9ac6f872eed8..e134e8b556ef 100644 --- a/crates/polars-plan/src/dsl/expr_dyn_fn.rs +++ b/crates/polars-plan/src/dsl/expr_dyn_fn.rs @@ -10,12 +10,12 @@ use serde::{Deserializer, Serializer}; use super::*; /// A wrapper trait for any closure `Fn(Vec) -> PolarsResult` -pub trait SeriesUdf: Send + Sync { +pub trait ColumnsUdf: Send + Sync { fn as_any(&self) -> &dyn std::any::Any { unimplemented!("as_any not implemented for this 'opaque' function") } - fn call_udf(&self, s: &mut [Series]) -> PolarsResult>; + fn call_udf(&self, s: &mut [Column]) -> PolarsResult>; fn try_serialize(&self, _buf: &mut Vec) -> PolarsResult<()> { polars_bail!(ComputeError: "serialization not supported for this 'opaque' function") @@ -31,7 +31,7 @@ pub trait SeriesUdf: Send + Sync { } 
#[cfg(feature = "serde")] -impl Serialize for SpecialEq> { +impl Serialize for SpecialEq> { fn serialize(&self, serializer: S) -> std::result::Result where S: Serializer, @@ -46,7 +46,7 @@ impl Serialize for SpecialEq> { } #[cfg(feature = "serde")] -impl<'a> Deserialize<'a> for SpecialEq> { +impl<'a> Deserialize<'a> for SpecialEq> { fn deserialize(deserializer: D) -> std::result::Result where D: Deserializer<'a>, @@ -68,6 +68,8 @@ impl<'a> Deserialize<'a> for SpecialEq> { } #[cfg(not(feature = "python"))] { + _ = deserializer; + Err(D::Error::custom( "deserialization not supported for this 'opaque' function", )) @@ -75,42 +77,42 @@ impl<'a> Deserialize<'a> for SpecialEq> { } } -impl SeriesUdf for F +impl ColumnsUdf for F where - F: Fn(&mut [Series]) -> PolarsResult> + Send + Sync, + F: Fn(&mut [Column]) -> PolarsResult> + Send + Sync, { - fn call_udf(&self, s: &mut [Series]) -> PolarsResult> { + fn call_udf(&self, s: &mut [Column]) -> PolarsResult> { self(s) } } -impl Debug for dyn SeriesUdf { +impl Debug for dyn ColumnsUdf { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "SeriesUdf") + write!(f, "ColumnUdf") } } -/// A wrapper trait for any binary closure `Fn(Series, Series) -> PolarsResult` -pub trait SeriesBinaryUdf: Send + Sync { - fn call_udf(&self, a: Series, b: Series) -> PolarsResult; +/// A wrapper trait for any binary closure `Fn(Column, Column) -> PolarsResult` +pub trait ColumnBinaryUdf: Send + Sync { + fn call_udf(&self, a: Column, b: Column) -> PolarsResult; } -impl SeriesBinaryUdf for F +impl ColumnBinaryUdf for F where - F: Fn(Series, Series) -> PolarsResult + Send + Sync, + F: Fn(Column, Column) -> PolarsResult + Send + Sync, { - fn call_udf(&self, a: Series, b: Series) -> PolarsResult { + fn call_udf(&self, a: Column, b: Column) -> PolarsResult { self(a, b) } } -impl Debug for dyn SeriesBinaryUdf { +impl Debug for dyn ColumnBinaryUdf { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "SeriesBinaryUdf") 
+ write!(f, "ColumnBinaryUdf") } } -impl Default for SpecialEq> { +impl Default for SpecialEq> { fn default() -> Self { panic!("implementation error"); } @@ -394,6 +396,8 @@ impl<'a> Deserialize<'a> for GetOutput { } #[cfg(not(feature = "python"))] { + _ = deserializer; + Err(D::Error::custom( "deserialization not supported for this output field", )) diff --git a/crates/polars-plan/src/dsl/function_expr/abs.rs b/crates/polars-plan/src/dsl/function_expr/abs.rs index 45e99ea42648..5464f06daada 100644 --- a/crates/polars-plan/src/dsl/function_expr/abs.rs +++ b/crates/polars-plan/src/dsl/function_expr/abs.rs @@ -1,5 +1,5 @@ use super::*; -pub(super) fn abs(s: &Series) -> PolarsResult { - polars_ops::prelude::abs(s) +pub(super) fn abs(s: &Column) -> PolarsResult { + polars_ops::prelude::abs(s.as_materialized_series()).map(Column::from) } diff --git a/crates/polars-plan/src/dsl/function_expr/arg_where.rs b/crates/polars-plan/src/dsl/function_expr/arg_where.rs index 8f77be0724bd..ab0afba55960 100644 --- a/crates/polars-plan/src/dsl/function_expr/arg_where.rs +++ b/crates/polars-plan/src/dsl/function_expr/arg_where.rs @@ -2,11 +2,11 @@ use polars_core::utils::arrow::bitmap::utils::SlicesIterator; use super::*; -pub(super) fn arg_where(s: &mut [Series]) -> PolarsResult> { +pub(super) fn arg_where(s: &mut [Column]) -> PolarsResult> { let predicate = s[0].bool()?; if predicate.is_empty() { - Ok(Some(Series::full_null( + Ok(Some(Column::full_null( predicate.name().clone(), 0, &IDX_DTYPE, @@ -37,6 +37,6 @@ pub(super) fn arg_where(s: &mut [Series]) -> PolarsResult> { total_offset += arr.len(); }); let ca = IdxCa::with_chunk(predicate.name().clone(), IdxArr::from_vec(out)); - Ok(Some(ca.into_series())) + Ok(Some(ca.into_column())) } } diff --git a/crates/polars-plan/src/dsl/function_expr/array.rs b/crates/polars-plan/src/dsl/function_expr/array.rs index 0de5e9d99883..dce6d44bce94 100644 --- a/crates/polars-plan/src/dsl/function_expr/array.rs +++ 
b/crates/polars-plan/src/dsl/function_expr/array.rs @@ -101,7 +101,7 @@ impl Display for ArrayFunction { } } -impl From for SpecialEq> { +impl From for SpecialEq> { fn from(func: ArrayFunction) -> Self { use ArrayFunction::*; match func { @@ -133,101 +133,104 @@ impl From for SpecialEq> { } } -pub(super) fn max(s: &Series) -> PolarsResult { - Ok(s.array()?.array_max()) +pub(super) fn max(s: &Column) -> PolarsResult { + Ok(s.array()?.array_max().into()) } -pub(super) fn min(s: &Series) -> PolarsResult { - Ok(s.array()?.array_min()) +pub(super) fn min(s: &Column) -> PolarsResult { + Ok(s.array()?.array_min().into()) } -pub(super) fn sum(s: &Series) -> PolarsResult { - s.array()?.array_sum() +pub(super) fn sum(s: &Column) -> PolarsResult { + s.array()?.array_sum().map(Column::from) } -pub(super) fn std(s: &Series, ddof: u8) -> PolarsResult { - s.array()?.array_std(ddof) +pub(super) fn std(s: &Column, ddof: u8) -> PolarsResult { + s.array()?.array_std(ddof).map(Column::from) } -pub(super) fn var(s: &Series, ddof: u8) -> PolarsResult { - s.array()?.array_var(ddof) +pub(super) fn var(s: &Column, ddof: u8) -> PolarsResult { + s.array()?.array_var(ddof).map(Column::from) } -pub(super) fn median(s: &Series) -> PolarsResult { - s.array()?.array_median() +pub(super) fn median(s: &Column) -> PolarsResult { + s.array()?.array_median().map(Column::from) } -pub(super) fn unique(s: &Series, stable: bool) -> PolarsResult { +pub(super) fn unique(s: &Column, stable: bool) -> PolarsResult { let ca = s.array()?; let out = if stable { ca.array_unique_stable() } else { ca.array_unique() }; - out.map(|ca| ca.into_series()) + out.map(|ca| ca.into_column()) } -pub(super) fn n_unique(s: &Series) -> PolarsResult { - Ok(s.array()?.array_n_unique()?.into_series()) +pub(super) fn n_unique(s: &Column) -> PolarsResult { + Ok(s.array()?.array_n_unique()?.into_column()) } -pub(super) fn to_list(s: &Series) -> PolarsResult { +pub(super) fn to_list(s: &Column) -> PolarsResult { let list_dtype = 
map_array_dtype_to_list_dtype(s.dtype())?; s.cast(&list_dtype) } #[cfg(feature = "array_any_all")] -pub(super) fn any(s: &Series) -> PolarsResult { - s.array()?.array_any() +pub(super) fn any(s: &Column) -> PolarsResult { + s.array()?.array_any().map(Column::from) } #[cfg(feature = "array_any_all")] -pub(super) fn all(s: &Series) -> PolarsResult { - s.array()?.array_all() +pub(super) fn all(s: &Column) -> PolarsResult { + s.array()?.array_all().map(Column::from) } -pub(super) fn sort(s: &Series, options: SortOptions) -> PolarsResult { - Ok(s.array()?.array_sort(options)?.into_series()) +pub(super) fn sort(s: &Column, options: SortOptions) -> PolarsResult { + Ok(s.array()?.array_sort(options)?.into_column()) } -pub(super) fn reverse(s: &Series) -> PolarsResult { - Ok(s.array()?.array_reverse().into_series()) +pub(super) fn reverse(s: &Column) -> PolarsResult { + Ok(s.array()?.array_reverse().into_column()) } -pub(super) fn arg_min(s: &Series) -> PolarsResult { - Ok(s.array()?.array_arg_min().into_series()) +pub(super) fn arg_min(s: &Column) -> PolarsResult { + Ok(s.array()?.array_arg_min().into_column()) } -pub(super) fn arg_max(s: &Series) -> PolarsResult { - Ok(s.array()?.array_arg_max().into_series()) +pub(super) fn arg_max(s: &Column) -> PolarsResult { + Ok(s.array()?.array_arg_max().into_column()) } -pub(super) fn get(s: &[Series], null_on_oob: bool) -> PolarsResult { +pub(super) fn get(s: &[Column], null_on_oob: bool) -> PolarsResult { let ca = s[0].array()?; let index = s[1].cast(&DataType::Int64)?; let index = index.i64().unwrap(); - ca.array_get(index, null_on_oob) + ca.array_get(index, null_on_oob).map(Column::from) } -pub(super) fn join(s: &[Series], ignore_nulls: bool) -> PolarsResult { +pub(super) fn join(s: &[Column], ignore_nulls: bool) -> PolarsResult { let ca = s[0].array()?; let separator = s[1].str()?; - ca.array_join(separator, ignore_nulls) + ca.array_join(separator, ignore_nulls).map(Column::from) } #[cfg(feature = "is_in")] -pub(super) fn 
contains(s: &[Series]) -> PolarsResult { +pub(super) fn contains(s: &[Column]) -> PolarsResult { let array = &s[0]; let item = &s[1]; polars_ensure!(matches!(array.dtype(), DataType::Array(_, _)), SchemaMismatch: "invalid series dtype: expected `Array`, got `{}`", array.dtype(), ); - Ok(is_in(item, array)? - .with_name(array.name().clone()) - .into_series()) + Ok(is_in( + item.as_materialized_series(), + array.as_materialized_series(), + )? + .with_name(array.name().clone()) + .into_column()) } #[cfg(feature = "array_count")] -pub(super) fn count_matches(args: &[Series]) -> PolarsResult { +pub(super) fn count_matches(args: &[Column]) -> PolarsResult { let s = &args[0]; let element = &args[1]; polars_ensure!( @@ -237,11 +240,12 @@ pub(super) fn count_matches(args: &[Series]) -> PolarsResult { ); let ca = s.array()?; ca.array_count_matches(element.get(0).unwrap()) + .map(Column::from) } -pub(super) fn shift(s: &[Series]) -> PolarsResult { +pub(super) fn shift(s: &[Column]) -> PolarsResult { let ca = s[0].array()?; let n = &s[1]; - ca.array_shift(n) + ca.array_shift(n.as_materialized_series()).map(Column::from) } diff --git a/crates/polars-plan/src/dsl/function_expr/binary.rs b/crates/polars-plan/src/dsl/function_expr/binary.rs index f803ba0ba952..88f3ad71b545 100644 --- a/crates/polars-plan/src/dsl/function_expr/binary.rs +++ b/crates/polars-plan/src/dsl/function_expr/binary.rs @@ -57,7 +57,7 @@ impl Display for BinaryFunction { } } -impl From for SpecialEq> { +impl From for SpecialEq> { fn from(func: BinaryFunction) -> Self { use BinaryFunction::*; match func { @@ -83,62 +83,62 @@ impl From for SpecialEq> { } } -pub(super) fn contains(s: &[Series]) -> PolarsResult { +pub(super) fn contains(s: &[Column]) -> PolarsResult { let ca = s[0].binary()?; let lit = s[1].binary()?; Ok(ca .contains_chunked(lit) .with_name(ca.name().clone()) - .into_series()) + .into_column()) } -pub(super) fn ends_with(s: &[Series]) -> PolarsResult { +pub(super) fn ends_with(s: &[Column]) -> 
PolarsResult { let ca = s[0].binary()?; let suffix = s[1].binary()?; Ok(ca .ends_with_chunked(suffix) .with_name(ca.name().clone()) - .into_series()) + .into_column()) } -pub(super) fn starts_with(s: &[Series]) -> PolarsResult { +pub(super) fn starts_with(s: &[Column]) -> PolarsResult { let ca = s[0].binary()?; let prefix = s[1].binary()?; Ok(ca .starts_with_chunked(prefix) .with_name(ca.name().clone()) - .into_series()) + .into_column()) } -pub(super) fn size_bytes(s: &Series) -> PolarsResult { +pub(super) fn size_bytes(s: &Column) -> PolarsResult { let ca = s.binary()?; - Ok(ca.size_bytes().into_series()) + Ok(ca.size_bytes().into_column()) } #[cfg(feature = "binary_encoding")] -pub(super) fn hex_decode(s: &Series, strict: bool) -> PolarsResult { +pub(super) fn hex_decode(s: &Column, strict: bool) -> PolarsResult { let ca = s.binary()?; - ca.hex_decode(strict).map(|ok| ok.into_series()) + ca.hex_decode(strict).map(|ok| ok.into_column()) } #[cfg(feature = "binary_encoding")] -pub(super) fn hex_encode(s: &Series) -> PolarsResult { +pub(super) fn hex_encode(s: &Column) -> PolarsResult { let ca = s.binary()?; - Ok(ca.hex_encode()) + Ok(ca.hex_encode().into()) } #[cfg(feature = "binary_encoding")] -pub(super) fn base64_decode(s: &Series, strict: bool) -> PolarsResult { +pub(super) fn base64_decode(s: &Column, strict: bool) -> PolarsResult { let ca = s.binary()?; - ca.base64_decode(strict).map(|ok| ok.into_series()) + ca.base64_decode(strict).map(|ok| ok.into_column()) } #[cfg(feature = "binary_encoding")] -pub(super) fn base64_encode(s: &Series) -> PolarsResult { +pub(super) fn base64_encode(s: &Column) -> PolarsResult { let ca = s.binary()?; - Ok(ca.base64_encode()) + Ok(ca.base64_encode().into()) } impl From for FunctionExpr { diff --git a/crates/polars-plan/src/dsl/function_expr/boolean.rs b/crates/polars-plan/src/dsl/function_expr/boolean.rs index d00045c0d3f9..089fed3dc51b 100644 --- a/crates/polars-plan/src/dsl/function_expr/boolean.rs +++ 
b/crates/polars-plan/src/dsl/function_expr/boolean.rs @@ -93,7 +93,7 @@ impl Display for BooleanFunction { } } -impl From for SpecialEq> { +impl From for SpecialEq> { fn from(func: BooleanFunction) -> Self { use BooleanFunction::*; match func { @@ -130,89 +130,99 @@ impl From for FunctionExpr { } } -fn any(s: &Series, ignore_nulls: bool) -> PolarsResult { +fn any(s: &Column, ignore_nulls: bool) -> PolarsResult { let ca = s.bool()?; if ignore_nulls { - Ok(Series::new(s.name().clone(), [ca.any()])) + Ok(Column::new(s.name().clone(), [ca.any()])) } else { - Ok(Series::new(s.name().clone(), [ca.any_kleene()])) + Ok(Column::new(s.name().clone(), [ca.any_kleene()])) } } -fn all(s: &Series, ignore_nulls: bool) -> PolarsResult { +fn all(s: &Column, ignore_nulls: bool) -> PolarsResult { let ca = s.bool()?; if ignore_nulls { - Ok(Series::new(s.name().clone(), [ca.all()])) + Ok(Column::new(s.name().clone(), [ca.all()])) } else { - Ok(Series::new(s.name().clone(), [ca.all_kleene()])) + Ok(Column::new(s.name().clone(), [ca.all_kleene()])) } } -fn is_null(s: &Series) -> PolarsResult { - Ok(s.is_null().into_series()) +fn is_null(s: &Column) -> PolarsResult { + Ok(s.is_null().into_column()) } -fn is_not_null(s: &Series) -> PolarsResult { - Ok(s.is_not_null().into_series()) +fn is_not_null(s: &Column) -> PolarsResult { + Ok(s.is_not_null().into_column()) } -fn is_finite(s: &Series) -> PolarsResult { - s.is_finite().map(|ca| ca.into_series()) +fn is_finite(s: &Column) -> PolarsResult { + s.is_finite().map(|ca| ca.into_column()) } -fn is_infinite(s: &Series) -> PolarsResult { - s.is_infinite().map(|ca| ca.into_series()) +fn is_infinite(s: &Column) -> PolarsResult { + s.is_infinite().map(|ca| ca.into_column()) } -pub(super) fn is_nan(s: &Series) -> PolarsResult { - s.is_nan().map(|ca| ca.into_series()) +pub(super) fn is_nan(s: &Column) -> PolarsResult { + s.is_nan().map(|ca| ca.into_column()) } -pub(super) fn is_not_nan(s: &Series) -> PolarsResult { - s.is_not_nan().map(|ca| 
ca.into_series()) +pub(super) fn is_not_nan(s: &Column) -> PolarsResult { + s.is_not_nan().map(|ca| ca.into_column()) } #[cfg(feature = "is_first_distinct")] -fn is_first_distinct(s: &Series) -> PolarsResult { - polars_ops::prelude::is_first_distinct(s).map(|ca| ca.into_series()) +fn is_first_distinct(s: &Column) -> PolarsResult { + polars_ops::prelude::is_first_distinct(s.as_materialized_series()).map(|ca| ca.into_column()) } #[cfg(feature = "is_last_distinct")] -fn is_last_distinct(s: &Series) -> PolarsResult { - polars_ops::prelude::is_last_distinct(s).map(|ca| ca.into_series()) +fn is_last_distinct(s: &Column) -> PolarsResult { + polars_ops::prelude::is_last_distinct(s.as_materialized_series()).map(|ca| ca.into_column()) } #[cfg(feature = "is_unique")] -fn is_unique(s: &Series) -> PolarsResult { - polars_ops::prelude::is_unique(s).map(|ca| ca.into_series()) +fn is_unique(s: &Column) -> PolarsResult { + polars_ops::prelude::is_unique(s.as_materialized_series()).map(|ca| ca.into_column()) } #[cfg(feature = "is_unique")] -fn is_duplicated(s: &Series) -> PolarsResult { - polars_ops::prelude::is_duplicated(s).map(|ca| ca.into_series()) +fn is_duplicated(s: &Column) -> PolarsResult { + polars_ops::prelude::is_duplicated(s.as_materialized_series()).map(|ca| ca.into_column()) } #[cfg(feature = "is_between")] -fn is_between(s: &[Series], closed: ClosedInterval) -> PolarsResult { +fn is_between(s: &[Column], closed: ClosedInterval) -> PolarsResult { let ser = &s[0]; let lower = &s[1]; let upper = &s[2]; - polars_ops::prelude::is_between(ser, lower, upper, closed).map(|ca| ca.into_series()) + polars_ops::prelude::is_between( + ser.as_materialized_series(), + lower.as_materialized_series(), + upper.as_materialized_series(), + closed, + ) + .map(|ca| ca.into_column()) } #[cfg(feature = "is_in")] -fn is_in(s: &mut [Series]) -> PolarsResult> { +fn is_in(s: &mut [Column]) -> PolarsResult> { let left = &s[0]; let other = &s[1]; - polars_ops::prelude::is_in(left, other).map(|ca| 
Some(ca.into_series())) + polars_ops::prelude::is_in( + left.as_materialized_series(), + other.as_materialized_series(), + ) + .map(|ca| Some(ca.into_column())) } -fn not(s: &Series) -> PolarsResult { - polars_ops::series::negate_bitwise(s) +fn not(s: &Column) -> PolarsResult { + polars_ops::series::negate_bitwise(s.as_materialized_series()).map(Column::from) } // We shouldn't hit these often only on very wide dataframes where we don't reduce to & expressions. -fn any_horizontal(s: &[Series]) -> PolarsResult { +fn any_horizontal(s: &[Column]) -> PolarsResult { let out = POOL .install(|| { s.par_iter() @@ -230,11 +240,11 @@ fn any_horizontal(s: &[Series]) -> PolarsResult { ) })? .with_name(s[0].name().clone()); - Ok(out.into_series()) + Ok(out.into_column()) } // We shouldn't hit these often only on very wide dataframes where we don't reduce to & expressions. -fn all_horizontal(s: &[Series]) -> PolarsResult { +fn all_horizontal(s: &[Column]) -> PolarsResult { let out = POOL .install(|| { s.par_iter() @@ -252,5 +262,5 @@ fn all_horizontal(s: &[Series]) -> PolarsResult { ) })? 
.with_name(s[0].name().clone()); - Ok(out.into_series()) + Ok(out.into_column()) } diff --git a/crates/polars-plan/src/dsl/function_expr/bounds.rs b/crates/polars-plan/src/dsl/function_expr/bounds.rs index 0f14feb5675f..77c8a6f3ef5f 100644 --- a/crates/polars-plan/src/dsl/function_expr/bounds.rs +++ b/crates/polars-plan/src/dsl/function_expr/bounds.rs @@ -1,23 +1,23 @@ use super::*; -pub(super) fn upper_bound(s: &Series) -> PolarsResult { +pub(super) fn upper_bound(s: &Column) -> PolarsResult { let name = s.name().clone(); use DataType::*; let s = match s.dtype().to_physical() { #[cfg(feature = "dtype-i8")] - Int8 => Series::new(name, &[i8::MAX]), + Int8 => Column::new_scalar(name, Scalar::from(i8::MAX), 1), #[cfg(feature = "dtype-i16")] - Int16 => Series::new(name, &[i16::MAX]), - Int32 => Series::new(name, &[i32::MAX]), - Int64 => Series::new(name, &[i64::MAX]), + Int16 => Column::new_scalar(name, Scalar::from(i16::MAX), 1), + Int32 => Column::new_scalar(name, Scalar::from(i32::MAX), 1), + Int64 => Column::new_scalar(name, Scalar::from(i64::MAX), 1), #[cfg(feature = "dtype-u8")] - UInt8 => Series::new(name, &[u8::MAX]), + UInt8 => Column::new_scalar(name, Scalar::from(u8::MAX), 1), #[cfg(feature = "dtype-u16")] - UInt16 => Series::new(name, &[u16::MAX]), - UInt32 => Series::new(name, &[u32::MAX]), - UInt64 => Series::new(name, &[u64::MAX]), - Float32 => Series::new(name, &[f32::INFINITY]), - Float64 => Series::new(name, &[f64::INFINITY]), + UInt16 => Column::new_scalar(name, Scalar::from(u16::MAX), 1), + UInt32 => Column::new_scalar(name, Scalar::from(u32::MAX), 1), + UInt64 => Column::new_scalar(name, Scalar::from(u64::MAX), 1), + Float32 => Column::new_scalar(name, Scalar::from(f32::INFINITY), 1), + Float64 => Column::new_scalar(name, Scalar::from(f64::INFINITY), 1), dt => polars_bail!( ComputeError: "cannot determine upper bound for dtype `{}`", dt, ), @@ -25,24 +25,24 @@ pub(super) fn upper_bound(s: &Series) -> PolarsResult { Ok(s) } -pub(super) fn 
lower_bound(s: &Series) -> PolarsResult { +pub(super) fn lower_bound(s: &Column) -> PolarsResult { let name = s.name().clone(); use DataType::*; let s = match s.dtype().to_physical() { #[cfg(feature = "dtype-i8")] - Int8 => Series::new(name, &[i8::MIN]), + Int8 => Column::new_scalar(name, Scalar::from(i8::MIN), 1), #[cfg(feature = "dtype-i16")] - Int16 => Series::new(name, &[i16::MIN]), - Int32 => Series::new(name, &[i32::MIN]), - Int64 => Series::new(name, &[i64::MIN]), + Int16 => Column::new_scalar(name, Scalar::from(i16::MIN), 1), + Int32 => Column::new_scalar(name, Scalar::from(i32::MIN), 1), + Int64 => Column::new_scalar(name, Scalar::from(i64::MIN), 1), #[cfg(feature = "dtype-u8")] - UInt8 => Series::new(name, &[u8::MIN]), + UInt8 => Column::new_scalar(name, Scalar::from(u8::MIN), 1), #[cfg(feature = "dtype-u16")] - UInt16 => Series::new(name, &[u16::MIN]), - UInt32 => Series::new(name, &[u32::MIN]), - UInt64 => Series::new(name, &[u64::MIN]), - Float32 => Series::new(name, &[f32::NEG_INFINITY]), - Float64 => Series::new(name, &[f64::NEG_INFINITY]), + UInt16 => Column::new_scalar(name, Scalar::from(u16::MIN), 1), + UInt32 => Column::new_scalar(name, Scalar::from(u32::MIN), 1), + UInt64 => Column::new_scalar(name, Scalar::from(u64::MIN), 1), + Float32 => Column::new_scalar(name, Scalar::from(f32::NEG_INFINITY), 1), + Float64 => Column::new_scalar(name, Scalar::from(f64::NEG_INFINITY), 1), dt => polars_bail!( ComputeError: "cannot determine lower bound for dtype `{}`", dt, ), diff --git a/crates/polars-plan/src/dsl/function_expr/business.rs b/crates/polars-plan/src/dsl/function_expr/business.rs index 0d4fc2939d98..c488666503ae 100644 --- a/crates/polars-plan/src/dsl/function_expr/business.rs +++ b/crates/polars-plan/src/dsl/function_expr/business.rs @@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize}; use crate::dsl::SpecialEq; use crate::map_as_slice; -use crate::prelude::SeriesUdf; +use crate::prelude::ColumnsUdf; #[cfg_attr(feature = "serde", 
derive(Serialize, Deserialize))] #[derive(Clone, PartialEq, Debug, Eq, Hash)] @@ -37,7 +37,7 @@ impl Display for BusinessFunction { write!(f, "{s}") } } -impl From for SpecialEq> { +impl From for SpecialEq> { fn from(func: BusinessFunction) -> Self { use BusinessFunction::*; match func { @@ -62,23 +62,36 @@ impl From for SpecialEq> { #[cfg(feature = "business")] pub(super) fn business_day_count( - s: &[Series], + s: &[Column], week_mask: [bool; 7], holidays: &[i32], -) -> PolarsResult { +) -> PolarsResult { let start = &s[0]; let end = &s[1]; - polars_ops::prelude::business_day_count(start, end, week_mask, holidays) + polars_ops::prelude::business_day_count( + start.as_materialized_series(), + end.as_materialized_series(), + week_mask, + holidays, + ) + .map(Column::from) } #[cfg(feature = "business")] pub(super) fn add_business_days( - s: &[Series], + s: &[Column], week_mask: [bool; 7], holidays: &[i32], roll: Roll, -) -> PolarsResult { +) -> PolarsResult { let start = &s[0]; let n = &s[1]; - polars_ops::prelude::add_business_days(start, n, week_mask, holidays, roll) + polars_ops::prelude::add_business_days( + start.as_materialized_series(), + n.as_materialized_series(), + week_mask, + holidays, + roll, + ) + .map(Column::from) } diff --git a/crates/polars-plan/src/dsl/function_expr/cat.rs b/crates/polars-plan/src/dsl/function_expr/cat.rs index 9cc5d993a638..b25215589789 100644 --- a/crates/polars-plan/src/dsl/function_expr/cat.rs +++ b/crates/polars-plan/src/dsl/function_expr/cat.rs @@ -26,7 +26,7 @@ impl Display for CategoricalFunction { } } -impl From for SpecialEq> { +impl From for SpecialEq> { fn from(func: CategoricalFunction) -> Self { use CategoricalFunction::*; match func { @@ -41,10 +41,10 @@ impl From for FunctionExpr { } } -fn get_categories(s: &Series) -> PolarsResult { +fn get_categories(s: &Column) -> PolarsResult { // categorical check let ca = s.categorical()?; let rev_map = ca.get_rev_map(); let arr = rev_map.get_categories().clone().boxed(); - 
Series::try_from((ca.name().clone(), arr)) + Series::try_from((ca.name().clone(), arr)).map(Column::from) } diff --git a/crates/polars-plan/src/dsl/function_expr/clip.rs b/crates/polars-plan/src/dsl/function_expr/clip.rs index adae248a8af2..9a721d65d198 100644 --- a/crates/polars-plan/src/dsl/function_expr/clip.rs +++ b/crates/polars-plan/src/dsl/function_expr/clip.rs @@ -1,10 +1,21 @@ use super::*; -pub(super) fn clip(s: &[Series], has_min: bool, has_max: bool) -> PolarsResult { +pub(super) fn clip(s: &[Column], has_min: bool, has_max: bool) -> PolarsResult { match (has_min, has_max) { - (true, true) => polars_ops::series::clip(&s[0], &s[1], &s[2]), - (true, false) => polars_ops::series::clip_min(&s[0], &s[1]), - (false, true) => polars_ops::series::clip_max(&s[0], &s[1]), + (true, true) => polars_ops::series::clip( + s[0].as_materialized_series(), + s[1].as_materialized_series(), + s[2].as_materialized_series(), + ), + (true, false) => polars_ops::series::clip_min( + s[0].as_materialized_series(), + s[1].as_materialized_series(), + ), + (false, true) => polars_ops::series::clip_max( + s[0].as_materialized_series(), + s[1].as_materialized_series(), + ), _ => unreachable!(), } + .map(Column::from) } diff --git a/crates/polars-plan/src/dsl/function_expr/coerce.rs b/crates/polars-plan/src/dsl/function_expr/coerce.rs index 652866491edb..bd03ede32c84 100644 --- a/crates/polars-plan/src/dsl/function_expr/coerce.rs +++ b/crates/polars-plan/src/dsl/function_expr/coerce.rs @@ -1,5 +1,5 @@ use polars_core::prelude::*; -pub fn as_struct(s: &[Series]) -> PolarsResult { - Ok(StructChunked::from_series(s[0].name().clone(), s)?.into_series()) +pub fn as_struct(s: &[Column]) -> PolarsResult { + Ok(StructChunked::from_columns(s[0].name().clone(), s)?.into_column()) } diff --git a/crates/polars-plan/src/dsl/function_expr/concat.rs b/crates/polars-plan/src/dsl/function_expr/concat.rs index 3c069fd90bf8..a021545f2ad0 100644 --- a/crates/polars-plan/src/dsl/function_expr/concat.rs +++ 
b/crates/polars-plan/src/dsl/function_expr/concat.rs @@ -1,6 +1,6 @@ use super::*; -pub(super) fn concat_expr(s: &[Series], rechunk: bool) -> PolarsResult { +pub(super) fn concat_expr(s: &[Column], rechunk: bool) -> PolarsResult { let mut first = s[0].clone(); for s in &s[1..] { diff --git a/crates/polars-plan/src/dsl/function_expr/correlation.rs b/crates/polars-plan/src/dsl/function_expr/correlation.rs index 216a635ba475..0413bac9dc01 100644 --- a/crates/polars-plan/src/dsl/function_expr/correlation.rs +++ b/crates/polars-plan/src/dsl/function_expr/correlation.rs @@ -25,7 +25,7 @@ impl Display for CorrelationMethod { } } -pub(super) fn corr(s: &[Series], ddof: u8, method: CorrelationMethod) -> PolarsResult { +pub(super) fn corr(s: &[Column], ddof: u8, method: CorrelationMethod) -> PolarsResult { match method { CorrelationMethod::Pearson => pearson_corr(s, ddof), #[cfg(all(feature = "rank", feature = "propagate_nans"))] @@ -36,7 +36,7 @@ pub(super) fn corr(s: &[Series], ddof: u8, method: CorrelationMethod) -> PolarsR } } -fn covariance(s: &[Series], ddof: u8) -> PolarsResult { +fn covariance(s: &[Column], ddof: u8) -> PolarsResult { let a = &s[0]; let b = &s[1]; let name = PlSmallStr::from_static("cov"); @@ -45,7 +45,7 @@ fn covariance(s: &[Series], ddof: u8) -> PolarsResult { let ret = match a.dtype() { DataType::Float32 => { let ret = cov(a.f32().unwrap(), b.f32().unwrap(), ddof).map(|v| v as f32); - return Ok(Series::new(name, &[ret])); + return Ok(Column::new(name, &[ret])); }, DataType::Float64 => cov(a.f64().unwrap(), b.f64().unwrap(), ddof), DataType::Int32 => cov(a.i32().unwrap(), b.i32().unwrap(), ddof), @@ -58,10 +58,10 @@ fn covariance(s: &[Series], ddof: u8) -> PolarsResult { cov(a.f64().unwrap(), b.f64().unwrap(), ddof) }, }; - Ok(Series::new(name, &[ret])) + Ok(Column::new(name, &[ret])) } -fn pearson_corr(s: &[Series], ddof: u8) -> PolarsResult { +fn pearson_corr(s: &[Column], ddof: u8) -> PolarsResult { let a = &s[0]; let b = &s[1]; let name = 
PlSmallStr::from_static("pearson_corr"); @@ -70,7 +70,7 @@ fn pearson_corr(s: &[Series], ddof: u8) -> PolarsResult { let ret = match a.dtype() { DataType::Float32 => { let ret = pearson_corr(a.f32().unwrap(), b.f32().unwrap(), ddof).map(|v| v as f32); - return Ok(Series::new(name.clone(), &[ret])); + return Ok(Column::new(name.clone(), &[ret])); }, DataType::Float64 => pearson_corr(a.f64().unwrap(), b.f64().unwrap(), ddof), DataType::Int32 => pearson_corr(a.i32().unwrap(), b.i32().unwrap(), ddof), @@ -82,29 +82,29 @@ fn pearson_corr(s: &[Series], ddof: u8) -> PolarsResult { pearson_corr(a.f64().unwrap(), b.f64().unwrap(), ddof) }, }; - Ok(Series::new(name, &[ret])) + Ok(Column::new(name, &[ret])) } #[cfg(all(feature = "rank", feature = "propagate_nans"))] -fn spearman_rank_corr(s: &[Series], ddof: u8, propagate_nans: bool) -> PolarsResult { - use polars_core::utils::coalesce_nulls_series; +fn spearman_rank_corr(s: &[Column], ddof: u8, propagate_nans: bool) -> PolarsResult { + use polars_core::utils::coalesce_nulls_columns; use polars_ops::chunked_array::nan_propagating_aggregate::nan_max_s; let a = &s[0]; let b = &s[1]; - let (a, b) = coalesce_nulls_series(a, b); + let (a, b) = coalesce_nulls_columns(a, b); let name = PlSmallStr::from_static("spearman_rank_correlation"); if propagate_nans && a.dtype().is_float() { for s in [&a, &b] { - if nan_max_s(s, PlSmallStr::EMPTY) + if nan_max_s(s.as_materialized_series(), PlSmallStr::EMPTY) .get(0) .unwrap() .extract::() .unwrap() .is_nan() { - return Ok(Series::new(name, &[f64::NAN])); + return Ok(Column::new(name, &[f64::NAN])); } } } @@ -113,20 +113,26 @@ fn spearman_rank_corr(s: &[Series], ddof: u8, propagate_nans: bool) -> PolarsRes let a = a.drop_nulls(); let b = b.drop_nulls(); - let a_rank = a.rank( - RankOptions { - method: RankMethod::Average, - ..Default::default() - }, - None, - ); - let b_rank = b.rank( - RankOptions { - method: RankMethod::Average, - ..Default::default() - }, - None, - ); + let a_rank = a + 
.as_materialized_series() + .rank( + RankOptions { + method: RankMethod::Average, + ..Default::default() + }, + None, + ) + .into(); + let b_rank = b + .as_materialized_series() + .rank( + RankOptions { + method: RankMethod::Average, + ..Default::default() + }, + None, + ) + .into(); pearson_corr(&[a_rank, b_rank], ddof) } diff --git a/crates/polars-plan/src/dsl/function_expr/cum.rs b/crates/polars-plan/src/dsl/function_expr/cum.rs index 74ad6eec596a..755199c3a2a0 100644 --- a/crates/polars-plan/src/dsl/function_expr/cum.rs +++ b/crates/polars-plan/src/dsl/function_expr/cum.rs @@ -1,23 +1,28 @@ use super::*; -pub(super) fn cum_count(s: &Series, reverse: bool) -> PolarsResult { - polars_ops::prelude::cum_count(s, reverse) +pub(super) fn cum_count(s: &Column, reverse: bool) -> PolarsResult { + // @scalar-opt + polars_ops::prelude::cum_count(s.as_materialized_series(), reverse).map(Column::from) } -pub(super) fn cum_sum(s: &Series, reverse: bool) -> PolarsResult { - polars_ops::prelude::cum_sum(s, reverse) +pub(super) fn cum_sum(s: &Column, reverse: bool) -> PolarsResult { + // @scalar-opt + polars_ops::prelude::cum_sum(s.as_materialized_series(), reverse).map(Column::from) } -pub(super) fn cum_prod(s: &Series, reverse: bool) -> PolarsResult { - polars_ops::prelude::cum_prod(s, reverse) +pub(super) fn cum_prod(s: &Column, reverse: bool) -> PolarsResult { + // @scalar-opt + polars_ops::prelude::cum_prod(s.as_materialized_series(), reverse).map(Column::from) } -pub(super) fn cum_min(s: &Series, reverse: bool) -> PolarsResult { - polars_ops::prelude::cum_min(s, reverse) +pub(super) fn cum_min(s: &Column, reverse: bool) -> PolarsResult { + // @scalar-opt + polars_ops::prelude::cum_min(s.as_materialized_series(), reverse).map(Column::from) } -pub(super) fn cum_max(s: &Series, reverse: bool) -> PolarsResult { - polars_ops::prelude::cum_max(s, reverse) +pub(super) fn cum_max(s: &Column, reverse: bool) -> PolarsResult { + // @scalar-opt + 
polars_ops::prelude::cum_max(s.as_materialized_series(), reverse).map(Column::from) } pub(super) mod dtypes { diff --git a/crates/polars-plan/src/dsl/function_expr/cut.rs b/crates/polars-plan/src/dsl/function_expr/cut.rs new file mode 100644 index 000000000000..faafc7aa3f76 --- /dev/null +++ b/crates/polars-plan/src/dsl/function_expr/cut.rs @@ -0,0 +1,37 @@ +use polars_core::prelude::*; + +pub(crate) fn cut( + s: &Column, + breaks: Vec, + labels: Option>, + left_closed: bool, + include_breaks: bool, +) -> PolarsResult { + polars_ops::prelude::cut( + s.as_materialized_series(), + breaks, + labels, + left_closed, + include_breaks, + ) + .map(Column::from) +} + +pub(crate) fn qcut( + s: &Column, + probs: Vec, + labels: Option>, + left_closed: bool, + allow_duplicates: bool, + include_breaks: bool, +) -> PolarsResult { + polars_ops::prelude::qcut( + s.as_materialized_series(), + probs, + labels, + left_closed, + allow_duplicates, + include_breaks, + ) + .map(Column::from) +} diff --git a/crates/polars-plan/src/dsl/function_expr/datetime.rs b/crates/polars-plan/src/dsl/function_expr/datetime.rs index 1d1d6a5022e4..436e1718d5e3 100644 --- a/crates/polars-plan/src/dsl/function_expr/datetime.rs +++ b/crates/polars-plan/src/dsl/function_expr/datetime.rs @@ -196,40 +196,56 @@ impl Display for TemporalFunction { } } -pub(super) fn millennium(s: &Series) -> PolarsResult { - s.millennium().map(|ca| ca.into_series()) -} -pub(super) fn century(s: &Series) -> PolarsResult { - s.century().map(|ca| ca.into_series()) -} -pub(super) fn year(s: &Series) -> PolarsResult { - s.year().map(|ca| ca.into_series()) -} -pub(super) fn is_leap_year(s: &Series) -> PolarsResult { - s.is_leap_year().map(|ca| ca.into_series()) -} -pub(super) fn iso_year(s: &Series) -> PolarsResult { - s.iso_year().map(|ca| ca.into_series()) -} -pub(super) fn month(s: &Series) -> PolarsResult { - s.month().map(|ca| ca.into_series()) -} -pub(super) fn quarter(s: &Series) -> PolarsResult { - s.quarter().map(|ca| 
ca.into_series()) -} -pub(super) fn week(s: &Series) -> PolarsResult { - s.week().map(|ca| ca.into_series()) -} -pub(super) fn weekday(s: &Series) -> PolarsResult { - s.weekday().map(|ca| ca.into_series()) -} -pub(super) fn day(s: &Series) -> PolarsResult { - s.day().map(|ca| ca.into_series()) -} -pub(super) fn ordinal_day(s: &Series) -> PolarsResult { - s.ordinal_day().map(|ca| ca.into_series()) -} -pub(super) fn time(s: &Series) -> PolarsResult { +pub(super) fn millennium(s: &Column) -> PolarsResult { + s.as_materialized_series() + .millennium() + .map(|ca| ca.into_column()) +} +pub(super) fn century(s: &Column) -> PolarsResult { + s.as_materialized_series() + .century() + .map(|ca| ca.into_column()) +} +pub(super) fn year(s: &Column) -> PolarsResult { + s.as_materialized_series().year().map(|ca| ca.into_column()) +} +pub(super) fn is_leap_year(s: &Column) -> PolarsResult { + s.as_materialized_series() + .is_leap_year() + .map(|ca| ca.into_column()) +} +pub(super) fn iso_year(s: &Column) -> PolarsResult { + s.as_materialized_series() + .iso_year() + .map(|ca| ca.into_column()) +} +pub(super) fn month(s: &Column) -> PolarsResult { + s.as_materialized_series() + .month() + .map(|ca| ca.into_column()) +} +pub(super) fn quarter(s: &Column) -> PolarsResult { + s.as_materialized_series() + .quarter() + .map(|ca| ca.into_column()) +} +pub(super) fn week(s: &Column) -> PolarsResult { + s.as_materialized_series().week().map(|ca| ca.into_column()) +} +pub(super) fn weekday(s: &Column) -> PolarsResult { + s.as_materialized_series() + .weekday() + .map(|ca| ca.into_column()) +} +pub(super) fn day(s: &Column) -> PolarsResult { + s.as_materialized_series().day().map(|ca| ca.into_column()) +} +pub(super) fn ordinal_day(s: &Column) -> PolarsResult { + s.as_materialized_series() + .ordinal_day() + .map(|ca| ca.into_column()) +} +pub(super) fn time(s: &Column) -> PolarsResult { match s.dtype() { #[cfg(feature = "timezones")] DataType::Datetime(_, Some(_)) => 
polars_ops::prelude::replace_time_zone( @@ -238,13 +254,19 @@ pub(super) fn time(s: &Series) -> PolarsResult { &StringChunked::from_iter(std::iter::once("raise")), NonExistent::Raise, )? - .cast(&DataType::Time), - DataType::Datetime(_, _) => s.datetime().unwrap().cast(&DataType::Time), + .cast(&DataType::Time) + .map(Column::from), + DataType::Datetime(_, _) => s + .datetime() + .unwrap() + .cast(&DataType::Time) + .map(Column::from), DataType::Time => Ok(s.clone()), dtype => polars_bail!(ComputeError: "expected Datetime or Time, got {}", dtype), } + .map(Column::from) } -pub(super) fn date(s: &Series) -> PolarsResult { +pub(super) fn date(s: &Column) -> PolarsResult { match s.dtype() { #[cfg(feature = "timezones")] DataType::Datetime(_, Some(tz)) => { @@ -261,14 +283,18 @@ pub(super) fn date(s: &Series) -> PolarsResult { // DST transitions may not preserve sortedness. out.set_sorted_flag(IsSorted::Not); } - Ok(out) + Ok(out.into()) }, - DataType::Datetime(_, _) => s.datetime().unwrap().cast(&DataType::Date), + DataType::Datetime(_, _) => s + .datetime() + .unwrap() + .cast(&DataType::Date) + .map(Column::from), DataType::Date => Ok(s.clone()), dtype => polars_bail!(ComputeError: "expected Datetime or Date, got {}", dtype), } } -pub(super) fn datetime(s: &Series) -> PolarsResult { +pub(super) fn datetime(s: &Column) -> PolarsResult { match s.dtype() { #[cfg(feature = "timezones")] DataType::Datetime(tu, Some(tz)) => { @@ -285,111 +311,139 @@ pub(super) fn datetime(s: &Series) -> PolarsResult { // DST transitions may not preserve sortedness. 
out.set_sorted_flag(IsSorted::Not); } - Ok(out) + Ok(out.into()) }, - DataType::Datetime(tu, _) => s.datetime().unwrap().cast(&DataType::Datetime(*tu, None)), + DataType::Datetime(tu, _) => s + .datetime() + .unwrap() + .cast(&DataType::Datetime(*tu, None)) + .map(Column::from), dtype => polars_bail!(ComputeError: "expected Datetime, got {}", dtype), } } -pub(super) fn hour(s: &Series) -> PolarsResult { - s.hour().map(|ca| ca.into_series()) +pub(super) fn hour(s: &Column) -> PolarsResult { + s.as_materialized_series().hour().map(|ca| ca.into_column()) } -pub(super) fn minute(s: &Series) -> PolarsResult { - s.minute().map(|ca| ca.into_series()) +pub(super) fn minute(s: &Column) -> PolarsResult { + s.as_materialized_series() + .minute() + .map(|ca| ca.into_column()) } -pub(super) fn second(s: &Series) -> PolarsResult { - s.second().map(|ca| ca.into_series()) +pub(super) fn second(s: &Column) -> PolarsResult { + s.as_materialized_series() + .second() + .map(|ca| ca.into_column()) } -pub(super) fn millisecond(s: &Series) -> PolarsResult { - s.nanosecond() - .map(|ca| (ca.wrapping_trunc_div_scalar(1_000_000)).into_series()) +pub(super) fn millisecond(s: &Column) -> PolarsResult { + s.as_materialized_series() + .nanosecond() + .map(|ca| (ca.wrapping_trunc_div_scalar(1_000_000)).into_column()) } -pub(super) fn microsecond(s: &Series) -> PolarsResult { - s.nanosecond() - .map(|ca| (ca.wrapping_trunc_div_scalar(1_000)).into_series()) +pub(super) fn microsecond(s: &Column) -> PolarsResult { + s.as_materialized_series() + .nanosecond() + .map(|ca| (ca.wrapping_trunc_div_scalar(1_000)).into_column()) } -pub(super) fn nanosecond(s: &Series) -> PolarsResult { - s.nanosecond().map(|ca| ca.into_series()) +pub(super) fn nanosecond(s: &Column) -> PolarsResult { + s.as_materialized_series() + .nanosecond() + .map(|ca| ca.into_column()) } #[cfg(feature = "dtype-duration")] -pub(super) fn total_days(s: &Series) -> PolarsResult { - s.duration().map(|ca| ca.days().into_series()) 
+pub(super) fn total_days(s: &Column) -> PolarsResult { + s.as_materialized_series() + .duration() + .map(|ca| ca.days().into_column()) } #[cfg(feature = "dtype-duration")] -pub(super) fn total_hours(s: &Series) -> PolarsResult { - s.duration().map(|ca| ca.hours().into_series()) +pub(super) fn total_hours(s: &Column) -> PolarsResult { + s.as_materialized_series() + .duration() + .map(|ca| ca.hours().into_column()) } #[cfg(feature = "dtype-duration")] -pub(super) fn total_minutes(s: &Series) -> PolarsResult { - s.duration().map(|ca| ca.minutes().into_series()) +pub(super) fn total_minutes(s: &Column) -> PolarsResult { + s.as_materialized_series() + .duration() + .map(|ca| ca.minutes().into_column()) } #[cfg(feature = "dtype-duration")] -pub(super) fn total_seconds(s: &Series) -> PolarsResult { - s.duration().map(|ca| ca.seconds().into_series()) +pub(super) fn total_seconds(s: &Column) -> PolarsResult { + s.as_materialized_series() + .duration() + .map(|ca| ca.seconds().into_column()) } #[cfg(feature = "dtype-duration")] -pub(super) fn total_milliseconds(s: &Series) -> PolarsResult { - s.duration().map(|ca| ca.milliseconds().into_series()) +pub(super) fn total_milliseconds(s: &Column) -> PolarsResult { + s.as_materialized_series() + .duration() + .map(|ca| ca.milliseconds().into_column()) } #[cfg(feature = "dtype-duration")] -pub(super) fn total_microseconds(s: &Series) -> PolarsResult { - s.duration().map(|ca| ca.microseconds().into_series()) +pub(super) fn total_microseconds(s: &Column) -> PolarsResult { + s.as_materialized_series() + .duration() + .map(|ca| ca.microseconds().into_column()) } #[cfg(feature = "dtype-duration")] -pub(super) fn total_nanoseconds(s: &Series) -> PolarsResult { - s.duration().map(|ca| ca.nanoseconds().into_series()) +pub(super) fn total_nanoseconds(s: &Column) -> PolarsResult { + s.as_materialized_series() + .duration() + .map(|ca| ca.nanoseconds().into_column()) } -pub(super) fn timestamp(s: &Series, tu: TimeUnit) -> PolarsResult { - 
s.timestamp(tu).map(|ca| ca.into_series()) +pub(super) fn timestamp(s: &Column, tu: TimeUnit) -> PolarsResult { + s.as_materialized_series() + .timestamp(tu) + .map(|ca| ca.into_column()) } -pub(super) fn to_string(s: &Series, format: &str) -> PolarsResult { - TemporalMethods::to_string(s, format) +pub(super) fn to_string(s: &Column, format: &str) -> PolarsResult { + TemporalMethods::to_string(s.as_materialized_series(), format).map(Column::from) } #[cfg(feature = "timezones")] -pub(super) fn convert_time_zone(s: &Series, time_zone: &TimeZone) -> PolarsResult { +pub(super) fn convert_time_zone(s: &Column, time_zone: &TimeZone) -> PolarsResult { match s.dtype() { DataType::Datetime(_, _) => { let mut ca = s.datetime()?.clone(); validate_time_zone(time_zone)?; ca.set_time_zone(time_zone.clone())?; - Ok(ca.into_series()) + Ok(ca.into_column()) }, dtype => polars_bail!(ComputeError: "expected Datetime, got {}", dtype), } } -pub(super) fn with_time_unit(s: &Series, tu: TimeUnit) -> PolarsResult { +pub(super) fn with_time_unit(s: &Column, tu: TimeUnit) -> PolarsResult { match s.dtype() { DataType::Datetime(_, _) => { let mut ca = s.datetime()?.clone(); ca.set_time_unit(tu); - Ok(ca.into_series()) + Ok(ca.into_column()) }, #[cfg(feature = "dtype-duration")] DataType::Duration(_) => { - let mut ca = s.duration()?.clone(); + let mut ca = s.as_materialized_series().duration()?.clone(); ca.set_time_unit(tu); - Ok(ca.into_series()) + Ok(ca.into_column()) }, dt => polars_bail!(ComputeError: "dtype `{}` has no time unit", dt), } } -pub(super) fn cast_time_unit(s: &Series, tu: TimeUnit) -> PolarsResult { +pub(super) fn cast_time_unit(s: &Column, tu: TimeUnit) -> PolarsResult { match s.dtype() { DataType::Datetime(_, _) => { let ca = s.datetime()?; - Ok(ca.cast_time_unit(tu).into_series()) + Ok(ca.cast_time_unit(tu).into_column()) }, #[cfg(feature = "dtype-duration")] DataType::Duration(_) => { - let ca = s.duration()?; - Ok(ca.cast_time_unit(tu).into_series()) + let ca = 
s.as_materialized_series().duration()?; + Ok(ca.cast_time_unit(tu).into_column()) }, dt => polars_bail!(ComputeError: "dtype `{}` has no time unit", dt), } } -pub(super) fn truncate(s: &[Series]) -> PolarsResult { +pub(super) fn truncate(s: &[Column]) -> PolarsResult { let time_series = &s[0]; let every = s[1].str()?; @@ -399,10 +453,10 @@ pub(super) fn truncate(s: &[Series]) -> PolarsResult { Some(tz) => time_series .datetime()? .truncate(tz.parse::().ok().as_ref(), every)? - .into_series(), - _ => time_series.datetime()?.truncate(None, every)?.into_series(), + .into_column(), + _ => time_series.datetime()?.truncate(None, every)?.into_column(), }, - DataType::Date => time_series.date()?.truncate(None, every)?.into_series(), + DataType::Date => time_series.date()?.truncate(None, every)?.into_column(), dt => polars_bail!(opq = round, got = dt, expected = "date/datetime"), }; out.set_sorted_flag(time_series.is_sorted_flag()); @@ -410,12 +464,12 @@ pub(super) fn truncate(s: &[Series]) -> PolarsResult { } #[cfg(feature = "offset_by")] -pub(super) fn offset_by(s: &[Series]) -> PolarsResult { - impl_offset_by(&s[0], &s[1]) +pub(super) fn offset_by(s: &[Column]) -> PolarsResult { + impl_offset_by(s[0].as_materialized_series(), s[1].as_materialized_series()).map(Column::from) } #[cfg(feature = "month_start")] -pub(super) fn month_start(s: &Series) -> PolarsResult { +pub(super) fn month_start(s: &Column) -> PolarsResult { Ok(match s.dtype() { DataType::Datetime(_, tz) => match tz { #[cfg(feature = "timezones")] @@ -423,16 +477,16 @@ pub(super) fn month_start(s: &Series) -> PolarsResult { .datetime() .unwrap() .month_start(tz.parse::().ok().as_ref())? 
- .into_series(), - _ => s.datetime().unwrap().month_start(None)?.into_series(), + .into_column(), + _ => s.datetime().unwrap().month_start(None)?.into_column(), }, - DataType::Date => s.date().unwrap().month_start(None)?.into_series(), + DataType::Date => s.date().unwrap().month_start(None)?.into_column(), dt => polars_bail!(opq = month_start, got = dt, expected = "date/datetime"), }) } #[cfg(feature = "month_end")] -pub(super) fn month_end(s: &Series) -> PolarsResult { +pub(super) fn month_end(s: &Column) -> PolarsResult { Ok(match s.dtype() { DataType::Datetime(_, tz) => match tz { #[cfg(feature = "timezones")] @@ -440,22 +494,22 @@ pub(super) fn month_end(s: &Series) -> PolarsResult { .datetime() .unwrap() .month_end(tz.parse::().ok().as_ref())? - .into_series(), - _ => s.datetime().unwrap().month_end(None)?.into_series(), + .into_column(), + _ => s.datetime().unwrap().month_end(None)?.into_column(), }, - DataType::Date => s.date().unwrap().month_end(None)?.into_series(), + DataType::Date => s.date().unwrap().month_end(None)?.into_column(), dt => polars_bail!(opq = month_end, got = dt, expected = "date/datetime"), }) } #[cfg(feature = "timezones")] -pub(super) fn base_utc_offset(s: &Series) -> PolarsResult { +pub(super) fn base_utc_offset(s: &Column) -> PolarsResult { match s.dtype() { DataType::Datetime(time_unit, Some(tz)) => { let tz = tz .parse::() .expect("Time zone has already been validated"); - Ok(base_utc_offset_fn(s.datetime().unwrap(), time_unit, &tz).into_series()) + Ok(base_utc_offset_fn(s.datetime().unwrap(), time_unit, &tz).into_column()) }, dt => polars_bail!( opq = base_utc_offset, @@ -465,13 +519,13 @@ pub(super) fn base_utc_offset(s: &Series) -> PolarsResult { } } #[cfg(feature = "timezones")] -pub(super) fn dst_offset(s: &Series) -> PolarsResult { +pub(super) fn dst_offset(s: &Column) -> PolarsResult { match s.dtype() { DataType::Datetime(time_unit, Some(tz)) => { let tz = tz .parse::() .expect("Time zone has already been validated"); - 
Ok(dst_offset_fn(s.datetime().unwrap(), time_unit, &tz).into_series()) + Ok(dst_offset_fn(s.datetime().unwrap(), time_unit, &tz).into_column()) }, dt => polars_bail!( opq = dst_offset, @@ -481,7 +535,7 @@ pub(super) fn dst_offset(s: &Series) -> PolarsResult { } } -pub(super) fn round(s: &[Series]) -> PolarsResult { +pub(super) fn round(s: &[Column]) -> PolarsResult { let time_series = &s[0]; let every = s[1].str()?; @@ -492,18 +546,18 @@ pub(super) fn round(s: &[Series]) -> PolarsResult { .datetime() .unwrap() .round(every, tz.parse::().ok().as_ref())? - .into_series(), + .into_column(), _ => time_series .datetime() .unwrap() .round(every, None)? - .into_series(), + .into_column(), }, DataType::Date => time_series .date() .unwrap() .round(every, None)? - .into_series(), + .into_column(), dt => polars_bail!(opq = round, got = dt, expected = "date/datetime"), }) } diff --git a/crates/polars-plan/src/dsl/function_expr/dispatch.rs b/crates/polars-plan/src/dsl/function_expr/dispatch.rs index 12275fc57200..6ee70819b4f7 100644 --- a/crates/polars-plan/src/dsl/function_expr/dispatch.rs +++ b/crates/polars-plan/src/dsl/function_expr/dispatch.rs @@ -1,41 +1,42 @@ use super::*; -pub(super) fn reverse(s: &Series) -> PolarsResult { +pub(super) fn reverse(s: &Column) -> PolarsResult { Ok(s.reverse()) } #[cfg(feature = "approx_unique")] -pub(super) fn approx_n_unique(s: &Series) -> PolarsResult { - polars_ops::prelude::approx_n_unique(s) +pub(super) fn approx_n_unique(s: &Column) -> PolarsResult { + polars_ops::prelude::approx_n_unique(s.as_materialized_series()).map(Column::from) } #[cfg(feature = "diff")] -pub(super) fn diff(s: &Series, n: i64, null_behavior: NullBehavior) -> PolarsResult { - polars_ops::prelude::diff(s, n, null_behavior) +pub(super) fn diff(s: &Column, n: i64, null_behavior: NullBehavior) -> PolarsResult { + polars_ops::prelude::diff(s.as_materialized_series(), n, null_behavior).map(Column::from) } #[cfg(feature = "pct_change")] -pub(super) fn pct_change(s: 
&[Series]) -> PolarsResult { - polars_ops::prelude::pct_change(&s[0], &s[1]) +pub(super) fn pct_change(s: &[Column]) -> PolarsResult { + polars_ops::prelude::pct_change(s[0].as_materialized_series(), s[1].as_materialized_series()) + .map(Column::from) } #[cfg(feature = "interpolate")] -pub(super) fn interpolate(s: &Series, method: InterpolationMethod) -> PolarsResult { - Ok(polars_ops::prelude::interpolate(s, method)) +pub(super) fn interpolate(s: &Column, method: InterpolationMethod) -> PolarsResult { + Ok(polars_ops::prelude::interpolate(s.as_materialized_series(), method).into()) } #[cfg(feature = "interpolate_by")] -pub(super) fn interpolate_by(s: &[Series]) -> PolarsResult { +pub(super) fn interpolate_by(s: &[Column]) -> PolarsResult { let by = &s[1]; - let by_is_sorted = by.is_sorted(Default::default())?; + let by_is_sorted = by.as_materialized_series().is_sorted(Default::default())?; polars_ops::prelude::interpolate_by(&s[0], by, by_is_sorted) } -pub(super) fn to_physical(s: &Series) -> PolarsResult { - Ok(s.to_physical_repr().into_owned()) +pub(super) fn to_physical(s: &Column) -> PolarsResult { + Ok(s.to_physical_repr()) } -pub(super) fn set_sorted_flag(s: &Series, sorted: IsSorted) -> PolarsResult { +pub(super) fn set_sorted_flag(s: &Column, sorted: IsSorted) -> PolarsResult { let mut s = s.clone(); s.set_sorted_flag(sorted); Ok(s) @@ -43,34 +44,35 @@ pub(super) fn set_sorted_flag(s: &Series, sorted: IsSorted) -> PolarsResult, non_existent: NonExistent, -) -> PolarsResult { +) -> PolarsResult { let s1 = &s[0]; let ca = s1.datetime().unwrap(); let s2 = &s[1].str()?; - Ok(polars_ops::prelude::replace_time_zone(ca, time_zone, s2, non_existent)?.into_series()) + Ok(polars_ops::prelude::replace_time_zone(ca, time_zone, s2, non_existent)?.into_column()) } #[cfg(feature = "dtype-struct")] pub(super) fn value_counts( - s: &Series, + s: &Column, sort: bool, parallel: bool, name: PlSmallStr, normalize: bool, -) -> PolarsResult { - s.value_counts(sort, parallel, 
name, normalize) - .map(|df| df.into_struct(s.name().clone()).into_series()) +) -> PolarsResult { + s.as_materialized_series() + .value_counts(sort, parallel, name, normalize) + .map(|df| df.into_struct(s.name().clone()).into_column()) } #[cfg(feature = "unique_counts")] -pub(super) fn unique_counts(s: &Series) -> PolarsResult { - polars_ops::prelude::unique_counts(s) +pub(super) fn unique_counts(s: &Column) -> PolarsResult { + polars_ops::prelude::unique_counts(s.as_materialized_series()).map(Column::from) } -pub(super) fn reshape(s: &Series, dimensions: &[i64], nested: &NestedType) -> PolarsResult { +pub(super) fn reshape(s: &Column, dimensions: &[i64], nested: &NestedType) -> PolarsResult { match nested { NestedType::List => s.reshape_list(dimensions), #[cfg(feature = "dtype-array")] @@ -79,120 +81,146 @@ pub(super) fn reshape(s: &Series, dimensions: &[i64], nested: &NestedType) -> Po } #[cfg(feature = "repeat_by")] -pub(super) fn repeat_by(s: &[Series]) -> PolarsResult { +pub(super) fn repeat_by(s: &[Column]) -> PolarsResult { let by = &s[1]; let s = &s[0]; let by = by.cast(&IDX_DTYPE)?; - polars_ops::chunked_array::repeat_by(s, by.idx()?).map(|ok| ok.into_series()) + polars_ops::chunked_array::repeat_by(s.as_materialized_series(), by.idx()?) 
+ .map(|ok| ok.into_column()) } -pub(super) fn backward_fill(s: &Series, limit: FillNullLimit) -> PolarsResult { +pub(super) fn backward_fill(s: &Column, limit: FillNullLimit) -> PolarsResult { s.fill_null(FillNullStrategy::Backward(limit)) } -pub(super) fn forward_fill(s: &Series, limit: FillNullLimit) -> PolarsResult { +pub(super) fn forward_fill(s: &Column, limit: FillNullLimit) -> PolarsResult { s.fill_null(FillNullStrategy::Forward(limit)) } -pub(super) fn max_horizontal(s: &mut [Series]) -> PolarsResult> { +pub(super) fn max_horizontal(s: &mut [Column]) -> PolarsResult> { polars_ops::prelude::max_horizontal(s) } -pub(super) fn min_horizontal(s: &mut [Series]) -> PolarsResult> { +pub(super) fn min_horizontal(s: &mut [Column]) -> PolarsResult> { polars_ops::prelude::min_horizontal(s) } -pub(super) fn sum_horizontal(s: &mut [Series]) -> PolarsResult> { +pub(super) fn sum_horizontal(s: &mut [Column]) -> PolarsResult> { polars_ops::prelude::sum_horizontal(s) } -pub(super) fn mean_horizontal(s: &mut [Series]) -> PolarsResult> { +pub(super) fn mean_horizontal(s: &mut [Column]) -> PolarsResult> { polars_ops::prelude::mean_horizontal(s) } -pub(super) fn drop_nulls(s: &Series) -> PolarsResult { +pub(super) fn drop_nulls(s: &Column) -> PolarsResult { Ok(s.drop_nulls()) } #[cfg(feature = "mode")] -pub(super) fn mode(s: &Series) -> PolarsResult { - mode::mode(s) +pub(super) fn mode(s: &Column) -> PolarsResult { + mode::mode(s.as_materialized_series()).map(Column::from) } #[cfg(feature = "moment")] -pub(super) fn skew(s: &Series, bias: bool) -> PolarsResult { - s.skew(bias) - .map(|opt_v| Series::new(s.name().clone(), &[opt_v])) +pub(super) fn skew(s: &Column, bias: bool) -> PolarsResult { + // @scalar-opt + s.as_materialized_series() + .skew(bias) + .map(|opt_v| Column::new(s.name().clone(), &[opt_v])) } #[cfg(feature = "moment")] -pub(super) fn kurtosis(s: &Series, fisher: bool, bias: bool) -> PolarsResult { - s.kurtosis(fisher, bias) - .map(|opt_v| 
Series::new(s.name().clone(), &[opt_v])) +pub(super) fn kurtosis(s: &Column, fisher: bool, bias: bool) -> PolarsResult { + // @scalar-opt + s.as_materialized_series() + .kurtosis(fisher, bias) + .map(|opt_v| Column::new(s.name().clone(), &[opt_v])) } -pub(super) fn arg_unique(s: &Series) -> PolarsResult { - s.arg_unique().map(|ok| ok.into_series()) +pub(super) fn arg_unique(s: &Column) -> PolarsResult { + // @scalar-opt + s.as_materialized_series() + .arg_unique() + .map(|ok| ok.into_column()) } #[cfg(feature = "rank")] -pub(super) fn rank(s: &Series, options: RankOptions, seed: Option) -> PolarsResult { - Ok(s.rank(options, seed)) +pub(super) fn rank(s: &Column, options: RankOptions, seed: Option) -> PolarsResult { + Ok(s.as_materialized_series().rank(options, seed).into_column()) } #[cfg(feature = "hist")] pub(super) fn hist( - s: &[Series], + s: &[Column], bin_count: Option, include_category: bool, include_breakpoint: bool, -) -> PolarsResult { - let bins = if s.len() == 2 { - Some(s[1].clone()) - } else { - None - }; - let s = &s[0]; - hist_series(s, bin_count, bins, include_category, include_breakpoint) +) -> PolarsResult { + let bins = if s.len() == 2 { Some(&s[1]) } else { None }; + let s = s[0].as_materialized_series(); + hist_series( + s, + bin_count, + bins.map(|b| b.as_materialized_series().clone()), + include_category, + include_breakpoint, + ) + .map(Column::from) } #[cfg(feature = "replace")] -pub(super) fn replace(s: &[Series]) -> PolarsResult { - polars_ops::series::replace(&s[0], &s[1], &s[2]) +pub(super) fn replace(s: &[Column]) -> PolarsResult { + polars_ops::series::replace( + s[0].as_materialized_series(), + s[1].as_materialized_series(), + s[2].as_materialized_series(), + ) + .map(Column::from) } #[cfg(feature = "replace")] -pub(super) fn replace_strict(s: &[Series], return_dtype: Option) -> PolarsResult { +pub(super) fn replace_strict(s: &[Column], return_dtype: Option) -> PolarsResult { match s.get(3) { - Some(default) => { - 
polars_ops::series::replace_or_default(&s[0], &s[1], &s[2], default, return_dtype) - }, - None => polars_ops::series::replace_strict(&s[0], &s[1], &s[2], return_dtype), + Some(default) => polars_ops::series::replace_or_default( + s[0].as_materialized_series(), + s[1].as_materialized_series(), + s[2].as_materialized_series(), + default.as_materialized_series(), + return_dtype, + ), + None => polars_ops::series::replace_strict( + s[0].as_materialized_series(), + s[1].as_materialized_series(), + s[2].as_materialized_series(), + return_dtype, + ), } + .map(Column::from) } pub(super) fn fill_null_with_strategy( - s: &Series, + s: &Column, strategy: FillNullStrategy, -) -> PolarsResult { +) -> PolarsResult { s.fill_null(strategy) } -pub(super) fn gather_every(s: &Series, n: usize, offset: usize) -> PolarsResult { +pub(super) fn gather_every(s: &Column, n: usize, offset: usize) -> PolarsResult { polars_ensure!(n > 0, InvalidOperation: "gather_every(n): n should be positive"); Ok(s.gather_every(n, offset)) } #[cfg(feature = "reinterpret")] -pub(super) fn reinterpret(s: &Series, signed: bool) -> PolarsResult { - polars_ops::series::reinterpret(s, signed) +pub(super) fn reinterpret(s: &Column, signed: bool) -> PolarsResult { + polars_ops::series::reinterpret(s.as_materialized_series(), signed).map(Column::from) } -pub(super) fn negate(s: &Series) -> PolarsResult { - polars_ops::series::negate(s) +pub(super) fn negate(s: &Column) -> PolarsResult { + polars_ops::series::negate(s.as_materialized_series()).map(Column::from) } -pub(super) fn extend_constant(s: &[Series]) -> PolarsResult { +pub(super) fn extend_constant(s: &[Column]) -> PolarsResult { let value = &s[1]; let n = &s[2]; polars_ensure!(value.len() == 1 && n.len() == 1, ComputeError: "value and n should have unit length."); diff --git a/crates/polars-plan/src/dsl/function_expr/ewm.rs b/crates/polars-plan/src/dsl/function_expr/ewm.rs index b824ca3013e9..6f7a20045503 100644 --- 
a/crates/polars-plan/src/dsl/function_expr/ewm.rs +++ b/crates/polars-plan/src/dsl/function_expr/ewm.rs @@ -1,13 +1,13 @@ use super::*; -pub(super) fn ewm_mean(s: &Series, options: EWMOptions) -> PolarsResult { - polars_ops::prelude::ewm_mean(s, options) +pub(super) fn ewm_mean(s: &Column, options: EWMOptions) -> PolarsResult { + polars_ops::prelude::ewm_mean(s.as_materialized_series(), options).map(Column::from) } -pub(super) fn ewm_std(s: &Series, options: EWMOptions) -> PolarsResult { - polars_ops::prelude::ewm_std(s, options) +pub(super) fn ewm_std(s: &Column, options: EWMOptions) -> PolarsResult { + polars_ops::prelude::ewm_std(s.as_materialized_series(), options).map(Column::from) } -pub(super) fn ewm_var(s: &Series, options: EWMOptions) -> PolarsResult { - polars_ops::prelude::ewm_var(s, options) +pub(super) fn ewm_var(s: &Column, options: EWMOptions) -> PolarsResult { + polars_ops::prelude::ewm_var(s.as_materialized_series(), options).map(Column::from) } diff --git a/crates/polars-plan/src/dsl/function_expr/ewm_by.rs b/crates/polars-plan/src/dsl/function_expr/ewm_by.rs index c901dc22a25f..adfc66a01524 100644 --- a/crates/polars-plan/src/dsl/function_expr/ewm_by.rs +++ b/crates/polars-plan/src/dsl/function_expr/ewm_by.rs @@ -2,7 +2,7 @@ use polars_ops::series::SeriesMethods; use super::*; -pub(super) fn ewm_mean_by(s: &[Series], half_life: Duration) -> PolarsResult { +pub(super) fn ewm_mean_by(s: &[Column], half_life: Duration) -> PolarsResult { let time_zone = match s[1].dtype() { DataType::Datetime(_, Some(time_zone)) => Some(time_zone.as_str()), _ => None, @@ -13,6 +13,14 @@ pub(super) fn ewm_mean_by(s: &[Series], half_life: Duration) -> PolarsResult PolarsResult { +pub(super) fn fill_null(s: &[Column]) -> PolarsResult { let series = s[0].clone(); - let fill_value = s[1].clone(); // Nothing to fill, so return early // this is done after casting as the output type must be correct @@ -10,8 +9,10 @@ pub(super) fn fill_null(s: &[Series]) -> PolarsResult { 
return Ok(series); } + let fill_value = s[1].clone(); + // default branch - fn default(series: Series, fill_value: Series) -> PolarsResult { + fn default(series: Column, fill_value: Column) -> PolarsResult { let mask = series.is_not_null(); series.zip_with_same_type(&mask, &fill_value) } @@ -28,7 +29,7 @@ pub(super) fn fill_null(s: &[Series]) -> PolarsResult { let cats = series.to_physical_repr(); let mask = cats.is_not_null(); let out = cats - .zip_with_same_type(&mask, &Series::new(PlSmallStr::EMPTY, &[idx])) + .zip_with_same_type(&mask, &Column::new(PlSmallStr::EMPTY, &[idx])) .unwrap(); unsafe { return out.cast_unchecked(series.dtype()) } } @@ -46,6 +47,6 @@ pub(super) fn fill_null(s: &[Series]) -> PolarsResult { } } -pub(super) fn coalesce(s: &mut [Series]) -> PolarsResult { - coalesce_series(s) +pub(super) fn coalesce(s: &mut [Column]) -> PolarsResult { + coalesce_columns(s) } diff --git a/crates/polars-plan/src/dsl/function_expr/fused.rs b/crates/polars-plan/src/dsl/function_expr/fused.rs index a95ac809ebc7..088078105216 100644 --- a/crates/polars-plan/src/dsl/function_expr/fused.rs +++ b/crates/polars-plan/src/dsl/function_expr/fused.rs @@ -22,13 +22,13 @@ impl Display for FusedOperator { } } -pub(super) fn fused(input: &[Series], op: FusedOperator) -> PolarsResult { +pub(super) fn fused(input: &[Column], op: FusedOperator) -> PolarsResult { let s0 = &input[0]; let s1 = &input[1]; let s2 = &input[2]; match op { - FusedOperator::MultiplyAdd => Ok(fma_series(s0, s1, s2)), - FusedOperator::SubMultiply => Ok(fsm_series(s0, s1, s2)), - FusedOperator::MultiplySub => Ok(fms_series(s0, s1, s2)), + FusedOperator::MultiplyAdd => Ok(fma_columns(s0, s1, s2)), + FusedOperator::SubMultiply => Ok(fsm_columns(s0, s1, s2)), + FusedOperator::MultiplySub => Ok(fms_columns(s0, s1, s2)), } } diff --git a/crates/polars-plan/src/dsl/function_expr/list.rs b/crates/polars-plan/src/dsl/function_expr/list.rs index 05df577ed8f3..35467eff92bc 100644 --- 
a/crates/polars-plan/src/dsl/function_expr/list.rs +++ b/crates/polars-plan/src/dsl/function_expr/list.rs @@ -179,7 +179,7 @@ impl Display for ListFunction { } } -impl From for SpecialEq> { +impl From for SpecialEq> { fn from(func: ListFunction) -> Self { use ListFunction::*; match func { @@ -240,49 +240,55 @@ impl From for SpecialEq> { } #[cfg(feature = "is_in")] -pub(super) fn contains(args: &mut [Series]) -> PolarsResult> { +pub(super) fn contains(args: &mut [Column]) -> PolarsResult> { let list = &args[0]; let item = &args[1]; polars_ensure!(matches!(list.dtype(), DataType::List(_)), SchemaMismatch: "invalid series dtype: expected `List`, got `{}`", list.dtype(), ); - polars_ops::prelude::is_in(item, list).map(|mut ca| { - ca.rename(list.name().clone()); - Some(ca.into_series()) - }) + polars_ops::prelude::is_in(item.as_materialized_series(), list.as_materialized_series()).map( + |mut ca| { + ca.rename(list.name().clone()); + Some(ca.into_column()) + }, + ) } #[cfg(feature = "list_drop_nulls")] -pub(super) fn drop_nulls(s: &Series) -> PolarsResult { +pub(super) fn drop_nulls(s: &Column) -> PolarsResult { let list = s.list()?; - - Ok(list.lst_drop_nulls().into_series()) + Ok(list.lst_drop_nulls().into_column()) } #[cfg(feature = "list_sample")] pub(super) fn sample_n( - s: &[Series], + s: &[Column], with_replacement: bool, shuffle: bool, seed: Option, -) -> PolarsResult { +) -> PolarsResult { let list = s[0].list()?; let n = &s[1]; - list.lst_sample_n(n, with_replacement, shuffle, seed) - .map(|ok| ok.into_series()) + list.lst_sample_n(n.as_materialized_series(), with_replacement, shuffle, seed) + .map(|ok| ok.into_column()) } #[cfg(feature = "list_sample")] pub(super) fn sample_fraction( - s: &[Series], + s: &[Column], with_replacement: bool, shuffle: bool, seed: Option, -) -> PolarsResult { +) -> PolarsResult { let list = s[0].list()?; let fraction = &s[1]; - list.lst_sample_fraction(fraction, with_replacement, shuffle, seed) - .map(|ok| ok.into_series()) + 
list.lst_sample_fraction( + fraction.as_materialized_series(), + with_replacement, + shuffle, + seed, + ) + .map(|ok| ok.into_column()) } fn check_slice_arg_shape(slice_len: usize, ca_len: usize, name: &str) -> PolarsResult<()> { @@ -295,14 +301,14 @@ fn check_slice_arg_shape(slice_len: usize, ca_len: usize, name: &str) -> PolarsR Ok(()) } -pub(super) fn shift(s: &[Series]) -> PolarsResult { +pub(super) fn shift(s: &[Column]) -> PolarsResult { let list = s[0].list()?; let periods = &s[1]; - list.lst_shift(periods).map(|ok| ok.into_series()) + list.lst_shift(periods).map(|ok| ok.into_column()) } -pub(super) fn slice(args: &mut [Series]) -> PolarsResult> { +pub(super) fn slice(args: &mut [Column]) -> PolarsResult> { let s = &args[0]; let list_ca = s.list()?; let offset_s = &args[1]; @@ -316,7 +322,7 @@ pub(super) fn slice(args: &mut [Series]) -> PolarsResult> { .unwrap() .extract::() .unwrap_or(usize::MAX); - return Ok(Some(list_ca.lst_slice(offset, slice_len).into_series())); + return Ok(Some(list_ca.lst_slice(offset, slice_len).into_column())); }, (1, length_slice_len) => { check_slice_arg_shape(length_slice_len, list_ca.len(), "length")?; @@ -379,10 +385,10 @@ pub(super) fn slice(args: &mut [Series]) -> PolarsResult> { }, }; out.rename(s.name().clone()); - Ok(Some(out.into_series())) + Ok(Some(out.into_column())) } -pub(super) fn concat(s: &mut [Series]) -> PolarsResult> { +pub(super) fn concat(s: &mut [Column]) -> PolarsResult> { let mut first = std::mem::take(&mut s[0]); let other = &s[1..]; @@ -402,10 +408,10 @@ pub(super) fn concat(s: &mut [Series]) -> PolarsResult> { } } - first_ca.lst_concat(other).map(|ca| Some(ca.into_series())) + first_ca.lst_concat(other).map(|ca| Some(ca.into_column())) } -pub(super) fn get(s: &mut [Series], null_on_oob: bool) -> PolarsResult> { +pub(super) fn get(s: &mut [Column], null_on_oob: bool) -> PolarsResult> { let ca = s[0].list()?; let index = s[1].cast(&DataType::Int64)?; let index = index.i64().unwrap(); @@ -414,9 +420,9 @@ 
pub(super) fn get(s: &mut [Series], null_on_oob: bool) -> PolarsResult { let index = index.get(0); if let Some(index) = index { - ca.lst_get(index, null_on_oob).map(Some) + ca.lst_get(index, null_on_oob).map(Column::from).map(Some) } else { - Ok(Some(Series::full_null( + Ok(Some(Column::full_null( ca.name().clone(), ca.len(), ca.inner_dtype(), @@ -478,6 +484,7 @@ pub(super) fn get(s: &mut [Series], null_on_oob: bool) -> PolarsResult polars_bail!( @@ -489,7 +496,7 @@ pub(super) fn get(s: &mut [Series], null_on_oob: bool) -> PolarsResult PolarsResult { +pub(super) fn gather(args: &[Column], null_on_oob: bool) -> PolarsResult { let ca = &args[0]; let idx = &args[1]; let ca = ca.list()?; @@ -497,25 +504,28 @@ pub(super) fn gather(args: &[Series], null_on_oob: bool) -> PolarsResult if idx.len() == 1 && null_on_oob { // fast path let idx = idx.get(0)?.try_extract::()?; - let out = ca.lst_get(idx, null_on_oob)?; + let out = ca.lst_get(idx, null_on_oob).map(Column::from)?; // make sure we return a list out.reshape_list(&[-1, 1]) } else { - ca.lst_gather(idx, null_on_oob) + ca.lst_gather(idx.as_materialized_series(), null_on_oob) + .map(Column::from) } } #[cfg(feature = "list_gather")] -pub(super) fn gather_every(args: &[Series]) -> PolarsResult { +pub(super) fn gather_every(args: &[Column]) -> PolarsResult { let ca = &args[0]; let n = &args[1].strict_cast(&IDX_DTYPE)?; let offset = &args[2].strict_cast(&IDX_DTYPE)?; - ca.list()?.lst_gather_every(n.idx()?, offset.idx()?) + ca.list()? + .lst_gather_every(n.idx()?, offset.idx()?) 
+ .map(Column::from) } #[cfg(feature = "list_count")] -pub(super) fn count_matches(args: &[Series]) -> PolarsResult { +pub(super) fn count_matches(args: &[Column]) -> PolarsResult { let s = &args[0]; let element = &args[1]; polars_ensure!( @@ -524,79 +534,79 @@ pub(super) fn count_matches(args: &[Series]) -> PolarsResult { element.len() ); let ca = s.list()?; - list_count_matches(ca, element.get(0).unwrap()) + list_count_matches(ca, element.get(0).unwrap()).map(Column::from) } -pub(super) fn sum(s: &Series) -> PolarsResult { - s.list()?.lst_sum() +pub(super) fn sum(s: &Column) -> PolarsResult { + s.list()?.lst_sum().map(Column::from) } -pub(super) fn length(s: &Series) -> PolarsResult { - Ok(s.list()?.lst_lengths().into_series()) +pub(super) fn length(s: &Column) -> PolarsResult { + Ok(s.list()?.lst_lengths().into_column()) } -pub(super) fn max(s: &Series) -> PolarsResult { - s.list()?.lst_max() +pub(super) fn max(s: &Column) -> PolarsResult { + s.list()?.lst_max().map(Column::from) } -pub(super) fn min(s: &Series) -> PolarsResult { - s.list()?.lst_min() +pub(super) fn min(s: &Column) -> PolarsResult { + s.list()?.lst_min().map(Column::from) } -pub(super) fn mean(s: &Series) -> PolarsResult { - Ok(s.list()?.lst_mean()) +pub(super) fn mean(s: &Column) -> PolarsResult { + Ok(s.list()?.lst_mean().into()) } -pub(super) fn median(s: &Series) -> PolarsResult { - Ok(s.list()?.lst_median()) +pub(super) fn median(s: &Column) -> PolarsResult { + Ok(s.list()?.lst_median().into()) } -pub(super) fn std(s: &Series, ddof: u8) -> PolarsResult { - Ok(s.list()?.lst_std(ddof)) +pub(super) fn std(s: &Column, ddof: u8) -> PolarsResult { + Ok(s.list()?.lst_std(ddof).into()) } -pub(super) fn var(s: &Series, ddof: u8) -> PolarsResult { - Ok(s.list()?.lst_var(ddof)) +pub(super) fn var(s: &Column, ddof: u8) -> PolarsResult { + Ok(s.list()?.lst_var(ddof).into()) } -pub(super) fn arg_min(s: &Series) -> PolarsResult { - Ok(s.list()?.lst_arg_min().into_series()) +pub(super) fn arg_min(s: 
&Column) -> PolarsResult { + Ok(s.list()?.lst_arg_min().into_column()) } -pub(super) fn arg_max(s: &Series) -> PolarsResult { - Ok(s.list()?.lst_arg_max().into_series()) +pub(super) fn arg_max(s: &Column) -> PolarsResult { + Ok(s.list()?.lst_arg_max().into_column()) } #[cfg(feature = "diff")] -pub(super) fn diff(s: &Series, n: i64, null_behavior: NullBehavior) -> PolarsResult { - Ok(s.list()?.lst_diff(n, null_behavior)?.into_series()) +pub(super) fn diff(s: &Column, n: i64, null_behavior: NullBehavior) -> PolarsResult { + Ok(s.list()?.lst_diff(n, null_behavior)?.into_column()) } -pub(super) fn sort(s: &Series, options: SortOptions) -> PolarsResult { - Ok(s.list()?.lst_sort(options)?.into_series()) +pub(super) fn sort(s: &Column, options: SortOptions) -> PolarsResult { + Ok(s.list()?.lst_sort(options)?.into_column()) } -pub(super) fn reverse(s: &Series) -> PolarsResult { - Ok(s.list()?.lst_reverse().into_series()) +pub(super) fn reverse(s: &Column) -> PolarsResult { + Ok(s.list()?.lst_reverse().into_column()) } -pub(super) fn unique(s: &Series, is_stable: bool) -> PolarsResult { +pub(super) fn unique(s: &Column, is_stable: bool) -> PolarsResult { if is_stable { - Ok(s.list()?.lst_unique_stable()?.into_series()) + Ok(s.list()?.lst_unique_stable()?.into_column()) } else { - Ok(s.list()?.lst_unique()?.into_series()) + Ok(s.list()?.lst_unique()?.into_column()) } } #[cfg(feature = "list_sets")] -pub(super) fn set_operation(s: &[Series], set_type: SetOperation) -> PolarsResult { +pub(super) fn set_operation(s: &[Column], set_type: SetOperation) -> PolarsResult { let s0 = &s[0]; let s1 = &s[1]; - if s0.len() == 0 || s1.len() == 0 { + if s0.is_empty() || s1.is_empty() { return match set_type { SetOperation::Intersection => { - if s0.len() == 0 { + if s0.is_empty() { Ok(s0.clone()) } else { Ok(s1.clone().with_name(s0.name().clone())) @@ -604,7 +614,7 @@ pub(super) fn set_operation(s: &[Series], set_type: SetOperation) -> PolarsResul }, SetOperation::Difference => 
Ok(s0.clone()), SetOperation::Union | SetOperation::SymmetricDifference => { - if s0.len() == 0 { + if s0.is_empty() { Ok(s1.clone().with_name(s0.name().clone())) } else { Ok(s0.clone()) @@ -613,31 +623,31 @@ pub(super) fn set_operation(s: &[Series], set_type: SetOperation) -> PolarsResul }; } - list_set_operation(s0.list()?, s1.list()?, set_type).map(|ca| ca.into_series()) + list_set_operation(s0.list()?, s1.list()?, set_type).map(|ca| ca.into_column()) } #[cfg(feature = "list_any_all")] -pub(super) fn lst_any(s: &Series) -> PolarsResult { - s.list()?.lst_any() +pub(super) fn lst_any(s: &Column) -> PolarsResult { + s.list()?.lst_any().map(Column::from) } #[cfg(feature = "list_any_all")] -pub(super) fn lst_all(s: &Series) -> PolarsResult { - s.list()?.lst_all() +pub(super) fn lst_all(s: &Column) -> PolarsResult { + s.list()?.lst_all().map(Column::from) } -pub(super) fn join(s: &[Series], ignore_nulls: bool) -> PolarsResult { +pub(super) fn join(s: &[Column], ignore_nulls: bool) -> PolarsResult { let ca = s[0].list()?; let separator = s[1].str()?; - Ok(ca.lst_join(separator, ignore_nulls)?.into_series()) + Ok(ca.lst_join(separator, ignore_nulls)?.into_column()) } #[cfg(feature = "dtype-array")] -pub(super) fn to_array(s: &Series, width: usize) -> PolarsResult { +pub(super) fn to_array(s: &Column, width: usize) -> PolarsResult { let array_dtype = map_list_dtype_to_array_dtype(s.dtype(), width)?; s.cast(&array_dtype) } -pub(super) fn n_unique(s: &Series) -> PolarsResult { - Ok(s.list()?.lst_n_unique()?.into_series()) +pub(super) fn n_unique(s: &Column) -> PolarsResult { + Ok(s.list()?.lst_n_unique()?.into_column()) } diff --git a/crates/polars-plan/src/dsl/function_expr/log.rs b/crates/polars-plan/src/dsl/function_expr/log.rs index 42c71c681f33..23b6b1e970b1 100644 --- a/crates/polars-plan/src/dsl/function_expr/log.rs +++ b/crates/polars-plan/src/dsl/function_expr/log.rs @@ -1,23 +1,23 @@ use super::*; -pub(super) fn entropy(s: &Series, base: f64, normalize: bool) -> 
PolarsResult { - let out = s.entropy(base, normalize)?; +pub(super) fn entropy(s: &Column, base: f64, normalize: bool) -> PolarsResult { + let out = s.as_materialized_series().entropy(base, normalize)?; if matches!(s.dtype(), DataType::Float32) { let out = out as f32; - Ok(Series::new(s.name().clone(), [out])) + Ok(Column::new(s.name().clone(), [out])) } else { - Ok(Series::new(s.name().clone(), [out])) + Ok(Column::new(s.name().clone(), [out])) } } -pub(super) fn log(s: &Series, base: f64) -> PolarsResult { - Ok(s.log(base)) +pub(super) fn log(s: &Column, base: f64) -> PolarsResult { + Ok(s.as_materialized_series().log(base).into()) } -pub(super) fn log1p(s: &Series) -> PolarsResult { - Ok(s.log1p()) +pub(super) fn log1p(s: &Column) -> PolarsResult { + Ok(s.as_materialized_series().log1p().into()) } -pub(super) fn exp(s: &Series) -> PolarsResult { - Ok(s.exp()) +pub(super) fn exp(s: &Column) -> PolarsResult { + Ok(s.as_materialized_series().exp().into()) } diff --git a/crates/polars-plan/src/dsl/function_expr/mod.rs b/crates/polars-plan/src/dsl/function_expr/mod.rs index a40c25df764c..a5049b84eb1b 100644 --- a/crates/polars-plan/src/dsl/function_expr/mod.rs +++ b/crates/polars-plan/src/dsl/function_expr/mod.rs @@ -20,6 +20,8 @@ mod concat; mod correlation; #[cfg(feature = "cum_agg")] mod cum; +#[cfg(feature = "cutqcut")] +mod cut; #[cfg(feature = "temporal")] mod datetime; mod dispatch; @@ -776,7 +778,7 @@ macro_rules! wrap { }; ($e:expr, $($args:expr),*) => {{ - let f = move |s: &mut [Series]| { + let f = move |s: &mut [Column]| { $e(s, $($args),*) }; @@ -784,13 +786,13 @@ macro_rules! wrap { }}; } -// Fn(&[Series], args) +// Fn(&[Column], args) // all expression arguments are in the slice. // the first element is the root expression. #[macro_export] macro_rules! map_as_slice { ($func:path) => {{ - let f = move |s: &mut [Series]| { + let f = move |s: &mut [Column]| { $func(s).map(Some) }; @@ -798,7 +800,7 @@ macro_rules! 
map_as_slice { }}; ($func:path, $($args:expr),*) => {{ - let f = move |s: &mut [Series]| { + let f = move |s: &mut [Column]| { $func(s, $($args),*).map(Some) }; @@ -811,18 +813,18 @@ macro_rules! map_as_slice { #[macro_export] macro_rules! map_owned { ($func:path) => {{ - let f = move |s: &mut [Series]| { - let s = std::mem::take(&mut s[0]); - $func(s).map(Some) + let f = move |c: &mut [Column]| { + let c = std::mem::take(&mut c[0]); + $func(c).map(Some) }; SpecialEq::new(Arc::new(f)) }}; ($func:path, $($args:expr),*) => {{ - let f = move |s: &mut [Series]| { - let s = std::mem::take(&mut s[0]); - $func(s, $($args),*).map(Some) + let f = move |c: &mut [Column]| { + let c = std::mem::take(&mut c[0]); + $func(c, $($args),*).map(Some) }; SpecialEq::new(Arc::new(f)) @@ -833,25 +835,25 @@ macro_rules! map_owned { #[macro_export] macro_rules! map { ($func:path) => {{ - let f = move |s: &mut [Series]| { - let s = &s[0]; - $func(s).map(Some) + let f = move |c: &mut [Column]| { + let c = &c[0]; + $func(c).map(Some) }; SpecialEq::new(Arc::new(f)) }}; ($func:path, $($args:expr),*) => {{ - let f = move |s: &mut [Series]| { - let s = &s[0]; - $func(s, $($args),*).map(Some) + let f = move |c: &mut [Column]| { + let c = &c[0]; + $func(c, $($args),*).map(Some) }; SpecialEq::new(Arc::new(f)) }}; } -impl From for SpecialEq> { +impl From for SpecialEq> { fn from(func: FunctionExpr) -> Self { use FunctionExpr::*; match func { @@ -877,9 +879,9 @@ impl From for SpecialEq> { Abs => map!(abs::abs), Negate => map!(dispatch::negate), NullCount => { - let f = |s: &mut [Series]| { + let f = |s: &mut [Column]| { let s = &s[0]; - Ok(Some(Series::new( + Ok(Some(Column::new( s.name().clone(), [s.null_count() as IdxSize], ))) @@ -1074,7 +1076,7 @@ impl From for SpecialEq> { left_closed, include_breaks, } => map!( - cut, + cut::cut, breaks.clone(), labels.clone(), left_closed, @@ -1088,7 +1090,7 @@ impl From for SpecialEq> { allow_duplicates, include_breaks, } => map!( - qcut, + cut::qcut, 
probs.clone(), labels.clone(), left_closed, diff --git a/crates/polars-plan/src/dsl/function_expr/nan.rs b/crates/polars-plan/src/dsl/function_expr/nan.rs index 45b6274dfc35..035556336a65 100644 --- a/crates/polars-plan/src/dsl/function_expr/nan.rs +++ b/crates/polars-plan/src/dsl/function_expr/nan.rs @@ -1,16 +1,16 @@ use super::*; -pub(super) fn drop_nans(s: Series) -> PolarsResult { +pub(super) fn drop_nans(s: Column) -> PolarsResult { match s.dtype() { DataType::Float32 => { let ca = s.f32()?; let mask = ca.is_not_nan() | ca.is_null(); - ca.filter(&mask).map(|ca| ca.into_series()) + ca.filter(&mask).map(|ca| ca.into_column()) }, DataType::Float64 => { let ca = s.f64()?; let mask = ca.is_not_nan() | ca.is_null(); - ca.filter(&mask).map(|ca| ca.into_series()) + ca.filter(&mask).map(|ca| ca.into_column()) }, _ => Ok(s), } diff --git a/crates/polars-plan/src/dsl/function_expr/peaks.rs b/crates/polars-plan/src/dsl/function_expr/peaks.rs index bd3ce01b975c..702a9dc3c86d 100644 --- a/crates/polars-plan/src/dsl/function_expr/peaks.rs +++ b/crates/polars-plan/src/dsl/function_expr/peaks.rs @@ -3,32 +3,34 @@ use polars_ops::chunked_array::peaks::{peak_max as pmax, peak_min as pmin}; use super::*; -pub(super) fn peak_min(s: &Series) -> PolarsResult { +pub(super) fn peak_min(s: &Column) -> PolarsResult { let s = s.to_physical_repr(); + let s = s.as_materialized_series(); let s = match s.dtype() { DataType::Boolean => polars_bail!(opq = peak_min, DataType::Boolean), #[cfg(feature = "dtype-decimal")] - DataType::Decimal(_, _) => pmin(s.decimal()?).into_series(), + DataType::Decimal(_, _) => pmin(s.decimal()?).into_column(), dt => { with_match_physical_numeric_polars_type!(dt, |$T| { let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref(); - pmin(ca).into_series() + pmin(ca).into_column() }) }, }; Ok(s) } -pub(super) fn peak_max(s: &Series) -> PolarsResult { +pub(super) fn peak_max(s: &Column) -> PolarsResult { let s = s.to_physical_repr(); + let s = 
s.as_materialized_series(); let s = match s.dtype() { DataType::Boolean => polars_bail!(opq = peak_max, DataType::Boolean), #[cfg(feature = "dtype-decimal")] - DataType::Decimal(_, _) => pmax(s.decimal()?).into_series(), + DataType::Decimal(_, _) => pmax(s.decimal()?).into_column(), dt => { with_match_physical_numeric_polars_type!(dt, |$T| { let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref(); - pmax(ca).into_series() + pmax(ca).into_column() }) }, }; diff --git a/crates/polars-plan/src/dsl/function_expr/plugin.rs b/crates/polars-plan/src/dsl/function_expr/plugin.rs index 5ce4875fe68d..f11cca8863ec 100644 --- a/crates/polars-plan/src/dsl/function_expr/plugin.rs +++ b/crates/polars-plan/src/dsl/function_expr/plugin.rs @@ -48,11 +48,11 @@ unsafe fn retrieve_error_msg(lib: &Library) -> &CStr { } pub(super) unsafe fn call_plugin( - s: &[Series], + s: &[Column], lib: &str, symbol: &str, kwargs: &[u8], -) -> PolarsResult { +) -> PolarsResult { let plugin = get_lib(lib)?; let lib = &plugin.0; let major = plugin.1; @@ -78,7 +78,8 @@ pub(super) unsafe fn call_plugin( .get(format!("_polars_plugin_{}", symbol).as_bytes()) .unwrap(); - let input = s.iter().map(export_series).collect::>(); + // @scalar-correctness? 
+ let input = s.iter().map(export_column).collect::>(); let input_len = s.len(); let slice_ptr = input.as_ptr(); @@ -104,7 +105,7 @@ pub(super) unsafe fn call_plugin( } if !return_value.is_null() { - import_series(return_value) + import_series(return_value).map(Column::from) } else { let msg = retrieve_error_msg(lib); let msg = msg.to_string_lossy(); diff --git a/crates/polars-plan/src/dsl/function_expr/pow.rs b/crates/polars-plan/src/dsl/function_expr/pow.rs index 5336220d1ace..44394e9ae10a 100644 --- a/crates/polars-plan/src/dsl/function_expr/pow.rs +++ b/crates/polars-plan/src/dsl/function_expr/pow.rs @@ -29,12 +29,12 @@ impl Display for PowFunction { fn pow_on_chunked_arrays( base: &ChunkedArray, exponent: &ChunkedArray, -) -> PolarsResult> +) -> PolarsResult> where T: PolarsNumericType, F: PolarsNumericType, T::Native: num::pow::Pow + ToPrimitive, - ChunkedArray: IntoSeries, + ChunkedArray: IntoColumn, { if (base.len() == 1) && (exponent.len() != 1) { let name = base.name(); @@ -44,13 +44,13 @@ where Ok(Some( unary_elementwise_values(exponent, |exp| Pow::pow(base, exp)) - .into_series() + .into_column() .with_name(name.clone()), )) } else { Ok(Some( polars_core::chunked_array::ops::arity::binary(base, exponent, pow_kernel) - .into_series(), + .into_column(), )) } } @@ -58,38 +58,38 @@ where fn pow_on_floats( base: &ChunkedArray, exponent: &ChunkedArray, -) -> PolarsResult> +) -> PolarsResult> where T: PolarsFloatType, T::Native: num::pow::Pow + ToPrimitive + Float, - ChunkedArray: IntoSeries, + ChunkedArray: IntoColumn, { let dtype = T::get_dtype(); if exponent.len() == 1 { let Some(exponent_value) = exponent.get(0) else { - return Ok(Some(Series::full_null( + return Ok(Some(Column::full_null( base.name().clone(), base.len(), &dtype, ))); }; let s = match exponent_value.to_f64().unwrap() { - a if a == 1.0 => base.clone().into_series(), + a if a == 1.0 => base.clone().into_column(), // specialized sqrt will ensure (-inf)^0.5 = NaN // and will likely be faster 
as well. - a if a == 0.5 => base.apply_values(|v| v.sqrt()).into_series(), + a if a == 0.5 => base.apply_values(|v| v.sqrt()).into_column(), a if a.fract() == 0.0 && a < 10.0 && a > 1.0 => { let mut out = base.clone(); for _ in 1..exponent_value.to_u8().unwrap() { out = out * base.clone() } - out.into_series() + out.into_column() }, _ => base .apply_values(|v| Pow::pow(v, exponent_value)) - .into_series(), + .into_column(), }; Ok(Some(s)) } else { @@ -100,36 +100,36 @@ where fn pow_to_uint_dtype( base: &ChunkedArray, exponent: &ChunkedArray, -) -> PolarsResult> +) -> PolarsResult> where T: PolarsIntegerType, F: PolarsIntegerType, T::Native: num::pow::Pow + ToPrimitive, - ChunkedArray: IntoSeries, + ChunkedArray: IntoColumn, { let dtype = T::get_dtype(); if exponent.len() == 1 { let Some(exponent_value) = exponent.get(0) else { - return Ok(Some(Series::full_null( + return Ok(Some(Column::full_null( base.name().clone(), base.len(), &dtype, ))); }; let s = match exponent_value.to_u64().unwrap() { - 1 => base.clone().into_series(), + 1 => base.clone().into_column(), 2..=10 => { let mut out = base.clone(); for _ in 1..exponent_value.to_u8().unwrap() { out = out * base.clone() } - out.into_series() + out.into_column() }, _ => base .apply_values(|v| Pow::pow(v, exponent_value)) - .into_series(), + .into_column(), }; Ok(Some(s)) } else { @@ -137,7 +137,7 @@ where } } -fn pow_on_series(base: &Series, exponent: &Series) -> PolarsResult> { +fn pow_on_series(base: &Column, exponent: &Column) -> PolarsResult> { use DataType::*; let base_dtype = base.dtype(); @@ -193,7 +193,7 @@ fn pow_on_series(base: &Series, exponent: &Series) -> PolarsResult PolarsResult> { +pub(super) fn pow(s: &mut [Column]) -> PolarsResult> { let base = &s[0]; let exponent = &s[1]; @@ -210,7 +210,7 @@ pub(super) fn pow(s: &mut [Series]) -> PolarsResult> { } } -pub(super) fn sqrt(base: &Series) -> PolarsResult { +pub(super) fn sqrt(base: &Column) -> PolarsResult { use DataType::*; match base.dtype() { 
Float32 => { @@ -228,16 +228,16 @@ pub(super) fn sqrt(base: &Series) -> PolarsResult { } } -fn sqrt_on_floats(base: &ChunkedArray) -> PolarsResult +fn sqrt_on_floats(base: &ChunkedArray) -> PolarsResult where T: PolarsFloatType, T::Native: num::pow::Pow + ToPrimitive + Float, - ChunkedArray: IntoSeries, + ChunkedArray: IntoColumn, { - Ok(base.apply_values(|v| v.sqrt()).into_series()) + Ok(base.apply_values(|v| v.sqrt()).into_column()) } -pub(super) fn cbrt(base: &Series) -> PolarsResult { +pub(super) fn cbrt(base: &Column) -> PolarsResult { use DataType::*; match base.dtype() { Float32 => { @@ -255,11 +255,11 @@ pub(super) fn cbrt(base: &Series) -> PolarsResult { } } -fn cbrt_on_floats(base: &ChunkedArray) -> PolarsResult +fn cbrt_on_floats(base: &ChunkedArray) -> PolarsResult where T: PolarsFloatType, T::Native: num::pow::Pow + ToPrimitive + Float, - ChunkedArray: IntoSeries, + ChunkedArray: IntoColumn, { - Ok(base.apply_values(|v| v.cbrt()).into_series()) + Ok(base.apply_values(|v| v.cbrt()).into_column()) } diff --git a/crates/polars-plan/src/dsl/function_expr/random.rs b/crates/polars-plan/src/dsl/function_expr/random.rs index cb21e08367aa..91b0decc1c71 100644 --- a/crates/polars-plan/src/dsl/function_expr/random.rs +++ b/crates/polars-plan/src/dsl/function_expr/random.rs @@ -23,16 +23,16 @@ impl Hash for RandomMethod { } } -pub(super) fn shuffle(s: &Series, seed: Option) -> PolarsResult { +pub(super) fn shuffle(s: &Column, seed: Option) -> PolarsResult { Ok(s.shuffle(seed)) } pub(super) fn sample_frac( - s: &[Series], + s: &[Column], with_replacement: bool, shuffle: bool, seed: Option, -) -> PolarsResult { +) -> PolarsResult { let src = &s[0]; let frac_s = &s[1]; @@ -46,16 +46,16 @@ pub(super) fn sample_frac( match frac.get(0) { Some(frac) => src.sample_frac(frac, with_replacement, shuffle, seed), - None => Ok(Series::new_empty(src.name().clone(), src.dtype())), + None => Ok(Column::new_empty(src.name().clone(), src.dtype())), } } pub(super) fn sample_n( - s: 
&[Series], + s: &[Column], with_replacement: bool, shuffle: bool, seed: Option, -) -> PolarsResult { +) -> PolarsResult { let src = &s[0]; let n_s = &s[1]; @@ -69,6 +69,6 @@ pub(super) fn sample_n( match n.get(0) { Some(n) => src.sample_n(n as usize, with_replacement, shuffle, seed), - None => Ok(Series::new_empty(src.name().clone(), src.dtype())), + None => Ok(Column::new_empty(src.name().clone(), src.dtype())), } } diff --git a/crates/polars-plan/src/dsl/function_expr/range/date_range.rs b/crates/polars-plan/src/dsl/function_expr/range/date_range.rs index 5518d32df275..116d626a923f 100644 --- a/crates/polars-plan/src/dsl/function_expr/range/date_range.rs +++ b/crates/polars-plan/src/dsl/function_expr/range/date_range.rs @@ -10,10 +10,10 @@ use super::utils::{ const CAPACITY_FACTOR: usize = 5; pub(super) fn date_range( - s: &[Series], + s: &[Column], interval: Duration, closed: ClosedWindow, -) -> PolarsResult { +) -> PolarsResult { let start = &s[0]; let end = &s[1]; @@ -44,14 +44,14 @@ pub(super) fn date_range( )?; let to_type = DataType::Date; - out.cast(&to_type) + out.cast(&to_type).map(Column::from) } pub(super) fn date_ranges( - s: &[Series], + s: &[Column], interval: Duration, closed: ClosedWindow, -) -> PolarsResult { +) -> PolarsResult { let start = &s[0]; let end = &s[1]; diff --git a/crates/polars-plan/src/dsl/function_expr/range/datetime_range.rs b/crates/polars-plan/src/dsl/function_expr/range/datetime_range.rs index 394889dd34f1..a61264ce7aca 100644 --- a/crates/polars-plan/src/dsl/function_expr/range/datetime_range.rs +++ b/crates/polars-plan/src/dsl/function_expr/range/datetime_range.rs @@ -12,12 +12,12 @@ use crate::dsl::function_expr::FieldsMapper; const CAPACITY_FACTOR: usize = 5; pub(super) fn datetime_range( - s: &[Series], + s: &[Column], interval: Duration, closed: ClosedWindow, time_unit: Option, time_zone: Option, -) -> PolarsResult { +) -> PolarsResult { let mut start = s[0].clone(); let mut end = s[1].clone(); @@ -69,7 +69,7 @@ 
pub(super) fn datetime_range( NonExistent::Raise, )? .cast(&dtype)? - .into_series(), + .into_column(), polars_ops::prelude::replace_time_zone( end.datetime().unwrap(), Some(&tz), @@ -77,7 +77,7 @@ pub(super) fn datetime_range( NonExistent::Raise, )? .cast(&dtype)? - .into_series(), + .into_column(), ), _ => (start.cast(&dtype)?, end.cast(&dtype)?), }; @@ -99,16 +99,16 @@ pub(super) fn datetime_range( }, _ => unimplemented!(), }; - Ok(result.cast(&dtype).unwrap().into_series()) + Ok(result.cast(&dtype).unwrap().into_column()) } pub(super) fn datetime_ranges( - s: &[Series], + s: &[Column], interval: Duration, closed: ClosedWindow, time_unit: Option, time_zone: Option, -) -> PolarsResult { +) -> PolarsResult { let mut start = s[0].clone(); let mut end = s[1].clone(); @@ -158,7 +158,7 @@ pub(super) fn datetime_ranges( NonExistent::Raise, )? .cast(&dtype)? - .into_series() + .into_column() .to_physical_repr() .cast(&DataType::Int64)?, polars_ops::prelude::replace_time_zone( @@ -168,7 +168,7 @@ pub(super) fn datetime_ranges( NonExistent::Raise, )? .cast(&dtype)? 
- .into_series() + .into_column() .to_physical_repr() .cast(&DataType::Int64)?, ), @@ -220,7 +220,7 @@ pub(super) fn datetime_ranges( }; let to_type = DataType::List(Box::new(dtype)); - out.cast(&to_type) + out.cast(&to_type).map(Column::from) } impl<'a> FieldsMapper<'a> { diff --git a/crates/polars-plan/src/dsl/function_expr/range/int_range.rs b/crates/polars-plan/src/dsl/function_expr/range/int_range.rs index f1ae0ffe13a7..f9b524cfe481 100644 --- a/crates/polars-plan/src/dsl/function_expr/range/int_range.rs +++ b/crates/polars-plan/src/dsl/function_expr/range/int_range.rs @@ -6,7 +6,7 @@ use super::utils::{ensure_range_bounds_contain_exactly_one_value, numeric_ranges const CAPACITY_FACTOR: usize = 5; -pub(super) fn int_range(s: &[Series], step: i64, dtype: DataType) -> PolarsResult { +pub(super) fn int_range(s: &[Column], step: i64, dtype: DataType) -> PolarsResult { let mut start = &s[0]; let mut end = &s[1]; let name = start.name(); @@ -27,22 +27,22 @@ pub(super) fn int_range(s: &[Series], step: i64, dtype: DataType) -> PolarsResul with_match_physical_integer_polars_type!(dtype, |$T| { let start_v = get_first_series_value::<$T>(start)?; let end_v = get_first_series_value::<$T>(end)?; - new_int_range::<$T>(start_v, end_v, step, name.clone()) + new_int_range::<$T>(start_v, end_v, step, name.clone()).map(Column::from) }) } -fn get_first_series_value(s: &Series) -> PolarsResult +fn get_first_series_value(s: &Column) -> PolarsResult where T: PolarsIntegerType, { - let ca: &ChunkedArray = s.as_any().downcast_ref().unwrap(); + let ca: &ChunkedArray = s.as_materialized_series().as_any().downcast_ref().unwrap(); let value_opt = ca.get(0); let value = value_opt.ok_or_else(|| polars_err!(ComputeError: "invalid null input for `int_range`"))?; Ok(value) } -pub(super) fn int_ranges(s: &[Series]) -> PolarsResult { +pub(super) fn int_ranges(s: &[Column]) -> PolarsResult { let start = &s[0]; let end = &s[1]; let step = &s[2]; diff --git 
a/crates/polars-plan/src/dsl/function_expr/range/mod.rs b/crates/polars-plan/src/dsl/function_expr/range/mod.rs index 3350f0c6f8f5..61eebd9cf4b1 100644 --- a/crates/polars-plan/src/dsl/function_expr/range/mod.rs +++ b/crates/polars-plan/src/dsl/function_expr/range/mod.rs @@ -18,7 +18,7 @@ use serde::{Deserialize, Serialize}; use crate::dsl::function_expr::FieldsMapper; use crate::dsl::SpecialEq; use crate::map_as_slice; -use crate::prelude::SeriesUdf; +use crate::prelude::ColumnsUdf; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Clone, PartialEq, Debug, Eq, Hash)] @@ -129,7 +129,7 @@ impl Display for RangeFunction { } } -impl From for SpecialEq> { +impl From for SpecialEq> { fn from(func: RangeFunction) -> Self { use RangeFunction::*; match func { diff --git a/crates/polars-plan/src/dsl/function_expr/range/time_range.rs b/crates/polars-plan/src/dsl/function_expr/range/time_range.rs index 52211e89bc56..e339105bee3f 100644 --- a/crates/polars-plan/src/dsl/function_expr/range/time_range.rs +++ b/crates/polars-plan/src/dsl/function_expr/range/time_range.rs @@ -9,10 +9,10 @@ use super::utils::{ const CAPACITY_FACTOR: usize = 5; pub(super) fn time_range( - s: &[Series], + s: &[Column], interval: Duration, closed: ClosedWindow, -) -> PolarsResult { +) -> PolarsResult { let start = &s[0]; let end = &s[1]; let name = start.name(); @@ -26,14 +26,14 @@ pub(super) fn time_range( .ok_or_else(|| polars_err!(ComputeError: "end is an out-of-range time."))?; let out = time_range_impl(name.clone(), start, end, interval, closed)?; - Ok(out.cast(&dtype).unwrap().into_series()) + Ok(out.cast(&dtype).unwrap().into_column()) } pub(super) fn time_ranges( - s: &[Series], + s: &[Column], interval: Duration, closed: ClosedWindow, -) -> PolarsResult { +) -> PolarsResult { let start = &s[0]; let end = &s[1]; @@ -62,5 +62,5 @@ pub(super) fn time_ranges( let out = temporal_ranges_impl_broadcast(start, end, range_impl, &mut builder)?; let to_type = 
DataType::List(Box::new(DataType::Time)); - out.cast(&to_type) + out.cast(&to_type).map(Column::from) } diff --git a/crates/polars-plan/src/dsl/function_expr/range/utils.rs b/crates/polars-plan/src/dsl/function_expr/range/utils.rs index b748daf0879a..c6f91c49b02b 100644 --- a/crates/polars-plan/src/dsl/function_expr/range/utils.rs +++ b/crates/polars-plan/src/dsl/function_expr/range/utils.rs @@ -1,14 +1,14 @@ use polars_core::prelude::{ - polars_bail, polars_ensure, ChunkedArray, Int64Chunked, IntoSeries, ListBuilderTrait, - ListPrimitiveChunkedBuilder, PolarsIntegerType, PolarsResult, Series, + polars_bail, polars_ensure, ChunkedArray, Column, Int64Chunked, IntoColumn, ListBuilderTrait, + ListPrimitiveChunkedBuilder, PolarsIntegerType, PolarsResult, }; -pub(super) fn temporal_series_to_i64_scalar(s: &Series) -> Option { +pub(super) fn temporal_series_to_i64_scalar(s: &Column) -> Option { s.to_physical_repr().get(0).unwrap().extract::() } pub(super) fn ensure_range_bounds_contain_exactly_one_value( - start: &Series, - end: &Series, + start: &Column, + end: &Column, ) -> PolarsResult<()> { polars_ensure!( start.len() == 1, @@ -28,7 +28,7 @@ pub(super) fn numeric_ranges_impl_broadcast( step: &Int64Chunked, range_impl: F, builder: &mut ListPrimitiveChunkedBuilder, -) -> PolarsResult +) -> PolarsResult where T: PolarsIntegerType, U: PolarsIntegerType, @@ -133,7 +133,7 @@ where ) }, }; - let out = builder.finish().into_series(); + let out = builder.finish().into_column(); Ok(out) } @@ -143,7 +143,7 @@ pub(super) fn temporal_ranges_impl_broadcast( end: &ChunkedArray, range_impl: F, builder: &mut ListPrimitiveChunkedBuilder, -) -> PolarsResult +) -> PolarsResult where T: PolarsIntegerType, U: PolarsIntegerType, @@ -190,7 +190,7 @@ where ) }, }; - let out = builder.finish().into_series(); + let out = builder.finish().into_column(); Ok(out) } diff --git a/crates/polars-plan/src/dsl/function_expr/rolling.rs b/crates/polars-plan/src/dsl/function_expr/rolling.rs index 
9302ab4a1ad7..c108c92b571a 100644 --- a/crates/polars-plan/src/dsl/function_expr/rolling.rs +++ b/crates/polars-plan/src/dsl/function_expr/rolling.rs @@ -52,38 +52,62 @@ impl Hash for RollingFunction { } } -pub(super) fn rolling_min(s: &Series, options: RollingOptionsFixedWindow) -> PolarsResult { - s.rolling_min(options) +pub(super) fn rolling_min(s: &Column, options: RollingOptionsFixedWindow) -> PolarsResult { + // @scalar-opt + s.as_materialized_series() + .rolling_min(options) + .map(Column::from) } -pub(super) fn rolling_max(s: &Series, options: RollingOptionsFixedWindow) -> PolarsResult { - s.rolling_max(options) +pub(super) fn rolling_max(s: &Column, options: RollingOptionsFixedWindow) -> PolarsResult { + // @scalar-opt + s.as_materialized_series() + .rolling_max(options) + .map(Column::from) } -pub(super) fn rolling_mean(s: &Series, options: RollingOptionsFixedWindow) -> PolarsResult { - s.rolling_mean(options) +pub(super) fn rolling_mean(s: &Column, options: RollingOptionsFixedWindow) -> PolarsResult { + // @scalar-opt + s.as_materialized_series() + .rolling_mean(options) + .map(Column::from) } -pub(super) fn rolling_sum(s: &Series, options: RollingOptionsFixedWindow) -> PolarsResult { - s.rolling_sum(options) +pub(super) fn rolling_sum(s: &Column, options: RollingOptionsFixedWindow) -> PolarsResult { + // @scalar-opt + s.as_materialized_series() + .rolling_sum(options) + .map(Column::from) } pub(super) fn rolling_quantile( - s: &Series, + s: &Column, options: RollingOptionsFixedWindow, -) -> PolarsResult { - s.rolling_quantile(options) +) -> PolarsResult { + // @scalar-opt + s.as_materialized_series() + .rolling_quantile(options) + .map(Column::from) } -pub(super) fn rolling_var(s: &Series, options: RollingOptionsFixedWindow) -> PolarsResult { - s.rolling_var(options) +pub(super) fn rolling_var(s: &Column, options: RollingOptionsFixedWindow) -> PolarsResult { + // @scalar-opt + s.as_materialized_series() + .rolling_var(options) + .map(Column::from) } 
-pub(super) fn rolling_std(s: &Series, options: RollingOptionsFixedWindow) -> PolarsResult { - s.rolling_std(options) +pub(super) fn rolling_std(s: &Column, options: RollingOptionsFixedWindow) -> PolarsResult { + // @scalar-opt + s.as_materialized_series() + .rolling_std(options) + .map(Column::from) } #[cfg(feature = "moment")] -pub(super) fn rolling_skew(s: &Series, window_size: usize, bias: bool) -> PolarsResult { - s.rolling_skew(window_size, bias) +pub(super) fn rolling_skew(s: &Column, window_size: usize, bias: bool) -> PolarsResult { + // @scalar-opt + s.as_materialized_series() + .rolling_skew(window_size, bias) + .map(Column::from) } diff --git a/crates/polars-plan/src/dsl/function_expr/rolling_by.rs b/crates/polars-plan/src/dsl/function_expr/rolling_by.rs index c2b3510281f2..3077c83355f2 100644 --- a/crates/polars-plan/src/dsl/function_expr/rolling_by.rs +++ b/crates/polars-plan/src/dsl/function_expr/rolling_by.rs @@ -39,50 +39,71 @@ impl Hash for RollingFunctionBy { } pub(super) fn rolling_min_by( - s: &[Series], + s: &[Column], options: RollingOptionsDynamicWindow, -) -> PolarsResult { - s[0].rolling_min_by(&s[1], options) +) -> PolarsResult { + // @scalar-opt + s[0].as_materialized_series() + .rolling_min_by(s[1].as_materialized_series(), options) + .map(Column::from) } pub(super) fn rolling_max_by( - s: &[Series], + s: &[Column], options: RollingOptionsDynamicWindow, -) -> PolarsResult { - s[0].rolling_max_by(&s[1], options) +) -> PolarsResult { + // @scalar-opt + s[0].as_materialized_series() + .rolling_max_by(s[1].as_materialized_series(), options) + .map(Column::from) } pub(super) fn rolling_mean_by( - s: &[Series], + s: &[Column], options: RollingOptionsDynamicWindow, -) -> PolarsResult { - s[0].rolling_mean_by(&s[1], options) +) -> PolarsResult { + // @scalar-opt + s[0].as_materialized_series() + .rolling_mean_by(s[1].as_materialized_series(), options) + .map(Column::from) } pub(super) fn rolling_sum_by( - s: &[Series], + s: &[Column], options: 
RollingOptionsDynamicWindow, -) -> PolarsResult { - s[0].rolling_sum_by(&s[1], options) +) -> PolarsResult { + // @scalar-opt + s[0].as_materialized_series() + .rolling_sum_by(s[1].as_materialized_series(), options) + .map(Column::from) } pub(super) fn rolling_quantile_by( - s: &[Series], + s: &[Column], options: RollingOptionsDynamicWindow, -) -> PolarsResult { - s[0].rolling_quantile_by(&s[1], options) +) -> PolarsResult { + // @scalar-opt + s[0].as_materialized_series() + .rolling_quantile_by(s[1].as_materialized_series(), options) + .map(Column::from) } pub(super) fn rolling_var_by( - s: &[Series], + s: &[Column], options: RollingOptionsDynamicWindow, -) -> PolarsResult { - s[0].rolling_var_by(&s[1], options) +) -> PolarsResult { + // @scalar-opt + s[0].as_materialized_series() + .rolling_var_by(s[1].as_materialized_series(), options) + .map(Column::from) } pub(super) fn rolling_std_by( - s: &[Series], + s: &[Column], options: RollingOptionsDynamicWindow, -) -> PolarsResult { - s[0].rolling_std_by(&s[1], options) +) -> PolarsResult { + // @scalar-opt + s[0].as_materialized_series() + .rolling_std_by(s[1].as_materialized_series(), options) + .map(Column::from) } diff --git a/crates/polars-plan/src/dsl/function_expr/round.rs b/crates/polars-plan/src/dsl/function_expr/round.rs index be7b25d00706..41b2f04324d0 100644 --- a/crates/polars-plan/src/dsl/function_expr/round.rs +++ b/crates/polars-plan/src/dsl/function_expr/round.rs @@ -1,17 +1,54 @@ +use polars_core::frame::column::ScalarColumn; + use super::*; -pub(super) fn round(s: &Series, decimals: u32) -> PolarsResult { - s.round(decimals) +pub(super) fn round(c: &Column, decimals: u32) -> PolarsResult { + match c { + Column::Series(s) => s.round(decimals).map(Column::from), + Column::Scalar(s) if s.is_empty() => { + s.as_materialized_series().round(decimals).map(Column::from) + }, + Column::Scalar(s) => ScalarColumn::from_single_value_series( + s.as_single_value_series().round(decimals)?, + s.len(), + ) + 
.map(Column::from), + } } -pub(super) fn round_sig_figs(s: &Series, digits: i32) -> PolarsResult { - s.round_sig_figs(digits) +pub(super) fn round_sig_figs(c: &Column, digits: i32) -> PolarsResult { + match c { + Column::Series(s) => s.round_sig_figs(digits).map(Column::from), + Column::Scalar(s) if s.is_empty() => s + .as_materialized_series() + .round_sig_figs(digits) + .map(Column::from), + Column::Scalar(s) => ScalarColumn::from_single_value_series( + s.as_single_value_series().round_sig_figs(digits)?, + s.len(), + ) + .map(Column::from), + } } -pub(super) fn floor(s: &Series) -> PolarsResult { - s.floor() +pub(super) fn floor(c: &Column) -> PolarsResult { + match c { + Column::Series(s) => s.floor().map(Column::from), + Column::Scalar(s) if s.is_empty() => s.as_materialized_series().floor().map(Column::from), + Column::Scalar(s) => { + ScalarColumn::from_single_value_series(s.as_single_value_series().floor()?, s.len()) + .map(Column::from) + }, + } } -pub(super) fn ceil(s: &Series) -> PolarsResult { - s.ceil() +pub(super) fn ceil(c: &Column) -> PolarsResult { + match c { + Column::Series(s) => s.ceil().map(Column::from), + Column::Scalar(s) if s.is_empty() => s.as_materialized_series().ceil().map(Column::from), + Column::Scalar(s) => { + ScalarColumn::from_single_value_series(s.as_single_value_series().ceil()?, s.len()) + .map(Column::from) + }, + } } diff --git a/crates/polars-plan/src/dsl/function_expr/row_hash.rs b/crates/polars-plan/src/dsl/function_expr/row_hash.rs index 3a2d33f08384..2039e86c6faa 100644 --- a/crates/polars-plan/src/dsl/function_expr/row_hash.rs +++ b/crates/polars-plan/src/dsl/function_expr/row_hash.rs @@ -1,6 +1,8 @@ use super::*; -pub(super) fn row_hash(s: &Series, k0: u64, k1: u64, k2: u64, k3: u64) -> PolarsResult { - Ok(s.hash(PlRandomState::with_seeds(k0, k1, k2, k3)) - .into_series()) +pub(super) fn row_hash(c: &Column, k0: u64, k1: u64, k2: u64, k3: u64) -> PolarsResult { + // @scalar-opt + Ok(c.as_materialized_series() + 
.hash(PlRandomState::with_seeds(k0, k1, k2, k3)) + .into_column()) } diff --git a/crates/polars-plan/src/dsl/function_expr/search_sorted.rs b/crates/polars-plan/src/dsl/function_expr/search_sorted.rs index 87933fc7bd6c..38f6ef81d5c3 100644 --- a/crates/polars-plan/src/dsl/function_expr/search_sorted.rs +++ b/crates/polars-plan/src/dsl/function_expr/search_sorted.rs @@ -1,8 +1,14 @@ use super::*; -pub(super) fn search_sorted_impl(s: &mut [Series], side: SearchSortedSide) -> PolarsResult { +pub(super) fn search_sorted_impl(s: &mut [Column], side: SearchSortedSide) -> PolarsResult { let sorted_array = &s[0]; let search_value = &s[1]; - search_sorted(sorted_array, search_value, side, false).map(|ca| ca.into_series()) + search_sorted( + sorted_array.as_materialized_series(), + search_value.as_materialized_series(), + side, + false, + ) + .map(|ca| ca.into_column()) } diff --git a/crates/polars-plan/src/dsl/function_expr/shift_and_fill.rs b/crates/polars-plan/src/dsl/function_expr/shift_and_fill.rs index 6ebc5f3d221e..4dafb71643bf 100644 --- a/crates/polars-plan/src/dsl/function_expr/shift_and_fill.rs +++ b/crates/polars-plan/src/dsl/function_expr/shift_and_fill.rs @@ -16,7 +16,7 @@ where feature = "dtype-struct", feature = "dtype-categorical" ))] -fn shift_and_fill_with_mask(s: &Series, n: i64, fill_value: &Series) -> PolarsResult { +fn shift_and_fill_with_mask(s: &Column, n: i64, fill_value: &Column) -> PolarsResult { use polars_core::export::arrow::array::BooleanArray; use polars_core::export::arrow::bitmap::MutableBitmap; @@ -40,7 +40,7 @@ fn shift_and_fill_with_mask(s: &Series, n: i64, fill_value: &Series) -> PolarsRe s.shift(n).zip_with_same_type(&mask, fill_value) } -pub(super) fn shift_and_fill(args: &[Series]) -> PolarsResult { +pub(super) fn shift_and_fill(args: &[Column]) -> PolarsResult { let s = &args[0]; let n_s = &args[1]; @@ -66,16 +66,17 @@ pub(super) fn shift_and_fill(args: &[Series]) -> PolarsResult { AnyValue::Null => None, v => 
polars_bail!(ComputeError: "fill value '{}' is not supported", v), }; - ca.shift_and_fill(n, fill_value).into_series().cast(logical) + ca.shift_and_fill(n, fill_value).into_column().cast(logical) }, String => { let ca = s.str()?; let fill_value = match fill_value { AnyValue::String(v) => Some(v), + AnyValue::StringOwned(ref v) => Some(v.as_str()), AnyValue::Null => None, v => polars_bail!(ComputeError: "fill value '{}' is not supported", v), }; - ca.shift_and_fill(n, fill_value).into_series().cast(logical) + ca.shift_and_fill(n, fill_value).into_column().cast(logical) }, List(_) => { let ca = s.list()?; @@ -85,7 +86,7 @@ pub(super) fn shift_and_fill(args: &[Series]) -> PolarsResult { v => polars_bail!(ComputeError: "fill value '{}' is not supported", v), }; ca.shift_and_fill(n, fill_value.as_ref()) - .into_series() + .into_column() .cast(logical) }, #[cfg(feature = "object")] @@ -97,7 +98,7 @@ pub(super) fn shift_and_fill(args: &[Series]) -> PolarsResult { dt if dt.is_numeric() || dt.is_logical() => { macro_rules! 
dispatch { ($ca:expr, $n:expr, $fill_value:expr) => {{ - shift_and_fill_numeric($ca, $n, $fill_value).into_series() + shift_and_fill_numeric($ca, $n, $fill_value).into_column() }}; } let out = downcast_as_macro_arg_physical!(physical, dispatch, n, fill_value); @@ -106,11 +107,11 @@ pub(super) fn shift_and_fill(args: &[Series]) -> PolarsResult { dt => polars_bail!(opq = shift_and_fill, dt), } } else { - Ok(Series::full_null(s.name().clone(), s.len(), s.dtype())) + Ok(Column::full_null(s.name().clone(), s.len(), s.dtype())) } } -pub fn shift(args: &[Series]) -> PolarsResult { +pub fn shift(args: &[Column]) -> PolarsResult { let s = &args[0]; let n_s = &args[1]; polars_ensure!( @@ -123,6 +124,6 @@ pub fn shift(args: &[Series]) -> PolarsResult { match n.get(0) { Some(n) => Ok(s.shift(n)), - None => Ok(Series::full_null(s.name().clone(), s.len(), s.dtype())), + None => Ok(Column::full_null(s.name().clone(), s.len(), s.dtype())), } } diff --git a/crates/polars-plan/src/dsl/function_expr/shrink_type.rs b/crates/polars-plan/src/dsl/function_expr/shrink_type.rs index 0dee54f79e8f..224691e98ef4 100644 --- a/crates/polars-plan/src/dsl/function_expr/shrink_type.rs +++ b/crates/polars-plan/src/dsl/function_expr/shrink_type.rs @@ -1,38 +1,38 @@ use super::*; -pub(super) fn shrink(s: Series) -> PolarsResult { - if !s.dtype().is_numeric() { - return Ok(s); +pub(super) fn shrink(c: Column) -> PolarsResult { + if !c.dtype().is_numeric() { + return Ok(c); } - if s.dtype().is_float() { - return s.cast(&DataType::Float32); + if c.dtype().is_float() { + return c.cast(&DataType::Float32); } - if s.dtype().is_unsigned_integer() { - let max = s.max_reduce()?.value().extract::().unwrap_or(0_u64); + if c.dtype().is_unsigned_integer() { + let max = c.max_reduce()?.value().extract::().unwrap_or(0_u64); if cfg!(feature = "dtype-u8") && max <= u8::MAX as u64 { - s.cast(&DataType::UInt8) + c.cast(&DataType::UInt8) } else if cfg!(feature = "dtype-u16") && max <= u16::MAX as u64 { - 
s.cast(&DataType::UInt16) + c.cast(&DataType::UInt16) } else if max <= u32::MAX as u64 { - s.cast(&DataType::UInt32) + c.cast(&DataType::UInt32) } else { - Ok(s) + Ok(c) } } else { - let min = s.min_reduce()?.value().extract::().unwrap_or(0_i64); - let max = s.max_reduce()?.value().extract::().unwrap_or(0_i64); + let min = c.min_reduce()?.value().extract::().unwrap_or(0_i64); + let max = c.max_reduce()?.value().extract::().unwrap_or(0_i64); if cfg!(feature = "dtype-i8") && min >= i8::MIN as i64 && max <= i8::MAX as i64 { - s.cast(&DataType::Int8) + c.cast(&DataType::Int8) } else if cfg!(feature = "dtype-i16") && min >= i16::MIN as i64 && max <= i16::MAX as i64 { - s.cast(&DataType::Int16) + c.cast(&DataType::Int16) } else if min >= i32::MIN as i64 && max <= i32::MAX as i64 { - s.cast(&DataType::Int32) + c.cast(&DataType::Int32) } else { - Ok(s) + Ok(c) } } } diff --git a/crates/polars-plan/src/dsl/function_expr/sign.rs b/crates/polars-plan/src/dsl/function_expr/sign.rs index a7bf4d3277e6..471c76f8ad2d 100644 --- a/crates/polars-plan/src/dsl/function_expr/sign.rs +++ b/crates/polars-plan/src/dsl/function_expr/sign.rs @@ -4,7 +4,8 @@ use polars_core::with_match_physical_numeric_polars_type; use super::*; -pub(super) fn sign(s: &Series) -> PolarsResult { +pub(super) fn sign(s: &Column) -> PolarsResult { + let s = s.as_materialized_series(); let dt = s.dtype(); polars_ensure!(dt.is_numeric(), opq = sign, dt); with_match_physical_numeric_polars_type!(dt, |$T| { @@ -13,10 +14,10 @@ pub(super) fn sign(s: &Series) -> PolarsResult { }) } -fn sign_impl(ca: &ChunkedArray) -> Series +fn sign_impl(ca: &ChunkedArray) -> Column where T: PolarsNumericType, - ChunkedArray: IntoSeries, + ChunkedArray: IntoColumn, { ca.apply_values(|x| { if x < T::Native::zero() { @@ -30,5 +31,5 @@ where x } }) - .into_series() + .into_column() } diff --git a/crates/polars-plan/src/dsl/function_expr/strings.rs b/crates/polars-plan/src/dsl/function_expr/strings.rs index 9a5d2a9ff537..ba06dc00e67c 
100644 --- a/crates/polars-plan/src/dsl/function_expr/strings.rs +++ b/crates/polars-plan/src/dsl/function_expr/strings.rs @@ -290,7 +290,7 @@ impl Display for StringFunction { } } -impl From for SpecialEq> { +impl From for SpecialEq> { fn from(func: StringFunction) -> Self { use StringFunction::*; match func { @@ -405,15 +405,15 @@ impl From for SpecialEq> { } #[cfg(feature = "find_many")] -fn contains_many(s: &[Series], ascii_case_insensitive: bool) -> PolarsResult { +fn contains_many(s: &[Column], ascii_case_insensitive: bool) -> PolarsResult { let ca = s[0].str()?; let patterns = s[1].str()?; polars_ops::chunked_array::strings::contains_any(ca, patterns, ascii_case_insensitive) - .map(|out| out.into_series()) + .map(|out| out.into_column()) } #[cfg(feature = "find_many")] -fn replace_many(s: &[Series], ascii_case_insensitive: bool) -> PolarsResult { +fn replace_many(s: &[Column], ascii_case_insensitive: bool) -> PolarsResult { let ca = s[0].str()?; let patterns = s[1].str()?; let replace_with = s[2].str()?; @@ -423,148 +423,148 @@ fn replace_many(s: &[Series], ascii_case_insensitive: bool) -> PolarsResult PolarsResult { +) -> PolarsResult { let ca = s[0].str()?; let patterns = &s[1]; polars_ops::chunked_array::strings::extract_many( ca, - patterns, + patterns.as_materialized_series(), ascii_case_insensitive, overlapping, ) - .map(|out| out.into_series()) + .map(|out| out.into_column()) } -fn uppercase(s: &Series) -> PolarsResult { +fn uppercase(s: &Column) -> PolarsResult { let ca = s.str()?; - Ok(ca.to_uppercase().into_series()) + Ok(ca.to_uppercase().into_column()) } -fn lowercase(s: &Series) -> PolarsResult { +fn lowercase(s: &Column) -> PolarsResult { let ca = s.str()?; - Ok(ca.to_lowercase().into_series()) + Ok(ca.to_lowercase().into_column()) } #[cfg(feature = "nightly")] -pub(super) fn titlecase(s: &Series) -> PolarsResult { +pub(super) fn titlecase(s: &Column) -> PolarsResult { let ca = s.str()?; - Ok(ca.to_titlecase().into_series()) + 
Ok(ca.to_titlecase().into_column()) } -pub(super) fn len_chars(s: &Series) -> PolarsResult { +pub(super) fn len_chars(s: &Column) -> PolarsResult { let ca = s.str()?; - Ok(ca.str_len_chars().into_series()) + Ok(ca.str_len_chars().into_column()) } -pub(super) fn len_bytes(s: &Series) -> PolarsResult { +pub(super) fn len_bytes(s: &Column) -> PolarsResult { let ca = s.str()?; - Ok(ca.str_len_bytes().into_series()) + Ok(ca.str_len_bytes().into_column()) } #[cfg(feature = "regex")] -pub(super) fn contains(s: &[Series], literal: bool, strict: bool) -> PolarsResult { +pub(super) fn contains(s: &[Column], literal: bool, strict: bool) -> PolarsResult { let ca = s[0].str()?; let pat = s[1].str()?; ca.contains_chunked(pat, literal, strict) - .map(|ok| ok.into_series()) + .map(|ok| ok.into_column()) } #[cfg(feature = "regex")] -pub(super) fn find(s: &[Series], literal: bool, strict: bool) -> PolarsResult { +pub(super) fn find(s: &[Column], literal: bool, strict: bool) -> PolarsResult { let ca = s[0].str()?; let pat = s[1].str()?; ca.find_chunked(pat, literal, strict) - .map(|ok| ok.into_series()) + .map(|ok| ok.into_column()) } -pub(super) fn ends_with(s: &[Series]) -> PolarsResult { +pub(super) fn ends_with(s: &[Column]) -> PolarsResult { let ca = &s[0].str()?.as_binary(); let suffix = &s[1].str()?.as_binary(); - Ok(ca.ends_with_chunked(suffix).into_series()) + Ok(ca.ends_with_chunked(suffix).into_column()) } -pub(super) fn starts_with(s: &[Series]) -> PolarsResult { +pub(super) fn starts_with(s: &[Column]) -> PolarsResult { let ca = &s[0].str()?.as_binary(); let prefix = &s[1].str()?.as_binary(); - Ok(ca.starts_with_chunked(prefix).into_series()) + Ok(ca.starts_with_chunked(prefix).into_column()) } /// Extract a regex pattern from the a string value. 
-pub(super) fn extract(s: &[Series], group_index: usize) -> PolarsResult { +pub(super) fn extract(s: &[Column], group_index: usize) -> PolarsResult { let ca = s[0].str()?; let pat = s[1].str()?; - ca.extract(pat, group_index).map(|ca| ca.into_series()) + ca.extract(pat, group_index).map(|ca| ca.into_column()) } #[cfg(feature = "extract_groups")] /// Extract all capture groups from a regex pattern as a struct -pub(super) fn extract_groups(s: &Series, pat: &str, dtype: &DataType) -> PolarsResult { +pub(super) fn extract_groups(s: &Column, pat: &str, dtype: &DataType) -> PolarsResult { let ca = s.str()?; - ca.extract_groups(pat, dtype) + ca.extract_groups(pat, dtype).map(Column::from) } #[cfg(feature = "string_pad")] -pub(super) fn pad_start(s: &Series, length: usize, fill_char: char) -> PolarsResult { +pub(super) fn pad_start(s: &Column, length: usize, fill_char: char) -> PolarsResult { let ca = s.str()?; - Ok(ca.pad_start(length, fill_char).into_series()) + Ok(ca.pad_start(length, fill_char).into_column()) } #[cfg(feature = "string_pad")] -pub(super) fn pad_end(s: &Series, length: usize, fill_char: char) -> PolarsResult { +pub(super) fn pad_end(s: &Column, length: usize, fill_char: char) -> PolarsResult { let ca = s.str()?; - Ok(ca.pad_end(length, fill_char).into_series()) + Ok(ca.pad_end(length, fill_char).into_column()) } #[cfg(feature = "string_pad")] -pub(super) fn zfill(s: &[Series]) -> PolarsResult { +pub(super) fn zfill(s: &[Column]) -> PolarsResult { let ca = s[0].str()?; let length_s = s[1].strict_cast(&DataType::UInt64)?; let length = length_s.u64()?; - Ok(ca.zfill(length).into_series()) + Ok(ca.zfill(length).into_column()) } -pub(super) fn strip_chars(s: &[Series]) -> PolarsResult { +pub(super) fn strip_chars(s: &[Column]) -> PolarsResult { let ca = s[0].str()?; let pat_s = &s[1]; - ca.strip_chars(pat_s).map(|ok| ok.into_series()) + ca.strip_chars(pat_s).map(|ok| ok.into_column()) } -pub(super) fn strip_chars_start(s: &[Series]) -> PolarsResult { 
+pub(super) fn strip_chars_start(s: &[Column]) -> PolarsResult { let ca = s[0].str()?; let pat_s = &s[1]; - ca.strip_chars_start(pat_s).map(|ok| ok.into_series()) + ca.strip_chars_start(pat_s).map(|ok| ok.into_column()) } -pub(super) fn strip_chars_end(s: &[Series]) -> PolarsResult { +pub(super) fn strip_chars_end(s: &[Column]) -> PolarsResult { let ca = s[0].str()?; let pat_s = &s[1]; - ca.strip_chars_end(pat_s).map(|ok| ok.into_series()) + ca.strip_chars_end(pat_s).map(|ok| ok.into_column()) } -pub(super) fn strip_prefix(s: &[Series]) -> PolarsResult { +pub(super) fn strip_prefix(s: &[Column]) -> PolarsResult { let ca = s[0].str()?; let prefix = s[1].str()?; - Ok(ca.strip_prefix(prefix).into_series()) + Ok(ca.strip_prefix(prefix).into_column()) } -pub(super) fn strip_suffix(s: &[Series]) -> PolarsResult { +pub(super) fn strip_suffix(s: &[Column]) -> PolarsResult { let ca = s[0].str()?; let suffix = s[1].str()?; - Ok(ca.strip_suffix(suffix).into_series()) + Ok(ca.strip_suffix(suffix).into_column()) } -pub(super) fn extract_all(args: &[Series]) -> PolarsResult { +pub(super) fn extract_all(args: &[Column]) -> PolarsResult { let s = &args[0]; let pat = &args[1]; @@ -573,20 +573,20 @@ pub(super) fn extract_all(args: &[Series]) -> PolarsResult { if pat.len() == 1 { if let Some(pat) = pat.get(0) { - ca.extract_all(pat).map(|ca| ca.into_series()) + ca.extract_all(pat).map(|ca| ca.into_column()) } else { - Ok(Series::full_null( + Ok(Column::full_null( ca.name().clone(), ca.len(), &DataType::List(Box::new(DataType::String)), )) } } else { - ca.extract_all_many(pat).map(|ca| ca.into_series()) + ca.extract_all_many(pat).map(|ca| ca.into_column()) } } -pub(super) fn count_matches(args: &[Series], literal: bool) -> PolarsResult { +pub(super) fn count_matches(args: &[Column], literal: bool) -> PolarsResult { let s = &args[0]; let pat = &args[1]; @@ -594,9 +594,9 @@ pub(super) fn count_matches(args: &[Series], literal: bool) -> PolarsResult PolarsResult PolarsResult { +) -> 
PolarsResult { match dtype { #[cfg(feature = "dtype-date")] DataType::Date => to_date(&s[0], options), @@ -628,62 +628,62 @@ pub(super) fn strptime( } #[cfg(feature = "dtype-struct")] -pub(super) fn split_exact(s: &[Series], n: usize, inclusive: bool) -> PolarsResult { +pub(super) fn split_exact(s: &[Column], n: usize, inclusive: bool) -> PolarsResult { let ca = s[0].str()?; let by = s[1].str()?; if inclusive { - ca.split_exact_inclusive(by, n).map(|ca| ca.into_series()) + ca.split_exact_inclusive(by, n).map(|ca| ca.into_column()) } else { - ca.split_exact(by, n).map(|ca| ca.into_series()) + ca.split_exact(by, n).map(|ca| ca.into_column()) } } #[cfg(feature = "dtype-struct")] -pub(super) fn splitn(s: &[Series], n: usize) -> PolarsResult { +pub(super) fn splitn(s: &[Column], n: usize) -> PolarsResult { let ca = s[0].str()?; let by = s[1].str()?; - ca.splitn(by, n).map(|ca| ca.into_series()) + ca.splitn(by, n).map(|ca| ca.into_column()) } -pub(super) fn split(s: &[Series], inclusive: bool) -> PolarsResult { +pub(super) fn split(s: &[Column], inclusive: bool) -> PolarsResult { let ca = s[0].str()?; let by = s[1].str()?; if inclusive { - Ok(ca.split_inclusive(by).into_series()) + Ok(ca.split_inclusive(by).into_column()) } else { - Ok(ca.split(by).into_series()) + Ok(ca.split(by).into_column()) } } #[cfg(feature = "dtype-date")] -fn to_date(s: &Series, options: &StrptimeOptions) -> PolarsResult { +fn to_date(s: &Column, options: &StrptimeOptions) -> PolarsResult { let ca = s.str()?; let out = { if options.exact { ca.as_date(options.format.as_deref(), options.cache)? - .into_series() + .into_column() } else { ca.as_date_not_exact(options.format.as_deref())? 
- .into_series() + .into_column() } }; if options.strict && ca.null_count() != out.null_count() { - handle_casting_failures(s, &out)?; + handle_casting_failures(s.as_materialized_series(), out.as_materialized_series())?; } - Ok(out.into_series()) + Ok(out.into_column()) } #[cfg(feature = "dtype-datetime")] fn to_datetime( - s: &[Series], + s: &[Column], time_unit: &TimeUnit, time_zone: Option<&TimeZone>, options: &StrptimeOptions, -) -> PolarsResult { +) -> PolarsResult { let datetime_strings = &s[0].str()?; let ambiguous = &s[1].str()?; let tz_aware = match &options.format { @@ -705,7 +705,7 @@ fn to_datetime( time_zone, ambiguous, )? - .into_series() + .into_column() } else { datetime_strings .as_datetime_not_exact( @@ -715,17 +715,17 @@ fn to_datetime( time_zone, ambiguous, )? - .into_series() + .into_column() }; if options.strict && datetime_strings.null_count() != out.null_count() { - handle_casting_failures(&s[0], &out)?; + handle_casting_failures(s[0].as_materialized_series(), out.as_materialized_series())?; } - Ok(out.into_series()) + Ok(out.into_column()) } #[cfg(feature = "dtype-time")] -fn to_time(s: &Series, options: &StrptimeOptions) -> PolarsResult { +fn to_time(s: &Column, options: &StrptimeOptions) -> PolarsResult { polars_ensure!( options.exact, ComputeError: "non-exact not implemented for Time data type" ); @@ -733,33 +733,33 @@ fn to_time(s: &Series, options: &StrptimeOptions) -> PolarsResult { let ca = s.str()?; let out = ca .as_time(options.format.as_deref(), options.cache)? 
- .into_series(); + .into_column(); if options.strict && ca.null_count() != out.null_count() { - handle_casting_failures(s, &out)?; + handle_casting_failures(s.as_materialized_series(), out.as_materialized_series())?; } - Ok(out.into_series()) + Ok(out.into_column()) } #[cfg(feature = "concat_str")] -pub(super) fn join(s: &Series, delimiter: &str, ignore_nulls: bool) -> PolarsResult { +pub(super) fn join(s: &Column, delimiter: &str, ignore_nulls: bool) -> PolarsResult { let str_s = s.cast(&DataType::String)?; let joined = polars_ops::chunked_array::str_join(str_s.str()?, delimiter, ignore_nulls); - Ok(joined.into_series()) + Ok(joined.into_column()) } #[cfg(feature = "concat_str")] pub(super) fn concat_hor( - series: &[Series], + series: &[Column], delimiter: &str, ignore_nulls: bool, -) -> PolarsResult { +) -> PolarsResult { let str_series: Vec<_> = series .iter() .map(|s| s.cast(&DataType::String)) .collect::>()?; let cas: Vec<_> = str_series.iter().map(|s| s.str().unwrap()).collect(); - Ok(polars_ops::chunked_array::hor_str_concat(&cas, delimiter, ignore_nulls)?.into_series()) + Ok(polars_ops::chunked_array::hor_str_concat(&cas, delimiter, ignore_nulls)?.into_column()) } impl From for FunctionExpr { @@ -906,7 +906,7 @@ fn replace_all<'a>( } #[cfg(feature = "regex")] -pub(super) fn replace(s: &[Series], literal: bool, n: i64) -> PolarsResult { +pub(super) fn replace(s: &[Column], literal: bool, n: i64) -> PolarsResult { let column = &s[0]; let pat = &s[1]; let val = &s[2]; @@ -921,24 +921,24 @@ pub(super) fn replace(s: &[Series], literal: bool, n: i64) -> PolarsResult PolarsResult { +pub(super) fn reverse(s: &Column) -> PolarsResult { let ca = s.str()?; - Ok(ca.str_reverse().into_series()) + Ok(ca.str_reverse().into_column()) } #[cfg(feature = "string_to_integer")] -pub(super) fn to_integer(s: &[Series], strict: bool) -> PolarsResult { +pub(super) fn to_integer(s: &[Column], strict: bool) -> PolarsResult { let ca = s[0].str()?; let base = 
s[1].strict_cast(&DataType::UInt32)?; ca.to_integer(base.u32()?, strict) - .map(|ok| ok.into_series()) + .map(|ok| ok.into_column()) } -fn _ensure_lengths(s: &[Series]) -> bool { +fn _ensure_lengths(s: &[Column]) -> bool { // Calculate the post-broadcast length and ensure everything is consistent. let len = s .iter() @@ -950,7 +950,7 @@ fn _ensure_lengths(s: &[Series]) -> bool { .all(|series| series.len() == 1 || series.len() == len) } -pub(super) fn str_slice(s: &[Series]) -> PolarsResult { +pub(super) fn str_slice(s: &[Column]) -> PolarsResult { polars_ensure!( _ensure_lengths(s), ComputeError: "all series in `str_slice` should have equal or unit length", @@ -958,68 +958,68 @@ pub(super) fn str_slice(s: &[Series]) -> PolarsResult { let ca = s[0].str()?; let offset = &s[1]; let length = &s[2]; - Ok(ca.str_slice(offset, length)?.into_series()) + Ok(ca.str_slice(offset, length)?.into_column()) } -pub(super) fn str_head(s: &[Series]) -> PolarsResult { +pub(super) fn str_head(s: &[Column]) -> PolarsResult { polars_ensure!( _ensure_lengths(s), ComputeError: "all series in `str_head` should have equal or unit length", ); let ca = s[0].str()?; let n = &s[1]; - Ok(ca.str_head(n)?.into_series()) + Ok(ca.str_head(n)?.into_column()) } -pub(super) fn str_tail(s: &[Series]) -> PolarsResult { +pub(super) fn str_tail(s: &[Column]) -> PolarsResult { polars_ensure!( _ensure_lengths(s), ComputeError: "all series in `str_tail` should have equal or unit length", ); let ca = s[0].str()?; let n = &s[1]; - Ok(ca.str_tail(n)?.into_series()) + Ok(ca.str_tail(n)?.into_column()) } #[cfg(feature = "string_encoding")] -pub(super) fn hex_encode(s: &Series) -> PolarsResult { - Ok(s.str()?.hex_encode().into_series()) +pub(super) fn hex_encode(s: &Column) -> PolarsResult { + Ok(s.str()?.hex_encode().into_column()) } #[cfg(feature = "binary_encoding")] -pub(super) fn hex_decode(s: &Series, strict: bool) -> PolarsResult { - s.str()?.hex_decode(strict).map(|ca| ca.into_series()) +pub(super) fn 
hex_decode(s: &Column, strict: bool) -> PolarsResult { + s.str()?.hex_decode(strict).map(|ca| ca.into_column()) } #[cfg(feature = "string_encoding")] -pub(super) fn base64_encode(s: &Series) -> PolarsResult { - Ok(s.str()?.base64_encode().into_series()) +pub(super) fn base64_encode(s: &Column) -> PolarsResult { + Ok(s.str()?.base64_encode().into_column()) } #[cfg(feature = "binary_encoding")] -pub(super) fn base64_decode(s: &Series, strict: bool) -> PolarsResult { - s.str()?.base64_decode(strict).map(|ca| ca.into_series()) +pub(super) fn base64_decode(s: &Column, strict: bool) -> PolarsResult { + s.str()?.base64_decode(strict).map(|ca| ca.into_column()) } #[cfg(feature = "dtype-decimal")] -pub(super) fn to_decimal(s: &Series, infer_len: usize) -> PolarsResult { +pub(super) fn to_decimal(s: &Column, infer_len: usize) -> PolarsResult { let ca = s.str()?; - ca.to_decimal(infer_len) + ca.to_decimal(infer_len).map(Column::from) } #[cfg(feature = "extract_jsonpath")] pub(super) fn json_decode( - s: &Series, + s: &Column, dtype: Option, infer_schema_len: Option, -) -> PolarsResult { +) -> PolarsResult { let ca = s.str()?; - ca.json_decode(dtype, infer_schema_len) + ca.json_decode(dtype, infer_schema_len).map(Column::from) } #[cfg(feature = "extract_jsonpath")] -pub(super) fn json_path_match(s: &[Series]) -> PolarsResult { +pub(super) fn json_path_match(s: &[Column]) -> PolarsResult { let ca = s[0].str()?; let pat = s[1].str()?; - Ok(ca.json_path_match(pat)?.into_series()) + Ok(ca.json_path_match(pat)?.into_column()) } diff --git a/crates/polars-plan/src/dsl/function_expr/struct_.rs b/crates/polars-plan/src/dsl/function_expr/struct_.rs index 98753314c6e2..acc8020b8e7e 100644 --- a/crates/polars-plan/src/dsl/function_expr/struct_.rs +++ b/crates/polars-plan/src/dsl/function_expr/struct_.rs @@ -142,7 +142,7 @@ impl Display for StructFunction { } } -impl From for SpecialEq> { +impl From for SpecialEq> { fn from(func: StructFunction) -> Self { use StructFunction::*; match func 
{ @@ -159,12 +159,12 @@ impl From for SpecialEq> { } } -pub(super) fn get_by_name(s: &Series, name: &str) -> PolarsResult { +pub(super) fn get_by_name(s: &Column, name: &str) -> PolarsResult { let ca = s.struct_()?; - ca.field_by_name(name) + ca.field_by_name(name).map(Column::from) } -pub(super) fn rename_fields(s: &Series, names: Arc<[PlSmallStr]>) -> PolarsResult { +pub(super) fn rename_fields(s: &Column, names: Arc<[PlSmallStr]>) -> PolarsResult { let ca = s.struct_()?; let fields = ca .fields_as_series() @@ -176,12 +176,12 @@ pub(super) fn rename_fields(s: &Series, names: Arc<[PlSmallStr]>) -> PolarsResul s }) .collect::>(); - let mut out = StructChunked::from_series(ca.name().clone(), &fields)?; + let mut out = StructChunked::from_series(ca.name().clone(), fields.iter())?; out.zip_outer_validity(ca); - Ok(out.into_series()) + Ok(out.into_column()) } -pub(super) fn prefix_fields(s: &Series, prefix: &str) -> PolarsResult { +pub(super) fn prefix_fields(s: &Column, prefix: &str) -> PolarsResult { let ca = s.struct_()?; let fields = ca .fields_as_series() @@ -193,12 +193,12 @@ pub(super) fn prefix_fields(s: &Series, prefix: &str) -> PolarsResult { s }) .collect::>(); - let mut out = StructChunked::from_series(ca.name().clone(), &fields)?; + let mut out = StructChunked::from_series(ca.name().clone(), fields.iter())?; out.zip_outer_validity(ca); - Ok(out.into_series()) + Ok(out.into_column()) } -pub(super) fn suffix_fields(s: &Series, suffix: &str) -> PolarsResult { +pub(super) fn suffix_fields(s: &Column, suffix: &str) -> PolarsResult { let ca = s.struct_()?; let fields = ca .fields_as_series() @@ -210,13 +210,13 @@ pub(super) fn suffix_fields(s: &Series, suffix: &str) -> PolarsResult { s }) .collect::>(); - let mut out = StructChunked::from_series(ca.name().clone(), &fields)?; + let mut out = StructChunked::from_series(ca.name().clone(), fields.iter())?; out.zip_outer_validity(ca); - Ok(out.into_series()) + Ok(out.into_column()) } #[cfg(feature = "json")] 
-pub(super) fn to_json(s: &Series) -> PolarsResult { +pub(super) fn to_json(s: &Column) -> PolarsResult { let ca = s.struct_()?; let dtype = ca.dtype().to_arrow(CompatLevel::newest()); @@ -225,10 +225,10 @@ pub(super) fn to_json(s: &Series) -> PolarsResult { polars_json::json::write::serialize_to_utf8(arr.as_ref()) }); - Ok(StringChunked::from_chunk_iter(ca.name().clone(), iter).into_series()) + Ok(StringChunked::from_chunk_iter(ca.name().clone(), iter).into_column()) } -pub(super) fn with_fields(args: &[Series]) -> PolarsResult { +pub(super) fn with_fields(args: &[Column]) -> PolarsResult { let s = &args[0]; let ca = s.struct_()?; @@ -241,11 +241,11 @@ pub(super) fn with_fields(args: &[Series]) -> PolarsResult { } for field in &args[1..] { - fields.insert(field.name(), field); + fields.insert(field.name(), field.as_materialized_series()); } let new_fields = fields.into_values().cloned().collect::>(); - let mut out = StructChunked::from_series(ca.name().clone(), &new_fields)?; + let mut out = StructChunked::from_series(ca.name().clone(), new_fields.iter())?; out.zip_outer_validity(ca); - Ok(out.into_series()) + Ok(out.into_column()) } diff --git a/crates/polars-plan/src/dsl/function_expr/temporal.rs b/crates/polars-plan/src/dsl/function_expr/temporal.rs index 18340a00adaf..dcb5005ae4b1 100644 --- a/crates/polars-plan/src/dsl/function_expr/temporal.rs +++ b/crates/polars-plan/src/dsl/function_expr/temporal.rs @@ -1,7 +1,7 @@ use super::*; use crate::{map, map_as_slice}; -impl From for SpecialEq> { +impl From for SpecialEq> { fn from(func: TemporalFunction) -> Self { use TemporalFunction::*; match func { @@ -71,10 +71,10 @@ impl From for SpecialEq> { } pub(super) fn datetime( - s: &[Series], + s: &[Column], time_unit: &TimeUnit, time_zone: Option<&str>, -) -> PolarsResult { +) -> PolarsResult { use polars_core::export::chrono::NaiveDate; use polars_core::utils::CustomIterTools; @@ -177,12 +177,12 @@ pub(super) fn datetime( }, }; - let mut s = ca.into_series(); + let 
mut s = ca.into_column(); s.rename(PlSmallStr::from_static("datetime")); Ok(s) } -pub(super) fn combine(s: &[Series], tu: TimeUnit) -> PolarsResult { +pub(super) fn combine(s: &[Column], tu: TimeUnit) -> PolarsResult { let date = &s[0]; let time = &s[1]; @@ -207,7 +207,7 @@ pub(super) fn combine(s: &[Series], tu: TimeUnit) -> PolarsResult { &StringChunked::from_iter(std::iter::once("raise")), NonExistent::Raise, )? - .into()), + .into_column()), _ => result_naive, } } diff --git a/crates/polars-plan/src/dsl/function_expr/trigonometry.rs b/crates/polars-plan/src/dsl/function_expr/trigonometry.rs index 45c989e16dcb..c0d83822aef9 100644 --- a/crates/polars-plan/src/dsl/function_expr/trigonometry.rs +++ b/crates/polars-plan/src/dsl/function_expr/trigonometry.rs @@ -48,9 +48,9 @@ impl Display for TrigonometricFunction { } pub(super) fn apply_trigonometric_function( - s: &Series, + s: &Column, trig_function: TrigonometricFunction, -) -> PolarsResult { +) -> PolarsResult { use DataType::*; match s.dtype() { Float32 => { @@ -69,7 +69,7 @@ pub(super) fn apply_trigonometric_function( } } -pub(super) fn apply_arctan2(s: &mut [Series]) -> PolarsResult> { +pub(super) fn apply_arctan2(s: &mut [Column]) -> PolarsResult> { let y = &s[0]; let x = &s[1]; @@ -77,8 +77,8 @@ pub(super) fn apply_arctan2(s: &mut [Series]) -> PolarsResult> { let x_len = x.len(); match (y_len, x_len) { - (1, _) | (_, 1) => arctan2_on_series(y, x), - (len_a, len_b) if len_a == len_b => arctan2_on_series(y, x), + (1, _) | (_, 1) => arctan2_on_columns(y, x), + (len_a, len_b) if len_a == len_b => arctan2_on_columns(y, x), _ => polars_bail!( ComputeError: "y shape: {} in `arctan2` expression does not match that of x: {}", @@ -87,7 +87,7 @@ pub(super) fn apply_arctan2(s: &mut [Series]) -> PolarsResult> { } } -fn arctan2_on_series(y: &Series, x: &Series) -> PolarsResult> { +fn arctan2_on_columns(y: &Column, x: &Column) -> PolarsResult> { use DataType::*; match y.dtype() { Float32 => { @@ -100,36 +100,38 @@ fn 
arctan2_on_series(y: &Series, x: &Series) -> PolarsResult> { }, _ => { let y = y.cast(&DataType::Float64)?; - arctan2_on_series(&y, x) + arctan2_on_columns(&y, x) }, } } -fn arctan2_on_floats(y: &ChunkedArray, x: &Series) -> PolarsResult> +fn arctan2_on_floats(y: &ChunkedArray, x: &Column) -> PolarsResult> where T: PolarsFloatType, T::Native: Float, - ChunkedArray: IntoSeries, + ChunkedArray: IntoColumn, { let dtype = T::get_dtype(); let x = x.cast(&dtype)?; - let x = y.unpack_series_matching_type(&x).unwrap(); + let x = y + .unpack_series_matching_type(x.as_materialized_series()) + .unwrap(); if x.len() == 1 { let x_value = x .get(0) .ok_or_else(|| polars_err!(ComputeError: "arctan2 x value is null"))?; - Ok(Some(y.apply_values(|v| v.atan2(x_value)).into_series())) + Ok(Some(y.apply_values(|v| v.atan2(x_value)).into_column())) } else if y.len() == 1 { let y_value = y .get(0) .ok_or_else(|| polars_err!(ComputeError: "arctan2 y value is null"))?; - Ok(Some(x.apply_values(|v| y_value.atan2(v)).into_series())) + Ok(Some(x.apply_values(|v| y_value.atan2(v)).into_column())) } else { Ok(Some( - polars_core::prelude::arity::binary(y, x, atan2_kernel).into_series(), + polars_core::prelude::arity::binary(y, x, atan2_kernel).into_column(), )) } } @@ -137,11 +139,11 @@ where fn apply_trigonometric_function_to_float( ca: &ChunkedArray, trig_function: TrigonometricFunction, -) -> PolarsResult +) -> PolarsResult where T: PolarsFloatType, T::Native: Float, - ChunkedArray: IntoSeries, + ChunkedArray: IntoColumn, { match trig_function { TrigonometricFunction::Cos => cos(ca), @@ -162,137 +164,137 @@ where } } -fn cos(ca: &ChunkedArray) -> PolarsResult +fn cos(ca: &ChunkedArray) -> PolarsResult where T: PolarsFloatType, T::Native: Float, - ChunkedArray: IntoSeries, + ChunkedArray: IntoColumn, { - Ok(ca.apply_values(|v| v.cos()).into_series()) + Ok(ca.apply_values(|v| v.cos()).into_column()) } -fn cot(ca: &ChunkedArray) -> PolarsResult +fn cot(ca: &ChunkedArray) -> PolarsResult where 
T: PolarsFloatType, T::Native: Float, - ChunkedArray: IntoSeries, + ChunkedArray: IntoColumn, { - Ok(ca.apply_values(|v| v.tan().powi(-1)).into_series()) + Ok(ca.apply_values(|v| v.tan().powi(-1)).into_column()) } -fn sin(ca: &ChunkedArray) -> PolarsResult +fn sin(ca: &ChunkedArray) -> PolarsResult where T: PolarsFloatType, T::Native: Float, - ChunkedArray: IntoSeries, + ChunkedArray: IntoColumn, { - Ok(ca.apply_values(|v| v.sin()).into_series()) + Ok(ca.apply_values(|v| v.sin()).into_column()) } -fn tan(ca: &ChunkedArray) -> PolarsResult +fn tan(ca: &ChunkedArray) -> PolarsResult where T: PolarsFloatType, T::Native: Float, - ChunkedArray: IntoSeries, + ChunkedArray: IntoColumn, { - Ok(ca.apply_values(|v| v.tan()).into_series()) + Ok(ca.apply_values(|v| v.tan()).into_column()) } -fn arccos(ca: &ChunkedArray) -> PolarsResult +fn arccos(ca: &ChunkedArray) -> PolarsResult where T: PolarsFloatType, T::Native: Float, - ChunkedArray: IntoSeries, + ChunkedArray: IntoColumn, { - Ok(ca.apply_values(|v| v.acos()).into_series()) + Ok(ca.apply_values(|v| v.acos()).into_column()) } -fn arcsin(ca: &ChunkedArray) -> PolarsResult +fn arcsin(ca: &ChunkedArray) -> PolarsResult where T: PolarsFloatType, T::Native: Float, - ChunkedArray: IntoSeries, + ChunkedArray: IntoColumn, { - Ok(ca.apply_values(|v| v.asin()).into_series()) + Ok(ca.apply_values(|v| v.asin()).into_column()) } -fn arctan(ca: &ChunkedArray) -> PolarsResult +fn arctan(ca: &ChunkedArray) -> PolarsResult where T: PolarsFloatType, T::Native: Float, - ChunkedArray: IntoSeries, + ChunkedArray: IntoColumn, { - Ok(ca.apply_values(|v| v.atan()).into_series()) + Ok(ca.apply_values(|v| v.atan()).into_column()) } -fn cosh(ca: &ChunkedArray) -> PolarsResult +fn cosh(ca: &ChunkedArray) -> PolarsResult where T: PolarsFloatType, T::Native: Float, - ChunkedArray: IntoSeries, + ChunkedArray: IntoColumn, { - Ok(ca.apply_values(|v| v.cosh()).into_series()) + Ok(ca.apply_values(|v| v.cosh()).into_column()) } -fn sinh(ca: &ChunkedArray) 
-> PolarsResult +fn sinh(ca: &ChunkedArray) -> PolarsResult where T: PolarsFloatType, T::Native: Float, - ChunkedArray: IntoSeries, + ChunkedArray: IntoColumn, { - Ok(ca.apply_values(|v| v.sinh()).into_series()) + Ok(ca.apply_values(|v| v.sinh()).into_column()) } -fn tanh(ca: &ChunkedArray) -> PolarsResult +fn tanh(ca: &ChunkedArray) -> PolarsResult where T: PolarsFloatType, T::Native: Float, - ChunkedArray: IntoSeries, + ChunkedArray: IntoColumn, { - Ok(ca.apply_values(|v| v.tanh()).into_series()) + Ok(ca.apply_values(|v| v.tanh()).into_column()) } -fn arccosh(ca: &ChunkedArray) -> PolarsResult +fn arccosh(ca: &ChunkedArray) -> PolarsResult where T: PolarsFloatType, T::Native: Float, - ChunkedArray: IntoSeries, + ChunkedArray: IntoColumn, { - Ok(ca.apply_values(|v| v.acosh()).into_series()) + Ok(ca.apply_values(|v| v.acosh()).into_column()) } -fn arcsinh(ca: &ChunkedArray) -> PolarsResult +fn arcsinh(ca: &ChunkedArray) -> PolarsResult where T: PolarsFloatType, T::Native: Float, - ChunkedArray: IntoSeries, + ChunkedArray: IntoColumn, { - Ok(ca.apply_values(|v| v.asinh()).into_series()) + Ok(ca.apply_values(|v| v.asinh()).into_column()) } -fn arctanh(ca: &ChunkedArray) -> PolarsResult +fn arctanh(ca: &ChunkedArray) -> PolarsResult where T: PolarsFloatType, T::Native: Float, - ChunkedArray: IntoSeries, + ChunkedArray: IntoColumn, { - Ok(ca.apply_values(|v| v.atanh()).into_series()) + Ok(ca.apply_values(|v| v.atanh()).into_column()) } -fn degrees(ca: &ChunkedArray) -> PolarsResult +fn degrees(ca: &ChunkedArray) -> PolarsResult where T: PolarsFloatType, T::Native: Float, - ChunkedArray: IntoSeries, + ChunkedArray: IntoColumn, { - Ok(ca.apply_values(|v| v.to_degrees()).into_series()) + Ok(ca.apply_values(|v| v.to_degrees()).into_column()) } -fn radians(ca: &ChunkedArray) -> PolarsResult +fn radians(ca: &ChunkedArray) -> PolarsResult where T: PolarsFloatType, T::Native: Float, - ChunkedArray: IntoSeries, + ChunkedArray: IntoColumn, { - Ok(ca.apply_values(|v| 
v.to_radians()).into_series()) + Ok(ca.apply_values(|v| v.to_radians()).into_column()) } diff --git a/crates/polars-plan/src/dsl/function_expr/unique.rs b/crates/polars-plan/src/dsl/function_expr/unique.rs index 68b665056444..c9f22a841f37 100644 --- a/crates/polars-plan/src/dsl/function_expr/unique.rs +++ b/crates/polars-plan/src/dsl/function_expr/unique.rs @@ -1,6 +1,6 @@ use super::*; -pub(super) fn unique(s: &Series, stable: bool) -> PolarsResult { +pub(super) fn unique(s: &Column, stable: bool) -> PolarsResult { if stable { s.unique_stable() } else { diff --git a/crates/polars-plan/src/dsl/functions/arity.rs b/crates/polars-plan/src/dsl/functions/arity.rs index 9e4c2ac73354..e3fb7b884885 100644 --- a/crates/polars-plan/src/dsl/functions/arity.rs +++ b/crates/polars-plan/src/dsl/functions/arity.rs @@ -2,9 +2,9 @@ use super::*; macro_rules! prepare_binary_function { ($f:ident) => { - move |s: &mut [Series]| { - let s0 = std::mem::take(&mut s[0]); - let s1 = std::mem::take(&mut s[1]); + move |c: &mut [Column]| { + let s0 = std::mem::take(&mut c[0]); + let s1 = std::mem::take(&mut c[1]); $f(s0, s1) } @@ -16,7 +16,7 @@ macro_rules! prepare_binary_function { /// The closure takes two arguments, each a [`Series`]. `output_type` must be the output dtype of the resulting [`Series`]. pub fn map_binary(a: Expr, b: Expr, f: F, output_type: GetOutput) -> Expr where - F: 'static + Fn(Series, Series) -> PolarsResult> + Send + Sync, + F: 'static + Fn(Column, Column) -> PolarsResult> + Send + Sync, { let function = prepare_binary_function!(f); a.map_many(function, &[b], output_type) @@ -27,7 +27,7 @@ where /// See [`Expr::apply`] for the difference between [`map`](Expr::map) and [`apply`](Expr::apply). 
pub fn apply_binary(a: Expr, b: Expr, f: F, output_type: GetOutput) -> Expr where - F: 'static + Fn(Series, Series) -> PolarsResult> + Send + Sync, + F: 'static + Fn(Column, Column) -> PolarsResult> + Send + Sync, { let function = prepare_binary_function!(f); a.apply_many(function, &[b], output_type) diff --git a/crates/polars-plan/src/dsl/functions/horizontal.rs b/crates/polars-plan/src/dsl/functions/horizontal.rs index eb0c79b3b0f7..b792c98956b3 100644 --- a/crates/polars-plan/src/dsl/functions/horizontal.rs +++ b/crates/polars-plan/src/dsl/functions/horizontal.rs @@ -22,23 +22,23 @@ fn cum_fold_dtype() -> GetOutput { /// Accumulate over multiple columns horizontally / row wise. pub fn fold_exprs(acc: Expr, f: F, exprs: E) -> Expr where - F: 'static + Fn(Series, Series) -> PolarsResult> + Send + Sync + Clone, + F: 'static + Fn(Column, Column) -> PolarsResult> + Send + Sync + Clone, E: AsRef<[Expr]>, { let mut exprs = exprs.as_ref().to_vec(); exprs.push(acc); - let function = SpecialEq::new(Arc::new(move |series: &mut [Series]| { - let mut series = series.to_vec(); - let mut acc = series.pop().unwrap(); + let function = SpecialEq::new(Arc::new(move |columns: &mut [Column]| { + let mut columns = columns.to_vec(); + let mut acc = columns.pop().unwrap(); - for s in series { - if let Some(a) = f(acc.clone(), s)? { + for c in columns { + if let Some(a) = f(acc.clone(), c)? { acc = a } } Ok(Some(acc)) - }) as Arc); + }) as Arc); Expr::AnonymousFunction { input: exprs, @@ -62,20 +62,20 @@ where /// `collect` is called. 
pub fn reduce_exprs(f: F, exprs: E) -> Expr where - F: 'static + Fn(Series, Series) -> PolarsResult> + Send + Sync + Clone, + F: 'static + Fn(Column, Column) -> PolarsResult> + Send + Sync + Clone, E: AsRef<[Expr]>, { let exprs = exprs.as_ref().to_vec(); - let function = SpecialEq::new(Arc::new(move |series: &mut [Series]| { - let mut s_iter = series.iter(); + let function = SpecialEq::new(Arc::new(move |columns: &mut [Column]| { + let mut c_iter = columns.iter(); - match s_iter.next() { + match c_iter.next() { Some(acc) => { let mut acc = acc.clone(); - for s in s_iter { - if let Some(a) = f(acc.clone(), s.clone())? { + for c in c_iter { + if let Some(a) = f(acc.clone(), c.clone())? { acc = a } } @@ -83,7 +83,7 @@ where }, None => Err(polars_err!(ComputeError: "`reduce` did not have any expressions to fold")), } - }) as Arc); + }) as Arc); Expr::AnonymousFunction { input: exprs, @@ -104,34 +104,34 @@ where #[cfg(feature = "dtype-struct")] pub fn cum_reduce_exprs(f: F, exprs: E) -> Expr where - F: 'static + Fn(Series, Series) -> PolarsResult> + Send + Sync + Clone, + F: 'static + Fn(Column, Column) -> PolarsResult> + Send + Sync + Clone, E: AsRef<[Expr]>, { let exprs = exprs.as_ref().to_vec(); - let function = SpecialEq::new(Arc::new(move |series: &mut [Series]| { - let mut s_iter = series.iter(); + let function = SpecialEq::new(Arc::new(move |columns: &mut [Column]| { + let mut c_iter = columns.iter(); - match s_iter.next() { + match c_iter.next() { Some(acc) => { let mut acc = acc.clone(); let mut result = vec![acc.clone()]; - for s in s_iter { - let name = s.name().clone(); - if let Some(a) = f(acc.clone(), s.clone())? { + for c in c_iter { + let name = c.name().clone(); + if let Some(a) = f(acc.clone(), c.clone())? 
{ acc = a; } acc.rename(name); result.push(acc.clone()); } - StructChunked::from_series(acc.name().clone(), &result) - .map(|ca| Some(ca.into_series())) + StructChunked::from_columns(acc.name().clone(), &result) + .map(|ca| Some(ca.into_column())) }, None => Err(polars_err!(ComputeError: "`reduce` did not have any expressions to fold")), } - }) as Arc); + }) as Arc); Expr::AnonymousFunction { input: exprs, @@ -152,32 +152,32 @@ where #[cfg(feature = "dtype-struct")] pub fn cum_fold_exprs(acc: Expr, f: F, exprs: E, include_init: bool) -> Expr where - F: 'static + Fn(Series, Series) -> PolarsResult> + Send + Sync + Clone, + F: 'static + Fn(Column, Column) -> PolarsResult> + Send + Sync + Clone, E: AsRef<[Expr]>, { let mut exprs = exprs.as_ref().to_vec(); exprs.push(acc); - let function = SpecialEq::new(Arc::new(move |series: &mut [Series]| { - let mut series = series.to_vec(); - let mut acc = series.pop().unwrap(); + let function = SpecialEq::new(Arc::new(move |columns: &mut [Column]| { + let mut columns = columns.to_vec(); + let mut acc = columns.pop().unwrap(); let mut result = vec![]; if include_init { result.push(acc.clone()) } - for s in series { - let name = s.name().clone(); - if let Some(a) = f(acc.clone(), s)? { + for c in columns { + let name = c.name().clone(); + if let Some(a) = f(acc.clone(), c)? 
{ acc = a; acc.rename(name); result.push(acc.clone()); } } - StructChunked::from_series(acc.name().clone(), &result).map(|ca| Some(ca.into_series())) - }) as Arc); + StructChunked::from_columns(acc.name().clone(), &result).map(|ca| Some(ca.into_column())) + }) as Arc); Expr::AnonymousFunction { input: exprs, diff --git a/crates/polars-plan/src/dsl/functions/repeat.rs b/crates/polars-plan/src/dsl/functions/repeat.rs index 5c3084fb7caf..21d27a542e99 100644 --- a/crates/polars-plan/src/dsl/functions/repeat.rs +++ b/crates/polars-plan/src/dsl/functions/repeat.rs @@ -5,7 +5,7 @@ use super::*; /// Generally you won't need this function, as `lit(value)` already represents a column containing /// only `value` whose length is automatically set to the correct number of rows. pub fn repeat>(value: E, n: Expr) -> Expr { - let function = |s: Series, n: Series| { + let function = |s: Column, n: Column| { polars_ensure!( n.dtype().is_integer(), SchemaMismatch: "expected expression of dtype 'integer', got '{}'", n.dtype() diff --git a/crates/polars-plan/src/dsl/list.rs b/crates/polars-plan/src/dsl/list.rs index 11e825a7ec1f..fb0c7a83b463 100644 --- a/crates/polars-plan/src/dsl/list.rs +++ b/crates/polars-plan/src/dsl/list.rs @@ -295,7 +295,7 @@ impl ListNameSpace { move |s| { s.list()? 
.to_struct(n_fields, name_generator.clone()) - .map(|s| Some(s.into_series())) + .map(|s| Some(s.into_column())) }, // we don't yet know the fields GetOutput::map_dtype(move |dt: &DataType| { diff --git a/crates/polars-plan/src/dsl/mod.rs b/crates/polars-plan/src/dsl/mod.rs index 895020ce43f5..786867f32e14 100644 --- a/crates/polars-plan/src/dsl/mod.rs +++ b/crates/polars-plan/src/dsl/mod.rs @@ -323,10 +323,10 @@ impl Expr { }; self.function_with_options( - move |s: Series| { - Ok(Some(Series::new( - s.name().clone(), - &[s.arg_min().map(|idx| idx as u32)], + move |c: Column| { + Ok(Some(Column::new( + c.name().clone(), + &[c.as_materialized_series().arg_min().map(|idx| idx as u32)], ))) }, GetOutput::from_type(IDX_DTYPE), @@ -344,10 +344,12 @@ impl Expr { }; self.function_with_options( - move |s: Series| { - Ok(Some(Series::new( - s.name().clone(), - &[s.arg_max().map(|idx| idx as IdxSize)], + move |c: Column| { + Ok(Some(Column::new( + c.name().clone(), + &[c.as_materialized_series() + .arg_max() + .map(|idx| idx as IdxSize)], ))) }, GetOutput::from_type(IDX_DTYPE), @@ -364,7 +366,13 @@ impl Expr { }; self.function_with_options( - move |s: Series| Ok(Some(s.arg_sort(sort_options).into_series())), + move |c: Column| { + Ok(Some( + c.as_materialized_series() + .arg_sort(sort_options) + .into_column(), + )) + }, GetOutput::from_type(IDX_DTYPE), options, ) @@ -535,9 +543,9 @@ impl Expr { /// the correct output_type. If None given the output type of the input expr is used. 
pub fn map(self, function: F, output_type: GetOutput) -> Self where - F: Fn(Series) -> PolarsResult> + 'static + Send + Sync, + F: Fn(Column) -> PolarsResult> + 'static + Send + Sync, { - let f = move |s: &mut [Series]| function(std::mem::take(&mut s[0])); + let f = move |c: &mut [Column]| function(std::mem::take(&mut c[0])); Expr::AnonymousFunction { input: vec![self], @@ -568,7 +576,7 @@ impl Expr { /// See the [`Expr::map`] function for the differences between [`map`](Expr::map) and [`apply`](Expr::apply). pub fn map_many(self, function: F, arguments: &[Expr], output_type: GetOutput) -> Self where - F: Fn(&mut [Series]) -> PolarsResult> + 'static + Send + Sync, + F: Fn(&mut [Column]) -> PolarsResult> + 'static + Send + Sync, { let mut input = vec![self]; input.extend_from_slice(arguments); @@ -594,9 +602,9 @@ impl Expr { /// * `map_list` should be used when the function expects a list aggregated series. pub fn map_list(self, function: F, output_type: GetOutput) -> Self where - F: Fn(Series) -> PolarsResult> + 'static + Send + Sync, + F: Fn(Column) -> PolarsResult> + 'static + Send + Sync, { - let f = move |s: &mut [Series]| function(std::mem::take(&mut s[0])); + let f = move |c: &mut [Column]| function(std::mem::take(&mut c[0])); Expr::AnonymousFunction { input: vec![self], @@ -618,9 +626,9 @@ impl Expr { options: FunctionOptions, ) -> Self where - F: Fn(Series) -> PolarsResult> + 'static + Send + Sync, + F: Fn(Column) -> PolarsResult> + 'static + Send + Sync, { - let f = move |s: &mut [Series]| function(std::mem::take(&mut s[0])); + let f = move |c: &mut [Column]| function(std::mem::take(&mut c[0])); Expr::AnonymousFunction { input: vec![self], @@ -641,9 +649,9 @@ impl Expr { /// * `apply` should be used for operations that work on a group of data. e.g. `sum`, `count`, etc. 
pub fn apply(self, function: F, output_type: GetOutput) -> Self where - F: Fn(Series) -> PolarsResult> + 'static + Send + Sync, + F: Fn(Column) -> PolarsResult> + 'static + Send + Sync, { - let f = move |s: &mut [Series]| function(std::mem::take(&mut s[0])); + let f = move |c: &mut [Column]| function(std::mem::take(&mut c[0])); Expr::AnonymousFunction { input: vec![self], @@ -673,7 +681,7 @@ impl Expr { /// See the [`Expr::apply`] function for the differences between [`map`](Expr::map) and [`apply`](Expr::apply). pub fn apply_many(self, function: F, arguments: &[Expr], output_type: GetOutput) -> Self where - F: Fn(&mut [Series]) -> PolarsResult> + 'static + Send + Sync, + F: Fn(&mut [Column]) -> PolarsResult> + 'static + Send + Sync, { let mut input = vec![self]; input.extend_from_slice(arguments); @@ -829,8 +837,12 @@ impl Expr { }; self.function_with_options( - move |s: Series| { - Some(s.product().map(|sc| sc.into_series(s.name().clone()))).transpose() + move |c: Column| { + Some( + c.product() + .map(|sc| sc.into_series(c.name().clone()).into_column()), + ) + .transpose() }, GetOutput::map_dtype(|dt| { use DataType as T; @@ -1463,7 +1475,12 @@ impl Expr { options: RollingOptionsFixedWindow, ) -> Expr { self.apply( - move |s| s.rolling_map(f.as_ref(), options.clone()).map(Some), + move |c: Column| { + c.as_materialized_series() + .rolling_map(f.as_ref(), options.clone()) + .map(Column::from) + .map(Some) + }, output_type, ) .with_fmt("rolling_map") @@ -1478,22 +1495,22 @@ impl Expr { F: 'static + FnMut(&mut Float64Chunked) -> Option + Send + Sync + Copy, { self.apply( - move |s| { - let out = match s.dtype() { - DataType::Float64 => s + move |c: Column| { + let out = match c.dtype() { + DataType::Float64 => c .f64() .unwrap() .rolling_map_float(window_size, f) - .map(|ca| ca.into_series()), - _ => s + .map(|ca| ca.into_column()), + _ => c .cast(&DataType::Float64)? 
.f64() .unwrap() .rolling_map_float(window_size, f) - .map(|ca| ca.into_series()), + .map(|ca| ca.into_column()), }?; - if let DataType::Float32 = s.dtype() { - out.cast(&DataType::Float32).map(Some) + if let DataType::Float32 = c.dtype() { + out.cast(&DataType::Float32).map(Column::from).map(Some) } else { Ok(Some(out)) } @@ -1952,7 +1969,7 @@ impl Expr { /// the correct output_type. If None given the output type of the input expr is used. pub fn map_multiple(function: F, expr: E, output_type: GetOutput) -> Expr where - F: Fn(&mut [Series]) -> PolarsResult> + 'static + Send + Sync, + F: Fn(&mut [Column]) -> PolarsResult> + 'static + Send + Sync, E: AsRef<[Expr]>, { let input = expr.as_ref().to_vec(); @@ -1978,7 +1995,7 @@ where /// * `map_list_mul` should be used when the function expects a list aggregated series. pub fn map_list_multiple(function: F, expr: E, output_type: GetOutput) -> Expr where - F: Fn(&mut [Series]) -> PolarsResult> + 'static + Send + Sync, + F: Fn(&mut [Column]) -> PolarsResult> + 'static + Send + Sync, E: AsRef<[Expr]>, { let input = expr.as_ref().to_vec(); @@ -2012,7 +2029,7 @@ pub fn apply_multiple( returns_scalar: bool, ) -> Expr where - F: Fn(&mut [Series]) -> PolarsResult> + 'static + Send + Sync, + F: Fn(&mut [Column]) -> PolarsResult> + 'static + Send + Sync, E: AsRef<[Expr]>, { let input = expr.as_ref().to_vec(); diff --git a/crates/polars-plan/src/dsl/name.rs b/crates/polars-plan/src/dsl/name.rs index 70bbc830b3c0..1261b4430bec 100644 --- a/crates/polars-plan/src/dsl/name.rs +++ b/crates/polars-plan/src/dsl/name.rs @@ -76,9 +76,9 @@ impl ExprNameNameSpace { fd }) .collect::>(); - let mut out = StructChunked::from_series(s.name().clone(), &fields)?; + let mut out = StructChunked::from_series(s.name().clone(), fields.iter())?; out.zip_outer_validity(s); - Ok(Some(out.into_series())) + Ok(Some(out.into_column())) }, GetOutput::map_dtype(move |dt| match dt { DataType::Struct(fds) => { diff --git 
a/crates/polars-plan/src/dsl/python_udf.rs b/crates/polars-plan/src/dsl/python_udf.rs index b105f62df482..0f9ac4a3dc9a 100644 --- a/crates/polars-plan/src/dsl/python_udf.rs +++ b/crates/polars-plan/src/dsl/python_udf.rs @@ -3,8 +3,8 @@ use std::sync::Arc; use polars_core::datatypes::{DataType, Field}; use polars_core::error::*; +use polars_core::frame::column::Column; use polars_core::frame::DataFrame; -use polars_core::prelude::Series; use polars_core::schema::Schema; use pyo3::prelude::*; use pyo3::pybacked::PyBackedBytes; @@ -19,8 +19,8 @@ use crate::constants::MAP_LIST_NAME; use crate::prelude::*; // Will be overwritten on Python Polars start up. -pub static mut CALL_SERIES_UDF_PYTHON: Option< - fn(s: Series, lambda: &PyObject) -> PolarsResult, +pub static mut CALL_COLUMNS_UDF_PYTHON: Option< + fn(s: Column, lambda: &PyObject) -> PolarsResult, > = None; pub static mut CALL_DF_UDF_PYTHON: Option< fn(s: DataFrame, lambda: &PyObject) -> PolarsResult, @@ -124,7 +124,7 @@ impl PythonUdfExpression { } #[cfg(feature = "serde")] - pub(crate) fn try_deserialize(buf: &[u8]) -> PolarsResult> { + pub(crate) fn try_deserialize(buf: &[u8]) -> PolarsResult> { debug_assert!(buf.starts_with(MAGIC_BYTE_MARK)); // skip header let buf = &buf[MAGIC_BYTE_MARK.len()..]; @@ -147,7 +147,7 @@ impl PythonUdfExpression { output_type, is_elementwise, returns_scalar, - )) as Arc) + )) as Arc) }) } } @@ -163,9 +163,9 @@ impl DataFrameUdf for PythonFunction { } } -impl SeriesUdf for PythonUdfExpression { - fn call_udf(&self, s: &mut [Series]) -> PolarsResult> { - let func = unsafe { CALL_SERIES_UDF_PYTHON.unwrap() }; +impl ColumnsUdf for PythonUdfExpression { + fn call_udf(&self, s: &mut [Column]) -> PolarsResult> { + let func = unsafe { CALL_COLUMNS_UDF_PYTHON.unwrap() }; let output_type = self .output_type diff --git a/crates/polars-plan/src/dsl/udf.rs b/crates/polars-plan/src/dsl/udf.rs index fe01cab03ea2..b09ef6f556a2 100644 --- a/crates/polars-plan/src/dsl/udf.rs +++ 
b/crates/polars-plan/src/dsl/udf.rs @@ -5,7 +5,7 @@ use polars_core::prelude::Field; use polars_core::schema::Schema; use polars_utils::pl_str::PlSmallStr; -use super::{Expr, GetOutput, SeriesUdf, SpecialEq}; +use super::{ColumnsUdf, Expr, GetOutput, SpecialEq}; use crate::prelude::{Context, FunctionOptions}; /// Represents a user-defined function @@ -18,7 +18,7 @@ pub struct UserDefinedFunction { /// The function output type. pub return_type: GetOutput, /// The function implementation. - pub fun: SpecialEq>, + pub fun: SpecialEq>, /// Options for the function. pub options: FunctionOptions, } @@ -40,7 +40,7 @@ impl UserDefinedFunction { name: PlSmallStr, input_fields: Vec, return_type: GetOutput, - fun: impl SeriesUdf + 'static, + fun: impl ColumnsUdf + 'static, ) -> Self { Self { name, diff --git a/crates/polars-plan/src/plans/aexpr/mod.rs b/crates/polars-plan/src/plans/aexpr/mod.rs index 4be23e79df14..42bfff7cabab 100644 --- a/crates/polars-plan/src/plans/aexpr/mod.rs +++ b/crates/polars-plan/src/plans/aexpr/mod.rs @@ -174,7 +174,7 @@ pub enum AExpr { }, AnonymousFunction { input: Vec, - function: SpecialEq>, + function: SpecialEq>, output_type: GetOutput, options: FunctionOptions, }, diff --git a/crates/polars-plan/src/plans/conversion/type_coercion/mod.rs b/crates/polars-plan/src/plans/conversion/type_coercion/mod.rs index ceb3d7dffd49..fc8f520e86ea 100644 --- a/crates/polars-plan/src/plans/conversion/type_coercion/mod.rs +++ b/crates/polars-plan/src/plans/conversion/type_coercion/mod.rs @@ -531,7 +531,7 @@ mod test { let optimizer = StackOptimizer {}; let rules: &mut [Box] = &mut [Box::new(TypeCoercionRule {})]; - let df = DataFrame::new(Vec::from([Series::new_empty( + let df = DataFrame::new(Vec::from([Column::new_empty( PlSmallStr::from_static("fruits"), &DataType::Categorical(None, Default::default()), )])) diff --git a/crates/polars-plan/src/plans/functions/count.rs b/crates/polars-plan/src/plans/functions/count.rs index 2b66907d6916..de2ac244ef29 100644 
--- a/crates/polars-plan/src/plans/functions/count.rs +++ b/crates/polars-plan/src/plans/functions/count.rs @@ -82,7 +82,7 @@ pub fn count_rows( |_| polars_err!(ComputeError: "count of {} exceeded maximum row size", count), )?; let column_name = alias.unwrap_or(PlSmallStr::from_static(crate::constants::LEN)); - DataFrame::new(vec![Series::new(column_name, [count])]) + DataFrame::new(vec![Column::new(column_name, [count])]) } } diff --git a/crates/polars-plan/src/plans/functions/merge_sorted.rs b/crates/polars-plan/src/plans/functions/merge_sorted.rs index ffc9e1f04df6..605a628c3c88 100644 --- a/crates/polars-plan/src/plans/functions/merge_sorted.rs +++ b/crates/polars-plan/src/plans/functions/merge_sorted.rs @@ -11,9 +11,10 @@ pub(super) fn merge_sorted(df: &DataFrame, column: &str) -> PolarsResult>(), df.get_columns() @@ -21,9 +22,10 @@ pub(super) fn merge_sorted(df: &DataFrame, column: &str) -> PolarsResult>(), ) @@ -34,5 +36,11 @@ pub(super) fn merge_sorted(df: &DataFrame, column: &str) -> PolarsResult AnyValue::Date(*v), #[cfg(feature = "dtype-datetime")] - DateTime(v, tu, tz) => AnyValue::Datetime(*v, *tu, tz), + DateTime(v, tu, tz) => AnyValue::Datetime(*v, *tu, tz.as_ref()), #[cfg(feature = "dtype-time")] Time(v) => AnyValue::Time(*v), Series(_) => return None, @@ -311,7 +311,7 @@ impl TryFrom> for LiteralValue { #[cfg(feature = "dtype-date")] AnyValue::Date(v) => Ok(LiteralValue::Date(v)), #[cfg(feature = "dtype-datetime")] - AnyValue::Datetime(value, tu, tz) => Ok(LiteralValue::DateTime(value, tu, tz.clone())), + AnyValue::Datetime(value, tu, tz) => Ok(LiteralValue::DateTime(value, tu, tz.cloned())), #[cfg(feature = "dtype-duration")] AnyValue::Duration(value, tu) => Ok(LiteralValue::Duration(value, tu)), #[cfg(feature = "dtype-time")] diff --git a/crates/polars-plan/src/plans/python/pyarrow.rs b/crates/polars-plan/src/plans/python/pyarrow.rs index abd018b3a4e6..20b800fa81b1 100644 --- a/crates/polars-plan/src/plans/python/pyarrow.rs +++ 
b/crates/polars-plan/src/plans/python/pyarrow.rs @@ -49,7 +49,7 @@ pub fn predicate_to_pa( let s = if v { "True" } else { "False" }; write!(list_repr, "{},", s).unwrap(); } else if let AnyValue::Datetime(v, tu, tz) = av { - let dtm = to_py_datetime(v, &tu, tz.as_ref()); + let dtm = to_py_datetime(v, &tu, tz); write!(list_repr, "{dtm},").unwrap(); } else if let AnyValue::Date(v) = av { write!(list_repr, "to_py_date({v}),").unwrap(); @@ -83,7 +83,7 @@ pub fn predicate_to_pa( Some(format!("to_py_date({v})")) }, #[cfg(feature = "dtype-datetime")] - AnyValue::Datetime(v, tu, tz) => Some(to_py_datetime(v, &tu, tz.as_ref())), + AnyValue::Datetime(v, tu, tz) => Some(to_py_datetime(v, &tu, tz)), // Activate once pyarrow supports them // #[cfg(feature = "dtype-time")] // AnyValue::Time(v) => { diff --git a/crates/polars-python/src/conversion/any_value.rs b/crates/polars-python/src/conversion/any_value.rs index 3141d02799fb..70cfaaf6d3ab 100644 --- a/crates/polars-python/src/conversion/any_value.rs +++ b/crates/polars-python/src/conversion/any_value.rs @@ -5,7 +5,7 @@ use polars::chunked_array::object::PolarsObjectSafe; #[cfg(feature = "object")] use polars::datatypes::OwnedObject; use polars::datatypes::{DataType, Field, PlHashMap, TimeUnit}; -use polars::prelude::{AnyValue, PlSmallStr, Series}; +use polars::prelude::{AnyValue, PlSmallStr, Series, TimeZone}; use polars_core::export::chrono::{NaiveDate, NaiveDateTime, NaiveTime, TimeDelta, Timelike}; use polars_core::utils::any_values_to_supertype_and_n_dtypes; use polars_core::utils::arrow::temporal_conversions::date32_to_date; @@ -65,28 +65,28 @@ pub(crate) fn any_value_into_py_object(av: AnyValue, py: Python) -> PyObject { }; s.into_py(py) }, + AnyValue::CategoricalOwned(idx, rev, arr) | AnyValue::EnumOwned(idx, rev, arr) => { + let s = if arr.is_null() { + rev.get(idx) + } else { + unsafe { arr.deref_unchecked().value(idx as usize) } + }; + s.into_py(py) + }, AnyValue::Date(v) => { let date = date32_to_date(v); 
date.into_py(py) }, AnyValue::Datetime(v, time_unit, time_zone) => { - if let Some(time_zone) = time_zone { - // When https://github.com/pola-rs/polars/issues/16199 is - // implemented, we'll switch to something like: - // - // let tz: chrono_tz::Tz = time_zone.parse().unwrap(); - // let datetime = tz.from_local_datetime(&naive_datetime).earliest().unwrap(); - // datetime.into_py(py) - let convert = utils.getattr(intern!(py, "to_py_datetime")).unwrap(); - let time_unit = time_unit.to_ascii(); - convert - .call1((v, time_unit, time_zone.as_str())) - .unwrap() - .into_py(py) - } else { - timestamp_to_naive_datetime(v, time_unit).into_py(py) - } + datetime_to_py_object(py, utils, v, time_unit, time_zone) }, + AnyValue::DatetimeOwned(v, time_unit, time_zone) => datetime_to_py_object( + py, + utils, + v, + time_unit, + time_zone.as_ref().map(AsRef::as_ref), + ), AnyValue::Duration(v, time_unit) => { let time_delta = elapsed_offset_to_timedelta(v, time_unit); time_delta.into_py(py) @@ -127,6 +127,31 @@ pub(crate) fn any_value_into_py_object(av: AnyValue, py: Python) -> PyObject { } } +fn datetime_to_py_object( + py: Python, + utils: &Bound, + v: i64, + tu: TimeUnit, + tz: Option<&TimeZone>, +) -> PyObject { + if let Some(time_zone) = tz { + // When https://github.com/pola-rs/polars/issues/16199 is + // implemented, we'll switch to something like: + // + // let tz: chrono_tz::Tz = time_zone.parse().unwrap(); + // let datetime = tz.from_local_datetime(&naive_datetime).earliest().unwrap(); + // datetime.into_py(py) + let convert = utils.getattr(intern!(py, "to_py_datetime")).unwrap(); + let time_unit = tu.to_ascii(); + convert + .call1((v, time_unit, time_zone.as_str())) + .unwrap() + .into_py(py) + } else { + timestamp_to_naive_datetime(v, tu).into_py(py) + } +} + type TypeObjectPtr = usize; type InitFn = for<'py> fn(&Bound<'py, PyAny>, bool) -> PyResult>; pub(crate) static LUT: crate::gil_once_cell::GILOnceCell> = @@ -204,7 +229,7 @@ pub(crate) fn 
py_object_to_any_value<'py>( .call1((ob, intern!(py, "us"))) .unwrap(); let v = date.extract::()?; - Ok(AnyValue::Datetime(v, TimeUnit::Microseconds, &None)) + Ok(AnyValue::Datetime(v, TimeUnit::Microseconds, None)) }) } diff --git a/crates/polars-python/src/dataframe/general.rs b/crates/polars-python/src/dataframe/general.rs index 043564b20c99..ff635c08898b 100644 --- a/crates/polars-python/src/dataframe/general.rs +++ b/crates/polars-python/src/dataframe/general.rs @@ -27,6 +27,8 @@ impl PyDataFrame { #[new] pub fn __init__(columns: Vec) -> PyResult { let columns = columns.to_series(); + // @scalar-opt + let columns = columns.into_iter().map(|s| s.into()).collect(); let df = DataFrame::new(columns).map_err(PyPolarsErr::from)?; Ok(PyDataFrame::new(df)) } @@ -181,12 +183,16 @@ impl PyDataFrame { pub fn hstack(&self, columns: Vec) -> PyResult { let columns = columns.to_series(); + // @scalar-opt + let columns = columns.into_iter().map(Into::into).collect::>(); let df = self.df.hstack(&columns).map_err(PyPolarsErr::from)?; Ok(df.into()) } pub fn hstack_mut(&mut self, columns: Vec) -> PyResult<()> { let columns = columns.to_series(); + // @scalar-opt + let columns = columns.into_iter().map(Into::into).collect::>(); self.df.hstack_mut(&columns).map_err(PyPolarsErr::from)?; Ok(()) } @@ -208,6 +214,7 @@ impl PyDataFrame { pub fn drop_in_place(&mut self, name: &str) -> PyResult { let s = self.df.drop_in_place(name).map_err(PyPolarsErr::from)?; + let s = s.take_materialized_series(); Ok(PySeries { series: s }) } @@ -222,7 +229,7 @@ impl PyDataFrame { let s = index_adjusted.and_then(|i| df.select_at_idx(i)); match s { - Some(s) => Ok(PySeries::new(s.clone())), + Some(s) => Ok(PySeries::new(s.as_materialized_series().clone())), None => Err(PyIndexError::new_err( polars_err!(oob = index, df.width()).to_string(), )), @@ -240,7 +247,7 @@ impl PyDataFrame { let series = self .df .column(name) - .map(|s| PySeries::new(s.clone())) + .map(|s| 
PySeries::new(s.as_materialized_series().clone())) .map_err(PyPolarsErr::from)?; Ok(series) } diff --git a/crates/polars-python/src/expr/datetime.rs b/crates/polars-python/src/expr/datetime.rs index 69325b03a19f..31052e6189d4 100644 --- a/crates/polars-python/src/expr/datetime.rs +++ b/crates/polars-python/src/expr/datetime.rs @@ -33,8 +33,9 @@ impl PyExpr { .clone() .map( |s| { - s.timestamp(TimeUnit::Milliseconds) - .map(|ca| Some((ca / 1000).into_series())) + s.take_materialized_series() + .timestamp(TimeUnit::Milliseconds) + .map(|ca| Some((ca / 1000).into_column())) }, GetOutput::from_type(DataType::Int64), ) diff --git a/crates/polars-python/src/functions/lazy.rs b/crates/polars-python/src/functions/lazy.rs index 2d39bcdbdc09..d649b7be4cba 100644 --- a/crates/polars-python/src/functions/lazy.rs +++ b/crates/polars-python/src/functions/lazy.rs @@ -225,7 +225,14 @@ pub fn arctan2(y: PyExpr, x: PyExpr) -> PyExpr { pub fn cum_fold(acc: PyExpr, lambda: PyObject, exprs: Vec, include_init: bool) -> PyExpr { let exprs = exprs.to_exprs(); - let func = move |a: Series, b: Series| binary_lambda(&lambda, a, b); + let func = move |a: Column, b: Column| { + binary_lambda( + &lambda, + a.take_materialized_series(), + b.take_materialized_series(), + ) + .map(|v| v.map(Column::from)) + }; dsl::cum_fold_exprs(acc.inner, func, exprs, include_init).into() } @@ -233,7 +240,14 @@ pub fn cum_fold(acc: PyExpr, lambda: PyObject, exprs: Vec, include_init: pub fn cum_reduce(lambda: PyObject, exprs: Vec) -> PyExpr { let exprs = exprs.to_exprs(); - let func = move |a: Series, b: Series| binary_lambda(&lambda, a, b); + let func = move |a: Column, b: Column| { + binary_lambda( + &lambda, + a.take_materialized_series(), + b.take_materialized_series(), + ) + .map(|v| v.map(Column::from)) + }; dsl::cum_reduce_exprs(func, exprs).into() } @@ -394,7 +408,14 @@ pub fn first() -> PyExpr { pub fn fold(acc: PyExpr, lambda: PyObject, exprs: Vec) -> PyExpr { let exprs = exprs.to_exprs(); - let func = 
move |a: Series, b: Series| binary_lambda(&lambda, a, b); + let func = move |a: Column, b: Column| { + binary_lambda( + &lambda, + a.take_materialized_series(), + b.take_materialized_series(), + ) + .map(|v| v.map(Column::from)) + }; dsl::fold_exprs(acc.inner, func, exprs).into() } @@ -495,7 +516,14 @@ pub fn pearson_corr(a: PyExpr, b: PyExpr, ddof: u8) -> PyExpr { pub fn reduce(lambda: PyObject, exprs: Vec) -> PyExpr { let exprs = exprs.to_exprs(); - let func = move |a: Series, b: Series| binary_lambda(&lambda, a, b); + let func = move |a: Column, b: Column| { + binary_lambda( + &lambda, + a.take_materialized_series(), + b.take_materialized_series(), + ) + .map(|v| v.map(Column::from)) + }; dsl::reduce_exprs(func, exprs).into() } diff --git a/crates/polars-python/src/interop/arrow/to_py.rs b/crates/polars-python/src/interop/arrow/to_py.rs index de6c07ef31c9..017771bb1567 100644 --- a/crates/polars-python/src/interop/arrow/to_py.rs +++ b/crates/polars-python/src/interop/arrow/to_py.rs @@ -95,7 +95,11 @@ impl DataFrameStreamIterator { let dtype = ArrowDataType::Struct(schema.into_iter_values().collect()); Self { - columns: df.get_columns().to_vec(), + columns: df + .get_columns() + .iter() + .map(|v| v.as_materialized_series().clone()) + .collect(), dtype, idx: 0, n_chunks: df.n_chunks(), diff --git a/crates/polars-python/src/interop/arrow/to_rust.rs b/crates/polars-python/src/interop/arrow/to_rust.rs index 8d76f53b243a..809bd527a492 100644 --- a/crates/polars-python/src/interop/arrow/to_rust.rs +++ b/crates/polars-python/src/interop/arrow/to_rust.rs @@ -85,7 +85,8 @@ pub fn to_rust_df(rb: &[Bound]) -> PyResult { .enumerate() .map(|(i, arr)| { let s = Series::try_from((names[i].clone(), arr)) - .map_err(PyPolarsErr::from)?; + .map_err(PyPolarsErr::from)? 
+ .into_column(); Ok(s) }) .collect::>>() @@ -95,8 +96,9 @@ pub fn to_rust_df(rb: &[Bound]) -> PyResult { .into_iter() .enumerate() .map(|(i, arr)| { - let s = - Series::try_from((names[i].clone(), arr)).map_err(PyPolarsErr::from)?; + let s = Series::try_from((names[i].clone(), arr)) + .map_err(PyPolarsErr::from)? + .into_column(); Ok(s) }) .collect::>>() diff --git a/crates/polars-python/src/interop/numpy/to_numpy_df.rs b/crates/polars-python/src/interop/numpy/to_numpy_df.rs index 2718203d46f3..c14753bdc7a3 100644 --- a/crates/polars-python/src/interop/numpy/to_numpy_df.rs +++ b/crates/polars-python/src/interop/numpy/to_numpy_df.rs @@ -113,7 +113,10 @@ fn check_df_dtypes_support_view(df: &DataFrame) -> Option<&DataType> { fn check_df_columns_contiguous(df: &DataFrame) -> bool { let columns = df.get_columns(); - if columns.iter().any(|s| s.n_chunks() > 1) { + if columns + .iter() + .any(|s| s.as_materialized_series().n_chunks() > 1) + { return false; } if columns.len() <= 1 { @@ -126,7 +129,7 @@ fn check_df_columns_contiguous(df: &DataFrame) -> bool { let slices = columns .iter() .map(|s| { - let ca: &ChunkedArray<$T> = s.unpack().unwrap(); + let ca: &ChunkedArray<$T> = s.as_materialized_series().unpack().unwrap(); ca.data_views().next().unwrap() }) .collect::>(); @@ -174,7 +177,13 @@ where T: PolarsNumericType, T::Native: Element, { - let ca: &ChunkedArray = df.get_columns().first().unwrap().unpack().unwrap(); + let ca: &ChunkedArray = df + .get_columns() + .first() + .unwrap() + .as_materialized_series() + .unpack() + .unwrap(); let first_slice = ca.data_views().next().unwrap(); let start_ptr = first_slice.as_ptr(); diff --git a/crates/polars-python/src/map/dataframe.rs b/crates/polars-python/src/map/dataframe.rs index 5be2216b0898..1fbfe5d9232a 100644 --- a/crates/polars-python/src/map/dataframe.rs +++ b/crates/polars-python/src/map/dataframe.rs @@ -10,12 +10,18 @@ use crate::PyDataFrame; /// Create iterators for all the Series in the DataFrame. 
fn get_iters(df: &DataFrame) -> Vec { - df.get_columns().iter().map(|s| s.iter()).collect() + df.get_columns() + .iter() + .map(|s| s.as_materialized_series().iter()) + .collect() } /// Create iterators for all the Series in the DataFrame, skipping the first `n` rows. fn get_iters_skip(df: &DataFrame, n: usize) -> Vec> { - df.get_columns().iter().map(|s| s.iter().skip(n)).collect() + df.get_columns() + .iter() + .map(|s| s.as_materialized_series().iter().skip(n)) + .collect() } // the return type is Union[PySeries, PyDataFrame] and a boolean indicating if it is a dataframe or not diff --git a/crates/polars-python/src/map/lazy.rs b/crates/polars-python/src/map/lazy.rs index f7edcbe3facb..c1a680056774 100644 --- a/crates/polars-python/src/map/lazy.rs +++ b/crates/polars-python/src/map/lazy.rs @@ -112,7 +112,7 @@ pub(crate) fn binary_lambda( .collect()?; let s = out.select_at_idx(0).unwrap().clone(); - PySeries::new(s) + PySeries::new(s.take_materialized_series()) } else { return Some(result_series_wrapper.to_series(py, &pypolars.into_py(py), "")) .transpose(); @@ -138,9 +138,9 @@ pub fn map_single( pyexpr.inner.clone().map_python(func, agg_list).into() } -pub(crate) fn call_lambda_with_series_slice( +pub(crate) fn call_lambda_with_columns_slice( py: Python, - s: &[Series], + s: &[Column], lambda: &PyObject, polars_module: &PyObject, ) -> PyObject { @@ -148,7 +148,7 @@ pub(crate) fn call_lambda_with_series_slice( // create a PySeries struct/object for Python let iter = s.iter().map(|s| { - let ps = PySeries::new(s.clone()); + let ps = PySeries::new(s.as_materialized_series().clone()); // Wrap this PySeries object in the python side Series wrapper let python_series_wrapper = pypolars.getattr("wrap_s").unwrap().call1((ps,)).unwrap(); @@ -176,17 +176,17 @@ pub fn map_mul( // do the import outside of the function to prevent import side effects in a hot loop. 
let pypolars = PyModule::import_bound(py, "polars").unwrap().to_object(py); - let function = move |s: &mut [Series]| { + let function = move |s: &mut [Column]| { Python::with_gil(|py| { // this is a python Series - let out = call_lambda_with_series_slice(py, s, &lambda, &pypolars); + let out = call_lambda_with_columns_slice(py, s, &lambda, &pypolars); // we return an error, because that will become a null value polars lazy apply list if map_groups && out.is_none(py) { return Ok(None); } - Ok(Some(out.to_series(py, &pypolars, "")?)) + Ok(Some(out.to_series(py, &pypolars, "")?.into_column())) }) }; diff --git a/crates/polars-python/src/map/mod.rs b/crates/polars-python/src/map/mod.rs index 8f6ed1518fe8..ef1bb4e34507 100644 --- a/crates/polars-python/src/map/mod.rs +++ b/crates/polars-python/src/map/mod.rs @@ -122,7 +122,7 @@ fn iterator_to_struct<'a>( .collect::>() }); - Ok(StructChunked::from_series(name, &fields) + Ok(StructChunked::from_series(name, fields.iter()) .unwrap() .into_series() .into()) diff --git a/crates/polars-python/src/on_startup.rs b/crates/polars-python/src/on_startup.rs index 3f08f71740b5..9b6f17d46f72 100644 --- a/crates/polars-python/src/on_startup.rs +++ b/crates/polars-python/src/on_startup.rs @@ -15,11 +15,11 @@ use crate::prelude::ObjectValue; use crate::py_modules::{POLARS, UTILS}; use crate::Wrap; -fn python_function_caller_series(s: Series, lambda: &PyObject) -> PolarsResult { +fn python_function_caller_series(s: Column, lambda: &PyObject) -> PolarsResult { Python::with_gil(|py| { - let object = call_lambda_with_series(py, s.clone(), lambda) + let object = call_lambda_with_series(py, s.clone().take_materialized_series(), lambda) .map_err(|s| ComputeError(format!("{}", s).into()))?; - object.to_series(py, &POLARS, s.name()) + object.to_series(py, &POLARS, s.name()).map(Column::from) }) } @@ -92,7 +92,7 @@ pub fn register_startup_deps() { let physical_dtype = ArrowDataType::FixedSizeBinary(object_size); 
registry::register_object_builder(object_builder, object_converter, physical_dtype); // register SERIES UDF - unsafe { python_udf::CALL_SERIES_UDF_PYTHON = Some(python_function_caller_series) } + unsafe { python_udf::CALL_COLUMNS_UDF_PYTHON = Some(python_function_caller_series) } // register DATAFRAME UDF unsafe { python_udf::CALL_DF_UDF_PYTHON = Some(python_function_caller_df) } // register warning function for `polars_warn!` diff --git a/crates/polars-python/src/series/general.rs b/crates/polars-python/src/series/general.rs index 359f39df6291..e0563a9c327d 100644 --- a/crates/polars-python/src/series/general.rs +++ b/crates/polars-python/src/series/general.rs @@ -404,7 +404,7 @@ impl PySeries { df.pop() .map(|s| { - self.series = s; + self.series = s.take_materialized_series(); }) .ok_or_else(|| { PyPolarsErr::from(PolarsError::NoData( diff --git a/crates/polars-python/src/series/mod.rs b/crates/polars-python/src/series/mod.rs index 1b4542b06c5a..0c1ecbc40b1c 100644 --- a/crates/polars-python/src/series/mod.rs +++ b/crates/polars-python/src/series/mod.rs @@ -23,7 +23,7 @@ mod numpy_ufunc; #[cfg(feature = "pymethods")] mod scatter; -use polars::prelude::Series; +use polars::prelude::{Column, Series}; use pyo3::pyclass; #[pyclass] @@ -66,3 +66,14 @@ impl ToPySeries for Vec { unsafe { std::mem::transmute(self) } } } + +impl ToPySeries for Vec { + fn to_pyseries(self) -> Vec { + // @scalar-opt + let series: Vec = self + .into_iter() + .map(|c| c.take_materialized_series()) + .collect(); + series.to_pyseries() + } +} diff --git a/crates/polars-sql/src/context.rs b/crates/polars-sql/src/context.rs index 23ffb25070fa..c5d9f4a371b3 100644 --- a/crates/polars-sql/src/context.rs +++ b/crates/polars-sql/src/context.rs @@ -471,7 +471,8 @@ impl SQLContext { let plan = plan .split('\n') .collect::() - .with_name(PlSmallStr::from_static("Logical Plan")); + .with_name(PlSmallStr::from_static("Logical Plan")) + .into_column(); let df = DataFrame::new(vec![plan])?; Ok(df.lazy()) 
}, @@ -481,7 +482,7 @@ impl SQLContext { // SHOW TABLES fn execute_show_tables(&mut self, _: &Statement) -> PolarsResult { - let tables = Series::new("name".into(), self.get_tables()); + let tables = Column::new("name".into(), self.get_tables()); let df = DataFrame::new(vec![tables])?; Ok(df.lazy()) } @@ -1031,7 +1032,7 @@ impl SQLContext { "UNNEST table alias requires {} column name{}, found {}", column_values.len(), plural, column_names.len() ); } - let column_series: Vec = column_values + let column_series: Vec = column_values .into_iter() .zip(column_names) .map(|(s, name)| { @@ -1041,6 +1042,7 @@ impl SQLContext { s.clone() } }) + .map(Column::from) .collect(); let lf = DataFrame::new(column_series)?.lazy(); diff --git a/crates/polars-sql/tests/issues.rs b/crates/polars-sql/tests/issues.rs index 10ee22db49d3..7938266dc463 100644 --- a/crates/polars-sql/tests/issues.rs +++ b/crates/polars-sql/tests/issues.rs @@ -99,8 +99,6 @@ fn iss_7440() { #[test] #[cfg(feature = "csv")] fn iss_8395() -> PolarsResult<()> { - use polars_core::series::Series; - let mut context = SQLContext::new(); let sql = r#" with foods as ( @@ -113,7 +111,7 @@ fn iss_8395() -> PolarsResult<()> { // assert that the df only contains [vegetables, seafood] let s = df.column("category")?.unique()?.sort(Default::default())?; - let expected = Series::new("category".into(), &["seafood", "vegetables"]); + let expected = Column::new("category".into(), &["seafood", "vegetables"]); assert!(s.equals(&expected)); Ok(()) } diff --git a/crates/polars-sql/tests/simple_exprs.rs b/crates/polars-sql/tests/simple_exprs.rs index b84c6e681cd2..64b46c88656e 100644 --- a/crates/polars-sql/tests/simple_exprs.rs +++ b/crates/polars-sql/tests/simple_exprs.rs @@ -4,11 +4,11 @@ use polars_sql::*; use polars_time::Duration; fn create_sample_df() -> DataFrame { - let a = Series::new( + let a = Column::new( "a".into(), (1..10000i64).map(|i| i / 100).collect::>(), ); - let b = Series::new("b".into(), 1..10000i64); + let b = 
Column::new("b".into(), 1..10000i64); DataFrame::new(vec![a, b]).unwrap() } diff --git a/crates/polars-sql/tests/statements.rs b/crates/polars-sql/tests/statements.rs index 2657ec443077..c4af146eab9d 100644 --- a/crates/polars-sql/tests/statements.rs +++ b/crates/polars-sql/tests/statements.rs @@ -3,8 +3,8 @@ use polars_lazy::prelude::*; use polars_sql::*; fn create_ctx() -> SQLContext { - let a = Series::new("a".into(), (1..10i64).map(|i| i / 100).collect::>()); - let b = Series::new("b".into(), 1..10i64); + let a = Column::new("a".into(), (1..10i64).map(|i| i / 100).collect::>()); + let b = Column::new("b".into(), 1..10i64); let df = DataFrame::new(vec![a, b]).unwrap().lazy(); let mut ctx = SQLContext::new(); ctx.register("df", df); diff --git a/crates/polars-sql/tests/udf.rs b/crates/polars-sql/tests/udf.rs index 3ccd1c4d6395..d8d4eec83d9c 100644 --- a/crates/polars-sql/tests/udf.rs +++ b/crates/polars-sql/tests/udf.rs @@ -39,10 +39,10 @@ fn test_udfs() -> PolarsResult<()> { Field::new("b".into(), DataType::Int32), ], GetOutput::same_type(), - move |s: &mut [Series]| { - let first = s[0].clone(); - let second = s[1].clone(); - (first + second).map(Some) + move |c: &mut [Column]| { + let first = c[0].as_materialized_series().clone(); + let second = c[1].as_materialized_series().clone(); + (first + second).map(Column::from).map(Some) }, ); @@ -74,10 +74,10 @@ fn test_udfs() -> PolarsResult<()> { Field::new("b".into(), DataType::Int32), ], GetOutput::same_type(), - move |s: &mut [Series]| { - let first = s[0].clone(); - let second = s[1].clone(); - (first / second).map(Some) + move |c: &mut [Column]| { + let first = c[0].as_materialized_series().clone(); + let second = c[1].as_materialized_series().clone(); + (first / second).map(Column::from).map(Some) }, ); diff --git a/crates/polars-stream/src/nodes/parquet_source/row_group_decode.rs b/crates/polars-stream/src/nodes/parquet_source/row_group_decode.rs index e0944203cfe6..ae32dd38025c 100644 --- 
a/crates/polars-stream/src/nodes/parquet_source/row_group_decode.rs +++ b/crates/polars-stream/src/nodes/parquet_source/row_group_decode.rs @@ -2,9 +2,10 @@ use std::sync::Arc; use polars_core::frame::DataFrame; use polars_core::prelude::{ - ArrowField, ArrowSchema, BooleanChunked, ChunkFull, IdxCa, StringChunked, + AnyValue, ArrowField, ArrowSchema, BooleanChunked, Column, DataType, IdxCa, IntoColumn, }; -use polars_core::series::{IntoSeries, IsSorted, Series}; +use polars_core::scalar::Scalar; +use polars_core::series::{IsSorted, Series}; use polars_core::utils::arrow::bitmap::{Bitmap, MutableBitmap}; use polars_error::{polars_bail, PolarsResult}; use polars_io::predicates::PhysicalIoExpr; @@ -68,7 +69,7 @@ impl RowGroupDecoder { if self.row_index.is_some() { // Add a placeholder so that we don't have to shift the entire vec // later. - out_columns.push(Series::default()); + out_columns.push(Column::default()); } let slice_range = row_group_data @@ -137,25 +138,33 @@ impl RowGroupDecoder { let path_index = row_group_data.path_index; let hive_series = if let Some(hp) = self.hive_partitions.as_deref() { - let mut v = hp[path_index].materialize_partition_columns(); - for s in v.iter_mut() { - *s = s.new_from_index(0, row_group_data.file_max_row_group_height); - } - v + let v = hp[path_index].materialize_partition_columns(); + v.into_iter() + .map(|s| { + s.into_column() + .new_from_index(0, row_group_data.file_max_row_group_height) + }) + .collect() } else { vec![] }; + // @scalar-opt let file_path_series = self.include_file_paths.clone().map(|file_path_col| { - StringChunked::full( + Column::new_scalar( file_path_col, - self.scan_sources - .get(path_index) - .unwrap() - .to_include_path_name(), + Scalar::new( + DataType::String, + AnyValue::StringOwned( + self.scan_sources + .get(path_index) + .unwrap() + .to_include_path_name() + .into(), + ), + ), row_group_data.file_max_row_group_height, ) - .into_series() }); SharedFileState { @@ -169,7 +178,7 @@ impl 
RowGroupDecoder { &self, row_group_data: &RowGroupData, slice_range: core::ops::Range, - ) -> PolarsResult> { + ) -> PolarsResult> { if let Some(RowIndex { name, offset }) = self.row_index.as_ref() { let projection_height = row_group_data.row_group_metadata.num_rows(); @@ -197,7 +206,7 @@ impl RowGroupDecoder { ); ca.set_sorted_flag(IsSorted::Ascending); - Ok(Some(ca.into_series())) + Ok(Some(ca.into_column())) } else { Ok(None) } @@ -207,7 +216,7 @@ impl RowGroupDecoder { /// `out_vec`. async fn decode_all_columns( &self, - out_vec: &mut Vec, + out_vec: &mut Vec, row_group_data: &Arc, filter: Option, ) -> PolarsResult<()> { @@ -304,7 +313,7 @@ fn decode_column( arrow_field: &ArrowField, row_group_data: &RowGroupData, filter: Option, -) -> PolarsResult { +) -> PolarsResult { let columns_to_deserialize = row_group_data .row_group_metadata .columns_under_root_iter(&arrow_field.name) @@ -330,16 +339,16 @@ fn decode_column( // TODO: Also load in the metadata. - Ok(series) + Ok(series.into()) } /// # Safety /// All series in `cols` have the same length. async unsafe fn filter_cols( - mut cols: Vec, + mut cols: Vec, mask: &BooleanChunked, min_values_per_thread: usize, -) -> PolarsResult> { +) -> PolarsResult> { if cols.is_empty() { return Ok(cols); } @@ -417,8 +426,8 @@ fn calc_cols_per_thread( /// State shared across row groups for a single file. 
pub(super) struct SharedFileState { path_index: usize, - hive_series: Vec, - file_path_series: Option, + hive_series: Vec, + file_path_series: Option, } /// @@ -566,7 +575,7 @@ fn decode_column_prefiltered( prefilter_setting: &PrefilterMaskSetting, mask: &BooleanChunked, mask_bitmap: &Bitmap, -) -> PolarsResult { +) -> PolarsResult { let columns_to_deserialize = row_group_data .row_group_metadata .columns_under_root_iter(&arrow_field.name) @@ -593,12 +602,12 @@ fn decode_column_prefiltered( deserialize_filter, )?; - let series = Series::try_from((arrow_field, array))?; + let column = Series::try_from((arrow_field, array))?.into_column(); if !prefilter { - series.filter(mask) + column.filter(mask) } else { - Ok(series) + Ok(column) } } diff --git a/crates/polars-stream/src/nodes/reduce.rs b/crates/polars-stream/src/nodes/reduce.rs index f6de3bd1124a..15048daba4f8 100644 --- a/crates/polars-stream/src/nodes/reduce.rs +++ b/crates/polars-stream/src/nodes/reduce.rs @@ -120,7 +120,7 @@ impl ComputeNode for ReduceNode { .map(|(r, field)| { r.finalize().map(|scalar| { scalar - .into_series(field.name.clone()) + .into_column(field.name.clone()) .cast(&field.dtype) .unwrap() }) diff --git a/crates/polars-stream/src/nodes/select.rs b/crates/polars-stream/src/nodes/select.rs index 688580e10319..3b060e78e654 100644 --- a/crates/polars-stream/src/nodes/select.rs +++ b/crates/polars-stream/src/nodes/select.rs @@ -1,5 +1,6 @@ use std::sync::Arc; +use polars_core::prelude::IntoColumn; use polars_core::schema::Schema; use super::compute_node_prelude::*; @@ -52,7 +53,7 @@ impl ComputeNode for SelectNode { let mut selected = Vec::new(); for selector in slf.selectors.iter() { let s = selector.evaluate(&df, state).await?; - selected.push(s); + selected.push(s.into_column()); } let ret = if slf.extend_original { diff --git a/crates/polars-stream/src/physical_plan/lower_expr.rs b/crates/polars-stream/src/physical_plan/lower_expr.rs index af0e138ec30c..6c4126a9e779 100644 --- 
a/crates/polars-stream/src/physical_plan/lower_expr.rs +++ b/crates/polars-stream/src/physical_plan/lower_expr.rs @@ -2,7 +2,7 @@ use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; use polars_core::frame::DataFrame; -use polars_core::prelude::{Field, InitHashMaps, PlHashMap, PlHashSet}; +use polars_core::prelude::{Column, Field, InitHashMaps, PlHashMap, PlHashSet}; use polars_core::schema::{Schema, SchemaExt}; use polars_error::PolarsResult; use polars_expr::planner::get_expr_depth_limit; @@ -238,7 +238,9 @@ fn build_input_independent_node_with_ctx( let phys_expr = create_physical_expr(expr, Context::Default, ctx.expr_arena, None, &mut state)?; - phys_expr.evaluate(&empty, &execution_state) + phys_expr + .evaluate(&empty, &execution_state) + .map(Column::from) }) .try_collect_vec()?; @@ -352,7 +354,7 @@ fn build_fallback_node_with_ctx( let exec_state = ExecutionState::new(); let columns = phys_exprs .iter() - .map(|phys_expr| phys_expr.evaluate(&df, &exec_state)) + .map(|phys_expr| phys_expr.evaluate(&df, &exec_state).map(Column::from)) .try_collect()?; DataFrame::new_with_broadcast(columns) }; diff --git a/crates/polars-time/src/group_by/dynamic.rs b/crates/polars-time/src/group_by/dynamic.rs index 9e428794365e..8a8d2312d580 100644 --- a/crates/polars-time/src/group_by/dynamic.rs +++ b/crates/polars-time/src/group_by/dynamic.rs @@ -84,31 +84,31 @@ const UP_NAME: &str = "_upper_boundary"; pub trait PolarsTemporalGroupby { fn rolling( &self, - group_by: Vec, + group_by: Vec, options: &RollingGroupOptions, - ) -> PolarsResult<(Series, Vec, GroupsProxy)>; + ) -> PolarsResult<(Column, Vec, GroupsProxy)>; fn group_by_dynamic( &self, - group_by: Vec, + group_by: Vec, options: &DynamicGroupOptions, - ) -> PolarsResult<(Series, Vec, GroupsProxy)>; + ) -> PolarsResult<(Column, Vec, GroupsProxy)>; } impl PolarsTemporalGroupby for DataFrame { fn rolling( &self, - group_by: Vec, + group_by: Vec, options: &RollingGroupOptions, - ) -> PolarsResult<(Series, Vec, 
GroupsProxy)> { + ) -> PolarsResult<(Column, Vec, GroupsProxy)> { Wrap(self).rolling(group_by, options) } fn group_by_dynamic( &self, - group_by: Vec, + group_by: Vec, options: &DynamicGroupOptions, - ) -> PolarsResult<(Series, Vec, GroupsProxy)> { + ) -> PolarsResult<(Column, Vec, GroupsProxy)> { Wrap(self).group_by_dynamic(group_by, options) } } @@ -116,9 +116,9 @@ impl PolarsTemporalGroupby for DataFrame { impl Wrap<&DataFrame> { fn rolling( &self, - group_by: Vec, + group_by: Vec, options: &RollingGroupOptions, - ) -> PolarsResult<(Series, Vec, GroupsProxy)> { + ) -> PolarsResult<(Column, Vec, GroupsProxy)> { polars_ensure!( !options.period.is_zero() && !options.period.negative, ComputeError: @@ -128,7 +128,7 @@ impl Wrap<&DataFrame> { if group_by.is_empty() { // If by is given, the column must be sorted in the 'by' arg, which we can not check now // this will be checked when the groups are materialized. - time.ensure_sorted_arg("rolling")?; + time.as_materialized_series().ensure_sorted_arg("rolling")?; } let time_type = time.dtype(); @@ -137,7 +137,7 @@ impl Wrap<&DataFrame> { ensure_duration_matches_dtype(options.offset, time_type, "offset")?; use DataType::*; - let (dt, tu, tz): (Series, TimeUnit, Option) = match time_type { + let (dt, tu, tz): (Column, TimeUnit, Option) = match time_type { Datetime(tu, tz) => (time.clone(), *tu, tz.clone()), Date => ( time.cast(&Datetime(TimeUnit::Milliseconds, None))?, @@ -190,14 +190,15 @@ impl Wrap<&DataFrame> { /// Returns: time_keys, keys, groupsproxy. fn group_by_dynamic( &self, - group_by: Vec, + group_by: Vec, options: &DynamicGroupOptions, - ) -> PolarsResult<(Series, Vec, GroupsProxy)> { + ) -> PolarsResult<(Column, Vec, GroupsProxy)> { let time = self.0.column(&options.index_column)?.rechunk(); if group_by.is_empty() { // If by is given, the column must be sorted in the 'by' arg, which we can not check now // this will be checked when the groups are materialized. 
- time.ensure_sorted_arg("group_by_dynamic")?; + time.as_materialized_series() + .ensure_sorted_arg("group_by_dynamic")?; } let time_type = time.dtype(); @@ -260,12 +261,12 @@ impl Wrap<&DataFrame> { fn impl_group_by_dynamic( &self, - mut dt: Series, - mut by: Vec, + mut dt: Column, + mut by: Vec, options: &DynamicGroupOptions, tu: TimeUnit, time_type: &DataType, - ) -> PolarsResult<(Series, Vec, GroupsProxy)> { + ) -> PolarsResult<(Column, Vec, GroupsProxy)> { polars_ensure!(!options.every.negative, ComputeError: "'every' argument must be positive"); if dt.is_empty() { return dt.cast(time_type).map(|s| (s, by, GroupsProxy::default())); @@ -501,12 +502,12 @@ impl Wrap<&DataFrame> { lower.set_sorted_flag(IsSorted::Ascending); upper.set_sorted_flag(IsSorted::Ascending); } - by.push(lower.into_datetime(tu, tz.clone()).into_series()); - by.push(upper.into_datetime(tu, tz.clone()).into_series()); + by.push(lower.into_datetime(tu, tz.clone()).into_column()); + by.push(upper.into_datetime(tu, tz.clone()).into_column()); } dt.into_datetime(tu, None) - .into_series() + .into_column() .cast(time_type) .map(|s| (s, by, groups)) } @@ -514,13 +515,13 @@ impl Wrap<&DataFrame> { /// Returns: time_keys, keys, groupsproxy fn impl_rolling( &self, - dt: Series, - group_by: Vec, + dt: Column, + group_by: Vec, options: &RollingGroupOptions, tu: TimeUnit, tz: Option, time_type: &DataType, - ) -> PolarsResult<(Series, Vec, GroupsProxy)> { + ) -> PolarsResult<(Column, Vec, GroupsProxy)> { let mut dt = dt.rechunk(); let groups = if group_by.is_empty() { @@ -691,9 +692,9 @@ mod test { None, &StringChunked::from_iter(std::iter::once("raise")), )? 
- .into_series(); + .into_column(); date.set_sorted_flag(IsSorted::Ascending); - let a = Series::new("a".into(), [3, 7, 5, 9, 2, 1]); + let a = Column::new("a".into(), [3, 7, 5, 9, 2, 1]); let df = DataFrame::new(vec![date, a.clone()])?; let (_, _, groups) = df @@ -709,7 +710,7 @@ mod test { .unwrap(); let sum = unsafe { a.agg_sum(&groups) }; - let expected = Series::new("".into(), [3, 10, 15, 24, 11, 1]); + let expected = Column::new("".into(), [3, 10, 15, 24, 11, 1]); assert_eq!(sum, expected); } @@ -737,10 +738,10 @@ mod test { None, &StringChunked::from_iter(std::iter::once("raise")), )? - .into_series(); + .into_column(); date.set_sorted_flag(IsSorted::Ascending); - let a = Series::new("a".into(), [3, 7, 5, 9, 2, 1]); + let a = Column::new("a".into(), [3, 7, 5, 9, 2, 1]); let df = DataFrame::new(vec![date, a.clone()])?; let (_, _, groups) = df @@ -760,7 +761,7 @@ mod test { [Some(3), Some(7), None, Some(9), Some(2), Some(1)], ); - let min = unsafe { a.agg_min(&groups) }; + let min = unsafe { a.as_materialized_series().agg_min(&groups) }; let expected = Series::new("".into(), [3, 3, 3, 3, 2, 1]); assert_eq!(min, expected); @@ -768,14 +769,14 @@ mod test { let min = unsafe { nulls.agg_min(&groups) }; assert_eq!(min, expected); - let max = unsafe { a.agg_max(&groups) }; + let max = unsafe { a.as_materialized_series().agg_max(&groups) }; let expected = Series::new("".into(), [3, 7, 7, 9, 9, 1]); assert_eq!(max, expected); let max = unsafe { nulls.agg_max(&groups) }; assert_eq!(max, expected); - let var = unsafe { a.agg_var(&groups, 1) }; + let var = unsafe { a.as_materialized_series().agg_var(&groups, 1) }; let expected = Series::new( "".into(), [0.0, 8.0, 4.000000000000002, 6.666666666666667, 24.5, 0.0], @@ -786,7 +787,10 @@ mod test { let expected = Series::new("".into(), [0.0, 8.0, 8.0, 9.333333333333343, 24.5, 0.0]); assert!(abs(&(var - expected)?).unwrap().lt(1e-12).unwrap().all()); - let quantile = unsafe { a.agg_quantile(&groups, 0.5, 
QuantileInterpolOptions::Linear) }; + let quantile = unsafe { + a.as_materialized_series() + .agg_quantile(&groups, 0.5, QuantileInterpolOptions::Linear) + }; let expected = Series::new("".into(), [3.0, 5.0, 5.0, 6.0, 5.5, 1.0]); assert_eq!(quantile, expected); @@ -820,9 +824,9 @@ mod test { TimeUnit::Milliseconds, None, )? - .into_series(); + .into_column(); - let groups = Series::new("groups".into(), ["a", "a", "a", "b", "b", "a", "a"]); + let groups = Column::new("groups".into(), ["a", "a", "a", "b", "b", "a", "a"]); let df = DataFrame::new(vec![range, groups.clone()]).unwrap(); let (time_key, mut keys, groups) = df @@ -874,7 +878,7 @@ mod test { TimeUnit::Milliseconds, None, )? - .into_series(); + .into_column(); assert_eq!(&upper, &range); let upper = out.column("_lower_boundary").unwrap().slice(0, 3); @@ -899,7 +903,7 @@ mod test { TimeUnit::Milliseconds, None, )? - .into_series(); + .into_column(); assert_eq!(&upper, &range); let expected = GroupsProxy::Idx( @@ -940,9 +944,9 @@ mod test { TimeUnit::Milliseconds, None, )? - .into_series(); + .into_column(); - let groups = Series::new("groups".into(), ["a", "a", "a", "b", "b", "a", "a"]); + let groups = Column::new("groups".into(), ["a", "a", "a", "b", "b", "a", "a"]); let df = DataFrame::new(vec![range, groups.clone()]).unwrap(); let (mut time_key, keys, _groups) = df diff --git a/crates/polars-time/src/upsample.rs b/crates/polars-time/src/upsample.rs index 37119317ccfa..47fef8180751 100644 --- a/crates/polars-time/src/upsample.rs +++ b/crates/polars-time/src/upsample.rs @@ -163,7 +163,7 @@ fn upsample_impl( Ok(out) } else if by.is_empty() { let index_column = source.column(index_column)?; - upsample_single_impl(source, index_column, every) + upsample_single_impl(source, index_column.as_materialized_series(), every) } else { let gb = if stable { source.group_by_stable(by) @@ -173,7 +173,7 @@ fn upsample_impl( // don't parallelize this, this may SO on large data. 
gb?.apply(|df| { let index_column = df.column(index_column)?; - upsample_single_impl(&df, index_column, every) + upsample_single_impl(&df, index_column.as_materialized_series(), every) }) } } diff --git a/crates/polars-utils/src/index.rs b/crates/polars-utils/src/index.rs index 1ca29d394727..f21ba1b39284 100644 --- a/crates/polars-utils/src/index.rs +++ b/crates/polars-utils/src/index.rs @@ -127,18 +127,11 @@ impl Indexable for &[T] { pub fn check_bounds(idx: &[IdxSize], len: IdxSize) -> PolarsResult<()> { // We iterate in large uninterrupted chunks to help auto-vectorization. - let mut in_bounds = true; - for chunk in idx.chunks(1024) { - for i in chunk { - if *i >= len { - in_bounds = false; - } - } - if !in_bounds { - break; - } - } - polars_ensure!(in_bounds, OutOfBounds: "indices are out of bounds"); + let Some(max_idx) = idx.iter().copied().max() else { + return Ok(()); + }; + + polars_ensure!(max_idx < len, OutOfBounds: "indices are out of bounds"); Ok(()) } diff --git a/crates/polars/src/docs/eager.rs b/crates/polars/src/docs/eager.rs index 95c759f836e7..6d3a6e90ea4c 100644 --- a/crates/polars/src/docs/eager.rs +++ b/crates/polars/src/docs/eager.rs @@ -72,6 +72,9 @@ //! // from a chunked-array //! let ca = UInt32Chunked::new("foo".into(), &[Some(1), None, Some(3)]); //! let s = ca.into_series(); +//! +//! // into a Column +//! let s = s.into_column(); //! ``` //! //! ### DataFrame @@ -88,10 +91,10 @@ //! "values_nulls" => [Some(1), None, Some(3)] //! ]?; //! -//! // from a Vec -//! let s1 = Series::new("names".into(), &["a", "b", "c"]); -//! let s2 = Series::new("values".into(), &[Some(1), None, Some(3)]); -//! let df = DataFrame::new(vec![s1, s2])?; +//! // from a Vec +//! let c1 = Column::new("names".into(), &["a", "b", "c"]); +//! let c2 = Column::new("values".into(), &[Some(1), None, Some(3)]); +//! let df = DataFrame::new(vec![c1, c2])?; //! # Ok(()) //! # } //! ``` @@ -256,7 +259,7 @@ //! //! // count string lengths //! 
let s = Series::new("foo".into(), &["foo", "bar", "foobar"]); -//! unary_elementwise_values(s.str()?, |str_val| str_val.len() as u64); +//! unary_elementwise_values::(s.str()?, |str_val| str_val.len() as u64); //! //! # Ok(()) //! # } @@ -354,9 +357,14 @@ //! // ordering of the columns //! let descending = vec![true, false]; //! // columns to sort by -//! let by = &["b", "a"]; +//! let by = [PlSmallStr::from_static("b"), PlSmallStr::from_static("a")]; //! // do the sort operation -//! let sorted = df.sort(by, descending, true)?; +//! let sorted = df.sort( +//! by, +//! SortMultipleOptions::default() +//! .with_order_descending_multi(descending) +//! .with_maintain_order(true) +//! )?; //! //! // sorted: //! @@ -442,7 +450,14 @@ //! )?; //! //! // group_by "foo" | pivot "bar" column | aggregate "N" -//! let pivoted = pivot::pivot(&df, ["foo"], ["bar"], ["N"], false, Some(first()), None); +//! let pivoted = pivot::pivot( +//! &df, +//! [PlSmallStr::from_static("foo")], +//! Some([PlSmallStr::from_static("bar")]), +//! Some([PlSmallStr::from_static("N")]), +//! false, Some(first()), +//! None +//! ); //! //! // pivoted: //! // +-----+------+------+------+------+------+ @@ -474,7 +489,10 @@ //! "D" => &[2, 4, 6] //! ]?; //! -//! let unpivoted = df.unpivot(&["A", "B"], &["C", "D"]).unwrap(); +//! let unpivoted = df.unpivot( +//! [PlSmallStr::from_static("A"), PlSmallStr::from_static("B")], +//! [PlSmallStr::from_static("C"), PlSmallStr::from_static("D")], +//! ).unwrap(); //! // unpivoted: //! //! // +-----+-----+----------+-------+ @@ -510,14 +528,14 @@ //! let s1 = Series::new("b".into(), &[1i64, 1, 1]); //! let s2 = Series::new("c".into(), &[2i64, 2, 2]); //! // construct a new ListChunked for a slice of Series. -//! let list = Series::new("foo", &[s0, s1, s2]); +//! let list = Column::new("foo".into(), &[s0, s1, s2]); //! //! // construct a few more Series. -//! let s0 = Series::new("B".into(), [1, 2, 3]); -//! let s1 = Series::new("C".into(), [1, 1, 1]); +//! 
let s0 = Column::new("B".into(), [1, 2, 3]); +//! let s1 = Column::new("C".into(), [1, 1, 1]); //! let df = DataFrame::new(vec![list, s0, s1])?; //! -//! let exploded = df.explode(["foo"])?; +//! let exploded = df.explode([PlSmallStr::from("foo")])?; //! // exploded: //! //! // +-----+-----+-----+ @@ -557,10 +575,8 @@ //! //! # fn example(df: &DataFrame) -> PolarsResult<()> { //! // read from path -//! let df = CsvReader::from_path("iris_csv")? -//! .infer_schema(None) -//! .has_header(true) -//! .finish()?; +//! let mut file = std::fs::File::open("iris_csv")?; +//! let df = CsvReader::new(file).finish()?; //! # Ok(()) //! # } //! ``` diff --git a/crates/polars/src/docs/lazy.rs b/crates/polars/src/docs/lazy.rs index c91367490130..c77bf58d5cac 100644 --- a/crates/polars/src/docs/lazy.rs +++ b/crates/polars/src/docs/lazy.rs @@ -30,7 +30,7 @@ //! //! // scan a csv file lazily //! let lf: LazyFrame = LazyCsvReader::new("some_path") -//! .has_header(true) +//! .with_has_header(true) //! .finish()?; //! //! // scan a parquet file lazily @@ -81,11 +81,8 @@ //! ]?; //! // sort this DataFrame by multiple columns //! -//! // ordering of the columns -//! let descending = vec![true, false]; -//! //! let sorted = df.lazy() -//! .sort_by_exprs(vec![col("b"), col("a")], descending, false, false) +//! .sort_by_exprs(vec![col("b"), col("a")], SortMultipleOptions::default()) //! .collect()?; //! //! // sorted: @@ -113,7 +110,7 @@ //! # fn example() -> PolarsResult<()> { //! //! let df = LazyCsvReader::new("reddit.csv") -//! .has_header(true) +//! .with_has_header(true) //! .with_separator(b',') //! .finish()? //! .group_by([col("comment_karma")]) diff --git a/crates/polars/src/lib.rs b/crates/polars/src/lib.rs index 9910df124fa5..5ecc28c94c34 100644 --- a/crates/polars/src/lib.rs +++ b/crates/polars/src/lib.rs @@ -20,7 +20,7 @@ //! .agg([ //! // expressions can be combined into powerful aggregations //! col("foo") -//! 
.sort_by([col("ham").rank(Default::default(), None)], [false]) +//! .sort_by([col("ham").rank(Default::default(), None)], SortMultipleOptions::default()) //! .last() //! .alias("last_foo_ranked_by_ham"), //! // every expression runs in parallel diff --git a/crates/polars/tests/it/core/date_like.rs b/crates/polars/tests/it/core/date_like.rs index 7777d3fd1eb0..0d08c6079539 100644 --- a/crates/polars/tests/it/core/date_like.rs +++ b/crates/polars/tests/it/core/date_like.rs @@ -4,7 +4,7 @@ use super::*; #[cfg(feature = "dtype-datetime")] #[cfg_attr(miri, ignore)] fn test_datelike_join() -> PolarsResult<()> { - let s = Series::new("foo".into(), &[1, 2, 3]); + let s = Column::new("foo".into(), &[1, 2, 3]); let mut s1 = s.cast(&DataType::Datetime(TimeUnit::Nanoseconds, None))?; s1.rename("bar".into()); diff --git a/crates/polars/tests/it/core/joins.rs b/crates/polars/tests/it/core/joins.rs index fe4ec8ba78cb..92f3d883f9dc 100644 --- a/crates/polars/tests/it/core/joins.rs +++ b/crates/polars/tests/it/core/joins.rs @@ -39,13 +39,13 @@ fn test_chunked_left_join() -> PolarsResult<()> { } fn create_frames() -> (DataFrame, DataFrame) { - let s0 = Series::new("days".into(), &[0, 1, 2]); - let s1 = Series::new("temp".into(), &[22.1, 19.9, 7.]); - let s2 = Series::new("rain".into(), &[0.2, 0.1, 0.3]); + let s0 = Column::new("days".into(), &[0, 1, 2]); + let s1 = Column::new("temp".into(), &[22.1, 19.9, 7.]); + let s2 = Column::new("rain".into(), &[0.2, 0.1, 0.3]); let temp = DataFrame::new(vec![s0, s1, s2]).unwrap(); - let s0 = Series::new("days".into(), &[1, 2, 3, 1]); - let s1 = Series::new("rain".into(), &[0.1, 0.2, 0.3, 0.4]); + let s0 = Column::new("days".into(), &[1, 2, 3, 1]); + let s1 = Column::new("rain".into(), &[0.1, 0.2, 0.3, 0.4]); let rain = DataFrame::new(vec![s0, s1]).unwrap(); (temp, rain) } @@ -59,10 +59,10 @@ fn test_inner_join() { std::env::set_var("POLARS_MAX_THREADS", format!("{}", i)); let joined = temp.inner_join(&rain, ["days"], ["days"]).unwrap(); - let 
join_col_days = Series::new("days".into(), &[1, 2, 1]); - let join_col_temp = Series::new("temp".into(), &[19.9, 7., 19.9]); - let join_col_rain = Series::new("rain".into(), &[0.1, 0.3, 0.1]); - let join_col_rain_right = Series::new("rain_right".into(), [0.1, 0.2, 0.4].as_ref()); + let join_col_days = Column::new("days".into(), &[1, 2, 1]); + let join_col_temp = Column::new("temp".into(), &[19.9, 7., 19.9]); + let join_col_rain = Column::new("rain".into(), &[0.1, 0.3, 0.1]); + let join_col_rain_right = Column::new("rain_right".into(), [0.1, 0.2, 0.4].as_ref()); let true_df = DataFrame::new(vec![ join_col_days, join_col_temp, @@ -81,31 +81,45 @@ fn test_inner_join() { fn test_left_join() { for i in 1..8 { std::env::set_var("POLARS_MAX_THREADS", format!("{}", i)); - let s0 = Series::new("days".into(), &[0, 1, 2, 3, 4]); - let s1 = Series::new("temp".into(), &[22.1, 19.9, 7., 2., 3.]); + let s0 = Column::new("days".into(), &[0, 1, 2, 3, 4]); + let s1 = Column::new("temp".into(), &[22.1, 19.9, 7., 2., 3.]); let temp = DataFrame::new(vec![s0, s1]).unwrap(); - let s0 = Series::new("days".into(), &[1, 2]); - let s1 = Series::new("rain".into(), &[0.1, 0.2]); + let s0 = Column::new("days".into(), &[1, 2]); + let s1 = Column::new("rain".into(), &[0.1, 0.2]); let rain = DataFrame::new(vec![s0, s1]).unwrap(); let joined = temp.left_join(&rain, ["days"], ["days"]).unwrap(); assert_eq!( - (joined.column("rain").unwrap().sum::().unwrap() * 10.).round(), + (joined + .column("rain") + .unwrap() + .as_materialized_series() + .sum::() + .unwrap() + * 10.) + .round(), 3. 
); assert_eq!(joined.column("rain").unwrap().null_count(), 3); // test join on string - let s0 = Series::new("days".into(), &["mo", "tue", "wed", "thu", "fri"]); - let s1 = Series::new("temp".into(), &[22.1, 19.9, 7., 2., 3.]); + let s0 = Column::new("days".into(), &["mo", "tue", "wed", "thu", "fri"]); + let s1 = Column::new("temp".into(), &[22.1, 19.9, 7., 2., 3.]); let temp = DataFrame::new(vec![s0, s1]).unwrap(); - let s0 = Series::new("days".into(), &["tue", "wed"]); - let s1 = Series::new("rain".into(), &[0.1, 0.2]); + let s0 = Column::new("days".into(), &["tue", "wed"]); + let s1 = Column::new("rain".into(), &[0.1, 0.2]); let rain = DataFrame::new(vec![s0, s1]).unwrap(); let joined = temp.left_join(&rain, ["days"], ["days"]).unwrap(); assert_eq!( - (joined.column("rain").unwrap().sum::().unwrap() * 10.).round(), + (joined + .column("rain") + .unwrap() + .as_materialized_series() + .sum::() + .unwrap() + * 10.) + .round(), 3. ); assert_eq!(joined.column("rain").unwrap().null_count(), 3); @@ -123,7 +137,14 @@ fn test_full_outer_join() -> PolarsResult<()> { JoinArgs::new(JoinType::Full).with_coalesce(JoinCoalesce::CoalesceColumns), )?; assert_eq!(joined.height(), 5); - assert_eq!(joined.column("days")?.sum::().unwrap(), 7); + assert_eq!( + joined + .column("days")? 
+ .as_materialized_series() + .sum::() + .unwrap(), + 7 + ); let df_left = df!( "a"=> ["a", "b", "a", "z"], @@ -153,15 +174,15 @@ fn test_join_with_nulls() { let dts = &[20, 21, 22, 23, 24, 25, 27, 28]; let vals = &[1.2, 2.4, 4.67, 5.8, 4.4, 3.6, 7.6, 6.5]; let df = DataFrame::new(vec![ - Series::new("date".into(), dts), - Series::new("val".into(), vals), + Column::new("date".into(), dts), + Column::new("val".into(), vals), ]) .unwrap(); let vals2 = &[Some(1.1), None, Some(3.3), None, None]; let df2 = DataFrame::new(vec![ - Series::new("date".into(), &dts[3..]), - Series::new("val2".into(), vals2), + Column::new("date".into(), &dts[3..]), + Column::new("val2".into(), vals2), ]) .unwrap(); @@ -338,14 +359,14 @@ fn test_join_categorical() { fn test_empty_df_join() -> PolarsResult<()> { let empty: Vec = vec![]; let empty_df = DataFrame::new(vec![ - Series::new("key".into(), &empty), - Series::new("eval".into(), &empty), + Column::new("key".into(), &empty), + Column::new("eval".into(), &empty), ]) .unwrap(); let df = DataFrame::new(vec![ - Series::new("key".into(), &["foo"]), - Series::new("aval".into(), &[4]), + Column::new("key".into(), &["foo"]), + Column::new("aval".into(), &[4]), ]) .unwrap(); @@ -361,8 +382,8 @@ fn test_empty_df_join() -> PolarsResult<()> { let empty: Vec = vec![]; let _empty_df = DataFrame::new(vec![ - Series::new("key".into(), &empty), - Series::new("eval".into(), &empty), + Column::new("key".into(), &empty), + Column::new("eval".into(), &empty), ]) .unwrap(); @@ -374,9 +395,9 @@ fn test_empty_df_join() -> PolarsResult<()> { // https://github.com/pola-rs/polars/issues/1824 let empty: Vec = vec![]; let empty_df = DataFrame::new(vec![ - Series::new("key".into(), &empty), - Series::new("1val".into(), &empty), - Series::new("2val".into(), &empty), + Column::new("key".into(), &empty), + Column::new("1val".into(), &empty), + Column::new("2val".into(), &empty), ])?; let out = df.left_join(&empty_df, ["key"], ["key"])?; @@ -610,7 +631,7 @@ fn 
test_4_threads_bit_offset() -> PolarsResult<()> { .collect::(); left_a.rename("a".into()); left_b.rename("b".into()); - let left_df = DataFrame::new(vec![left_a.into_series(), left_b.into_series()])?; + let left_df = DataFrame::new(vec![left_a.into_column(), left_b.into_column()])?; let i = 1; let len = 8; @@ -622,7 +643,7 @@ fn test_4_threads_bit_offset() -> PolarsResult<()> { right_a.rename("a".into()); right_b.rename("b".into()); - let right_df = DataFrame::new(vec![right_a.into_series(), right_b.into_series()])?; + let right_df = DataFrame::new(vec![right_a.into_column(), right_b.into_column()])?; let out = JoinBuilder::new(left_df.lazy()) .with(right_df.lazy()) .on([col("a"), col("b")]) diff --git a/crates/polars/tests/it/core/pivot.rs b/crates/polars/tests/it/core/pivot.rs index 85cf69ec1494..51367d2b7e42 100644 --- a/crates/polars/tests/it/core/pivot.rs +++ b/crates/polars/tests/it/core/pivot.rs @@ -56,9 +56,9 @@ fn test_pivot_date_() -> PolarsResult<()> { #[test] fn test_pivot_old() { - let s0 = Series::new("index".into(), ["A", "A", "B", "B", "C"].as_ref()); - let s2 = Series::new("columns".into(), ["k", "l", "m", "m", "l"].as_ref()); - let s1 = Series::new("values".into(), [1, 2, 2, 4, 2].as_ref()); + let s0 = Column::new("index".into(), ["A", "A", "B", "B", "C"].as_ref()); + let s2 = Column::new("columns".into(), ["k", "l", "m", "m", "l"].as_ref()); + let s1 = Column::new("values".into(), [1, 2, 2, 4, 2].as_ref()); let df = DataFrame::new(vec![s0, s1, s2]).unwrap(); let pvt = pivot( diff --git a/crates/polars/tests/it/io/csv.rs b/crates/polars/tests/it/io/csv.rs index 7c08998e69af..ebd7419c514e 100644 --- a/crates/polars/tests/it/io/csv.rs +++ b/crates/polars/tests/it/io/csv.rs @@ -44,15 +44,15 @@ fn write_csv() { fn write_dates() { use polars_core::export::chrono; - let s0 = Series::new( + let s0 = Column::new( "date".into(), [chrono::NaiveDate::from_yo_opt(2024, 33), None], ); - let s1 = Series::new( + let s1 = Column::new( "time".into(), [None, 
chrono::NaiveTime::from_hms_opt(19, 50, 0)], ); - let s2 = Series::new( + let s2 = Column::new( "datetime".into(), [ Some(chrono::NaiveDateTime::new( @@ -122,7 +122,7 @@ fn write_dates() { NonExistent::Raise, ) .unwrap() - .into_series(); + .into_column(); let mut with_timezone_df = DataFrame::new(vec![with_timezone]).unwrap(); buf.clear(); CsvWriter::new(&mut buf) @@ -150,7 +150,7 @@ fn test_read_csv_filter() -> PolarsResult<()> { .try_into_reader_with_file_path(Some(FOODS_CSV.into()))? .finish()?; - let out = df.filter(&df.column("fats_g")?.gt(4)?)?; + let out = df.filter(&df.column("fats_g")?.as_materialized_series().gt(4)?)?; // This fails if all columns are not equal. println!("{out}"); @@ -221,7 +221,14 @@ fn test_parser() -> PolarsResult<()> { assert_eq!(col.get(2)?, AnyValue::String("Setosa")); assert_eq!("sepal_length", df.get_columns()[0].name().as_str()); - assert_eq!(1, df.column("sepal_length").unwrap().chunks().len()); + assert_eq!( + 1, + df.column("sepal_length") + .unwrap() + .as_materialized_series() + .chunks() + .len() + ); assert_eq!(df.height(), 7); // test windows line endings @@ -309,15 +316,15 @@ fn test_missing_data() { assert!(df .column("column_1") .unwrap() - .equals(&Series::new("column_1".into(), &[1_i64, 1]))); + .equals(&Column::new("column_1".into(), &[1_i64, 1]))); assert!(df .column("column_2") .unwrap() - .equals_missing(&Series::new("column_2".into(), &[Some(2_i64), None]))); + .equals_missing(&Column::new("column_2".into(), &[Some(2_i64), None]))); assert!(df .column("column_3") .unwrap() - .equals(&Series::new("column_3".into(), &[3_i64, 3]))); + .equals(&Column::new("column_3".into(), &[3_i64, 3]))); } #[test] @@ -332,7 +339,7 @@ fn test_escape_comma() { assert!(df .column("column_3") .unwrap() - .equals(&Series::new("column_3".into(), &[11_i64, 12]))); + .equals(&Column::new("column_3".into(), &[11_i64, 12]))); } #[test] @@ -344,7 +351,7 @@ fn test_escape_double_quotes() { let file = Cursor::new(csv); let df = 
CsvReader::new(file).finish().unwrap(); assert_eq!(df.shape(), (2, 3)); - assert!(df.column("column_2").unwrap().equals(&Series::new( + assert!(df.column("column_2").unwrap().equals(&Column::new( "column_2".into(), &[ r#"with "double quotes" US"#, @@ -403,7 +410,7 @@ hello,","," ",world,"!" assert!(df .column(col) .unwrap() - .equals(&Series::new(col.into(), &[val; 4]))); + .equals(&Column::new(col.into(), &[val; 4]))); } } @@ -425,7 +432,7 @@ versions of Lorem Ipsum.",11 .finish() .unwrap(); - assert!(df.column("column_2").unwrap().equals(&Series::new( + assert!(df.column("column_2").unwrap().equals(&Column::new( "column_2".into(), &[ r#"Lorem Ipsum is simply dummy text of the printing and typesetting diff --git a/crates/polars/tests/it/io/ipc.rs b/crates/polars/tests/it/io/ipc.rs index 8a5602c86051..959886e33b72 100644 --- a/crates/polars/tests/it/io/ipc.rs +++ b/crates/polars/tests/it/io/ipc.rs @@ -24,8 +24,8 @@ fn test_ipc_compression_variadic_buffers() { #[cfg(test)] pub(crate) fn create_df() -> DataFrame { - let s0 = Series::new("days".into(), [0, 1, 2, 3, 4].as_ref()); - let s1 = Series::new("temp".into(), [22.1, 19.9, 7., 2., 3.].as_ref()); + let s0 = Column::new("days".into(), [0, 1, 2, 3, 4].as_ref()); + let s1 = Column::new("temp".into(), [22.1, 19.9, 7., 2., 3.].as_ref()); DataFrame::new(vec![s0, s1]).unwrap() } @@ -141,7 +141,7 @@ fn test_write_with_compression() { fn write_and_read_ipc_empty_series() { let mut buf: Cursor> = Cursor::new(Vec::new()); let chunked_array = Float64Chunked::new("empty".into(), &[0_f64; 0]); - let mut df = DataFrame::new(vec![chunked_array.into_series()]).unwrap(); + let mut df = DataFrame::new(vec![chunked_array.into_column()]).unwrap(); IpcWriter::new(&mut buf) .finish(&mut df) .expect("ipc writer"); diff --git a/crates/polars/tests/it/io/ipc_stream.rs b/crates/polars/tests/it/io/ipc_stream.rs index d12082d0dd71..770e0d88faec 100644 --- a/crates/polars/tests/it/io/ipc_stream.rs +++ b/crates/polars/tests/it/io/ipc_stream.rs 
@@ -146,7 +146,7 @@ mod test { fn write_and_read_ipc_stream_empty_series() { fn df() -> DataFrame { DataFrame::new(vec![ - Float64Chunked::new("empty".into(), &[0_f64; 0]).into_series() + Float64Chunked::new("empty".into(), &[0_f64; 0]).into_column() ]) .unwrap() } diff --git a/crates/polars/tests/it/io/mod.rs b/crates/polars/tests/it/io/mod.rs index 2fd9aab899d1..6ea615799996 100644 --- a/crates/polars/tests/it/io/mod.rs +++ b/crates/polars/tests/it/io/mod.rs @@ -17,7 +17,7 @@ mod ipc_stream; use polars::prelude::*; pub(crate) fn create_df() -> DataFrame { - let s0 = Series::new("days".into(), [0, 1, 2, 3, 4].as_ref()); - let s1 = Series::new("temp".into(), [22.1, 19.9, 7., 2., 3.].as_ref()); + let s0 = Column::new("days".into(), [0, 1, 2, 3, 4].as_ref()); + let s1 = Column::new("temp".into(), [22.1, 19.9, 7., 2., 3.].as_ref()); DataFrame::new(vec![s0, s1]).unwrap() } diff --git a/crates/polars/tests/it/joins.rs b/crates/polars/tests/it/joins.rs index 0fa0ba1c66a9..19e4911df3a9 100644 --- a/crates/polars/tests/it/joins.rs +++ b/crates/polars/tests/it/joins.rs @@ -36,14 +36,14 @@ fn join_nans_outer() -> PolarsResult<()> { #[test] #[cfg(feature = "lazy")] fn join_empty_datasets() -> PolarsResult<()> { - let a = DataFrame::new(Vec::from([Series::new_empty( + let a = DataFrame::new(Vec::from([Column::new_empty( "foo".into(), &DataType::Int64, )])) .unwrap(); let b = DataFrame::new(Vec::from([ - Series::new_empty("foo".into(), &DataType::Int64), - Series::new_empty("bar".into(), &DataType::Int64), + Column::new_empty("foo".into(), &DataType::Int64), + Column::new_empty("bar".into(), &DataType::Int64), ])) .unwrap(); diff --git a/crates/polars/tests/it/lazy/aggregation.rs b/crates/polars/tests/it/lazy/aggregation.rs index ad433e139775..85ded9c742d0 100644 --- a/crates/polars/tests/it/lazy/aggregation.rs +++ b/crates/polars/tests/it/lazy/aggregation.rs @@ -14,9 +14,9 @@ fn test_lazy_agg() { ], "%Y-%m-%d", ) - .into_series(); - let s1 = Series::new("temp".into(), [20, 10, 
7, 9, 1].as_ref()); - let s2 = Series::new("rain".into(), [0.2, 0.1, 0.3, 0.1, 0.01].as_ref()); + .into_column(); + let s1 = Column::new("temp".into(), [20, 10, 7, 9, 1].as_ref()); + let s2 = Column::new("rain".into(), [0.2, 0.1, 0.3, 0.1, 0.01].as_ref()); let df = DataFrame::new(vec![s0, s1, s2]).unwrap(); let lf = df @@ -33,7 +33,7 @@ fn test_lazy_agg() { let new = lf.collect().unwrap(); let min = new.column("min").unwrap(); - assert_eq!(min, &Series::new("min".into(), [0.1f64, 0.01, 0.1])); + assert_eq!(min, &Column::new("min".into(), [0.1f64, 0.01, 0.1])); } #[test] diff --git a/crates/polars/tests/it/lazy/cwc.rs b/crates/polars/tests/it/lazy/cwc.rs index 2ad0ab11ede4..5be002410391 100644 --- a/crates/polars/tests/it/lazy/cwc.rs +++ b/crates/polars/tests/it/lazy/cwc.rs @@ -59,7 +59,7 @@ fn fuzz_cluster_with_columns() { let mut unused_cols: Vec = Vec::with_capacity(26); let mut used_cols: Vec = Vec::with_capacity(26); - let mut series: Vec = Vec::with_capacity(*NUM_ORIGINAL_COLS.end()); + let mut columns: Vec = Vec::with_capacity(*NUM_ORIGINAL_COLS.end()); let mut used: Vec = Vec::with_capacity(26); @@ -76,11 +76,11 @@ fn fuzz_cluster_with_columns() { let column = rng.gen_range(0..unused_cols.len()); let column = unused_cols.swap_remove(column); - series.push(Series::new(to_str!(column).into(), vec![rnd_prime(rng)])); + columns.push(Column::new(to_str!(column).into(), vec![rnd_prime(rng)])); used_cols.push(column); } - let mut lf = DataFrame::new(std::mem::take(&mut series)).unwrap().lazy(); + let mut lf = DataFrame::new(std::mem::take(&mut columns)).unwrap().lazy(); for _ in 0..num_with_columns { let num_exprs = rng.gen_range(0..8); diff --git a/crates/polars/tests/it/lazy/expressions/arity.rs b/crates/polars/tests/it/lazy/expressions/arity.rs index 52ac97c56e62..56bfa432f8ce 100644 --- a/crates/polars/tests/it/lazy/expressions/arity.rs +++ b/crates/polars/tests/it/lazy/expressions/arity.rs @@ -58,7 +58,7 @@ fn includes_null_predicate_3038() -> PolarsResult<()> 
{ s.str()? .to_lowercase() .contains("not_exist", true) - .map(|ca| Some(ca.into_series())) + .map(|ca| Some(ca.into_column())) }, GetOutput::from_type(DataType::Boolean), )) @@ -88,7 +88,7 @@ fn includes_null_predicate_3038() -> PolarsResult<()> { s.str()? .to_lowercase() .contains_literal("non-existent") - .map(|ca| Some(ca.into_series())) + .map(|ca| Some(ca.into_column())) }, GetOutput::from_type(DataType::Boolean), )) diff --git a/crates/polars/tests/it/lazy/expressions/window.rs b/crates/polars/tests/it/lazy/expressions/window.rs index d617dd46574a..21d8a3d26bf7 100644 --- a/crates/polars/tests/it/lazy/expressions/window.rs +++ b/crates/polars/tests/it/lazy/expressions/window.rs @@ -217,7 +217,7 @@ fn test_window_mapping() -> PolarsResult<()> { .select([(lit(10) + col("A")).alias("foo").over([col("fruits")])]) .collect()?; - let expected = Series::new("foo".into(), [11, 12, 13, 14, 15]); + let expected = Column::new("foo".into(), [11, 12, 13, 14, 15]); assert!(out.column("foo")?.equals(&expected)); let out = df @@ -232,7 +232,7 @@ fn test_window_mapping() -> PolarsResult<()> { .over([col("fruits")]), ]) .collect()?; - let expected = Series::new("foo".into(), [11, 12, 8, 9, 15]); + let expected = Column::new("foo".into(), [11, 12, 8, 9, 15]); assert!(out.column("foo")?.equals(&expected)); let out = df @@ -247,7 +247,7 @@ fn test_window_mapping() -> PolarsResult<()> { .over([col("fruits")]), ]) .collect()?; - let expected = Series::new("foo".into(), [None, Some(3), None, Some(-1), Some(-1)]); + let expected = Column::new("foo".into(), [None, Some(3), None, Some(-1), Some(-1)]); assert!(out.column("foo")?.equals_missing(&expected)); // now sorted @@ -259,7 +259,7 @@ fn test_window_mapping() -> PolarsResult<()> { .lazy() .select([(lit(10) + col("A")).alias("foo").over([col("fruits")])]) .collect()?; - let expected = Series::new("foo".into(), [13, 14, 11, 12, 15]); + let expected = Column::new("foo".into(), [13, 14, 11, 12, 15]); 
assert!(out.column("foo")?.equals(&expected)); let out = df @@ -275,7 +275,7 @@ fn test_window_mapping() -> PolarsResult<()> { ]) .collect()?; - let expected = Series::new("foo".into(), [8, 9, 11, 12, 15]); + let expected = Column::new("foo".into(), [8, 9, 11, 12, 15]); assert!(out.column("foo")?.equals(&expected)); let out = df @@ -290,7 +290,7 @@ fn test_window_mapping() -> PolarsResult<()> { ]) .collect()?; - let expected = Series::new("foo".into(), [None, Some(-1), None, Some(3), Some(-1)]); + let expected = Column::new("foo".into(), [None, Some(-1), None, Some(3), Some(-1)]); assert!(out.column("foo")?.equals_missing(&expected)); Ok(()) @@ -381,7 +381,7 @@ fn test_window_naive_any() -> PolarsResult<()> { .collect()?; let res = df.column("res")?; - assert_eq!(res.sum::().unwrap(), 5); + assert_eq!(res.as_materialized_series().sum::().unwrap(), 5); Ok(()) } diff --git a/crates/polars/tests/it/lazy/exprs.rs b/crates/polars/tests/it/lazy/exprs.rs index 45d550ae85a1..84dfb7ade3cf 100644 --- a/crates/polars/tests/it/lazy/exprs.rs +++ b/crates/polars/tests/it/lazy/exprs.rs @@ -7,9 +7,9 @@ fn fuzz_exprs() { use rand::Rng; let lf = DataFrame::new(vec![ - Series::new("A".into(), vec![1, 2, 3, 4, 5]), - Series::new("B".into(), vec![Some(5), Some(4), None, Some(2), Some(1)]), - Series::new( + Column::new("A".into(), vec![1, 2, 3, 4, 5]), + Column::new("B".into(), vec![Some(5), Some(4), None, Some(2), Some(1)]), + Column::new( "C".into(), vec!["str", "", "a quite long string", "my", "string"], ), @@ -17,9 +17,9 @@ fn fuzz_exprs() { .unwrap() .lazy(); let empty = DataFrame::new(vec![ - Series::new("A".into(), Vec::::new()), - Series::new("B".into(), Vec::::new()), - Series::new("C".into(), Vec::<&str>::new()), + Column::new("A".into(), Vec::::new()), + Column::new("B".into(), Vec::::new()), + Column::new("C".into(), Vec::<&str>::new()), ]) .unwrap() .lazy(); diff --git a/crates/polars/tests/it/lazy/group_by.rs b/crates/polars/tests/it/lazy/group_by.rs index 
ac76e4921e40..cbae14aca5f0 100644 --- a/crates/polars/tests/it/lazy/group_by.rs +++ b/crates/polars/tests/it/lazy/group_by.rs @@ -79,7 +79,7 @@ fn test_filter_diff_arithmetic() -> PolarsResult<()> { let out = out.column("diff")?; assert_eq!( out, - &Series::new("diff".into(), &[None, Some(26), Some(6), None]) + &Column::new("diff".into(), &[None, Some(26), Some(6), None]) ); Ok(()) @@ -123,7 +123,7 @@ fn test_group_by_agg_list_with_not_aggregated() -> PolarsResult<()> { let out = out.explode()?; assert_eq!( out, - Series::new("value".into(), &[0, 2, 1, 3, 2, 2, 7, 2, 3, 1, 2, 1]) + Column::new("value".into(), &[0, 2, 1, 3, 2, 2, 7, 2, 3, 1, 2, 1]) ); Ok(()) } diff --git a/crates/polars/tests/it/lazy/queries.rs b/crates/polars/tests/it/lazy/queries.rs index 0be10b20f60e..f140a0461639 100644 --- a/crates/polars/tests/it/lazy/queries.rs +++ b/crates/polars/tests/it/lazy/queries.rs @@ -7,7 +7,7 @@ fn test_with_duplicate_column_empty_df() { let a = Int32Chunked::from_slice("a".into(), &[]); assert_eq!( - DataFrame::new(vec![a.into_series()]) + DataFrame::new(vec![a.into_column()]) .unwrap() .lazy() .with_columns([lit(true).alias("a")]) @@ -195,7 +195,7 @@ fn test_unknown_supertype_ignore() -> PolarsResult<()> { fn test_apply_multiple_columns() -> PolarsResult<()> { let df = fruits_cars(); - let multiply = |s: &mut [Series]| (&(&s[0] * &s[0])? * &s[1]).map(Some); + let multiply = |s: &mut [Column]| (&(&s[0] * &s[0])? 
* &s[1]).map(Some); let out = df .clone() @@ -234,14 +234,14 @@ fn test_apply_multiple_columns() -> PolarsResult<()> { #[test] fn test_group_by_on_lists() -> PolarsResult<()> { - let s0 = Series::new("".into(), [1i32, 2, 3]); - let s1 = Series::new("groups".into(), [4i32, 5]); + let s0 = Column::new("".into(), [1i32, 2, 3]); + let s1 = Column::new("groups".into(), [4i32, 5]); let mut builder = ListPrimitiveChunkedBuilder::::new("arrays".into(), 10, 10, DataType::Int32); - builder.append_series(&s0).unwrap(); - builder.append_series(&s1).unwrap(); - let s2 = builder.finish().into_series(); + builder.append_series(s0.as_materialized_series()).unwrap(); + builder.append_series(s1.as_materialized_series()).unwrap(); + let s2 = builder.finish().into_column(); let df = DataFrame::new(vec![s1, s2])?; let out = df diff --git a/docs/src/rust/user-guide/expressions/lists.rs b/docs/src/rust/user-guide/expressions/lists.rs index 9ce160cd58aa..fd097d98df7e 100644 --- a/docs/src/rust/user-guide/expressions/lists.rs +++ b/docs/src/rust/user-guide/expressions/lists.rs @@ -142,8 +142,8 @@ fn main() -> Result<(), Box> { col2.append_slice(&[1, 7, 3]); col2.append_slice(&[8, 1, 0]); let array_df = DataFrame::new(vec![ - col1.finish().into_series(), - col2.finish().into_series(), + col1.finish().into_column(), + col2.finish().into_column(), ])?; println!("{}", &array_df); diff --git a/docs/src/rust/user-guide/expressions/structs.rs b/docs/src/rust/user-guide/expressions/structs.rs index 25ed02daf827..cc6fff831d06 100644 --- a/docs/src/rust/user-guide/expressions/structs.rs +++ b/docs/src/rust/user-guide/expressions/structs.rs @@ -50,7 +50,7 @@ fn main() -> Result<(), Box> { // --8<-- [end:series_struct_extract] // --8<-- [start:series_struct_rename] - let out = DataFrame::new([rating_series].into())? + let out = DataFrame::new([rating_series.into_column()].into())? 
.lazy() .select([col("ratings") .struct_() @@ -130,7 +130,7 @@ fn main() -> Result<(), Box> { }) .collect(); - Ok(Some(out.into_series())) + Ok(Some(out.into_column())) }, GetOutput::from_type(DataType::Int32), ) diff --git a/docs/src/rust/user-guide/transformations/time-series/rolling.rs b/docs/src/rust/user-guide/transformations/time-series/rolling.rs index 19b57f2d0c33..4db0ea1be92a 100644 --- a/docs/src/rust/user-guide/transformations/time-series/rolling.rs +++ b/docs/src/rust/user-guide/transformations/time-series/rolling.rs @@ -93,7 +93,7 @@ fn main() -> Result<(), Box> { .into_iter() .map(|d| d.map(|v| v / 1000 / 24 / 60 / 60)) .collect::() - .into_series(), + .into_column(), )) }, GetOutput::from_type(DataType::Int64), diff --git a/docs/src/rust/user-guide/transformations/time-series/timezones.rs b/docs/src/rust/user-guide/transformations/time-series/timezones.rs index 489786cb844e..476a7a332b5c 100644 --- a/docs/src/rust/user-guide/transformations/time-series/timezones.rs +++ b/docs/src/rust/user-guide/transformations/time-series/timezones.rs @@ -5,7 +5,7 @@ use polars::prelude::*; fn main() -> Result<(), Box> { // --8<-- [start:example] let ts = ["2021-03-27 03:00", "2021-03-28 03:00"]; - let tz_naive = Series::new("tz_naive".into(), &ts); + let tz_naive = Column::new("tz_naive".into(), &ts); let time_zones_df = DataFrame::new(vec![tz_naive])? 
.lazy() .select([col("tz_naive").str().to_datetime( diff --git a/py-polars/tests/unit/dataframe/test_serde.py b/py-polars/tests/unit/dataframe/test_serde.py index 29d4eb5b05a6..71936c9eae81 100644 --- a/py-polars/tests/unit/dataframe/test_serde.py +++ b/py-polars/tests/unit/dataframe/test_serde.py @@ -65,6 +65,7 @@ def test_df_serialize_json() -> None: df = pl.DataFrame({"a": [1, 2, 3], "b": [9, 5, 6]}).sort("a") result = df.serialize(format="json") expected = '{"columns":[{"name":"a","datatype":"Int64","bit_settings":"SORTED_ASC","values":[1,2,3]},{"name":"b","datatype":"Int64","bit_settings":"","values":[9,5,6]}]}' + print(result) assert result == expected