From e797f70017094590d47324e36e7b20174728e8b1 Mon Sep 17 00:00:00 2001 From: coastalwhite Date: Fri, 13 Sep 2024 11:08:09 +0200 Subject: [PATCH] scalar-opt StructArray::from_series --- crates/polars-core/src/chunked_array/cast.rs | 2 +- .../polars-core/src/chunked_array/ops/full.rs | 4 +- .../src/chunked_array/ops/sort/mod.rs | 2 +- .../polars-core/src/chunked_array/ops/zip.rs | 2 +- .../src/chunked_array/struct_/frame.rs | 4 +- .../src/chunked_array/struct_/mod.rs | 51 ++++++++-------- crates/polars-core/src/frame/row/av_buffer.rs | 2 +- crates/polars-core/src/serde/series.rs | 2 +- crates/polars-core/src/series/any_value.rs | 2 +- crates/polars-core/src/series/mod.rs | 2 +- crates/polars-core/src/series/ops/null.rs | 2 +- .../src/expressions/aggregation.rs | 2 +- .../src/chunked_array/array/to_struct.rs | 2 +- crates/polars-ops/src/chunked_array/hist.rs | 2 +- .../src/chunked_array/list/to_struct.rs | 2 +- .../src/chunked_array/strings/extract.rs | 2 +- .../src/chunked_array/strings/json_path.rs | 4 +- .../src/chunked_array/strings/split.rs | 2 +- .../polars-ops/src/frame/join/merge_sorted.rs | 2 +- crates/polars-ops/src/frame/pivot/mod.rs | 12 +--- .../polars-ops/src/frame/pivot/positioning.rs | 7 +-- crates/polars-ops/src/series/ops/cut.rs | 4 +- .../src/dsl/function_expr/struct_.rs | 8 +-- crates/polars-plan/src/dsl/name.rs | 2 +- crates/polars-python/src/map/mod.rs | 2 +- .../nodes/parquet_source/row_group_decode.rs | 60 +++++++++++-------- 26 files changed, 92 insertions(+), 96 deletions(-) diff --git a/crates/polars-core/src/chunked_array/cast.rs b/crates/polars-core/src/chunked_array/cast.rs index 53f6e85f221d..ea758742169e 100644 --- a/crates/polars-core/src/chunked_array/cast.rs +++ b/crates/polars-core/src/chunked_array/cast.rs @@ -125,7 +125,7 @@ fn cast_single_to_struct( new_fields.push(Series::full_null(fld.name.clone(), length, &fld.dtype)); } - StructChunked::from_series(name, &new_fields).map(|ca| ca.into_series()) + StructChunked::from_series(name, new_fields.iter()).map(|ca| ca.into_series()) } impl ChunkedArray diff --git a/crates/polars-core/src/chunked_array/ops/full.rs b/crates/polars-core/src/chunked_array/ops/full.rs index 3f797d588e47..ee307cc3ca8e 100644 --- a/crates/polars-core/src/chunked_array/ops/full.rs +++ b/crates/polars-core/src/chunked_array/ops/full.rs @@ -192,8 +192,8 @@ impl ListChunked { #[cfg(feature = "dtype-struct")] impl ChunkFullNull for StructChunked { fn full_null(name: PlSmallStr, length: usize) -> StructChunked { - let s = vec![Series::new_null(PlSmallStr::EMPTY, length)]; - StructChunked::from_series(name, &s) + let s = [Series::new_null(PlSmallStr::EMPTY, length)]; + StructChunked::from_series(name, s.iter()) .unwrap() .with_outer_validity(Some(Bitmap::new_zeroed(length))) } diff --git a/crates/polars-core/src/chunked_array/ops/sort/mod.rs b/crates/polars-core/src/chunked_array/ops/sort/mod.rs index cfe30bb59c7d..0aa70dae1c83 100644 --- a/crates/polars-core/src/chunked_array/ops/sort/mod.rs +++ b/crates/polars-core/src/chunked_array/ops/sort/mod.rs @@ -724,7 +724,7 @@ pub(crate) fn convert_sort_column_multi_sort(s: &Series) -> PolarsResult .iter() .map(convert_sort_column_multi_sort) .collect::>>()?; - let mut out = StructChunked::from_series(ca.name().clone(), &new_fields)?; + let mut out = StructChunked::from_series(ca.name().clone(), new_fields.iter())?; out.zip_outer_validity(ca); out.into_series() }, diff --git a/crates/polars-core/src/chunked_array/ops/zip.rs b/crates/polars-core/src/chunked_array/ops/zip.rs index eb24468d892d..7fe09ba2c7d1 100644 --- a/crates/polars-core/src/chunked_array/ops/zip.rs +++ b/crates/polars-core/src/chunked_array/ops/zip.rs @@ -237,7 +237,7 @@ impl ChunkZip for StructChunked { .map(|(lhs, rhs)| lhs.zip_with_same_type(&mask, &rhs)) .collect::>>()?; - let mut out = StructChunked::from_series(self.name().clone(), &fields)?; + let mut out = StructChunked::from_series(self.name().clone(), fields.iter())?; // Zip the validities. if (l.null_count + r.null_count) > 0 { diff --git a/crates/polars-core/src/chunked_array/struct_/frame.rs b/crates/polars-core/src/chunked_array/struct_/frame.rs index 92e46ac8635a..83f0f1299667 100644 --- a/crates/polars-core/src/chunked_array/struct_/frame.rs +++ b/crates/polars-core/src/chunked_array/struct_/frame.rs @@ -5,8 +5,6 @@ use crate::prelude::StructChunked; impl DataFrame { pub fn into_struct(self, name: PlSmallStr) -> StructChunked { - // @scalar-opt - let series = self.materialized_column_iter().cloned().collect::>(); - StructChunked::from_series(name, &series).expect("same invariants") + StructChunked::from_columns(name, &self.columns).expect("same invariants") } } diff --git a/crates/polars-core/src/chunked_array/struct_/mod.rs b/crates/polars-core/src/chunked_array/struct_/mod.rs index e635be7f8f13..0c4eb50ddc58 100644 --- a/crates/polars-core/src/chunked_array/struct_/mod.rs +++ b/crates/polars-core/src/chunked_array/struct_/mod.rs @@ -18,21 +18,24 @@ use crate::utils::Container; pub type StructChunked = ChunkedArray; -fn constructor(name: PlSmallStr, fields: &[Series]) -> PolarsResult { +fn constructor<'a, I: ExactSizeIterator + Clone>( + name: PlSmallStr, + fields: I, +) -> PolarsResult { // Different chunk lengths: rechunk and recurse. - if !fields.iter().map(|s| s.n_chunks()).all_equal() { - let fields = fields.iter().map(|s| s.rechunk()).collect::>(); - return constructor(name, &fields); + if !fields.clone().map(|s| s.n_chunks()).all_equal() { + let fields = fields.map(|s| s.rechunk()).collect::>(); + return constructor(name, fields.iter()); } - let n_chunks = fields[0].n_chunks(); - let dtype = DataType::Struct(fields.iter().map(|s| s.field().into_owned()).collect()); + let n_chunks = fields.clone().next().unwrap().n_chunks(); + let dtype = DataType::Struct(fields.clone().map(|s| s.field().into_owned()).collect()); let arrow_dtype = dtype.to_physical().to_arrow(CompatLevel::newest()); let chunks = (0..n_chunks) .map(|c_i| { let fields = fields - .iter() + .clone() .map(|field| field.chunks()[c_i].clone()) .collect::>(); @@ -55,30 +58,28 @@ fn constructor(name: PlSmallStr, fields: &[Series]) -> PolarsResult { - let fields = fields.iter().map(|s| s.rechunk()).collect::>(); - constructor(name, &fields) + let fields = fields.map(|s| s.rechunk()).collect::>(); + constructor(name, fields.iter()) }, } } impl StructChunked { pub fn from_columns(name: PlSmallStr, fields: &[Column]) -> PolarsResult { - // @scalar-opt! - let series = fields - .iter() - .map(|c| c.as_materialized_series().clone()) - .collect::>(); - Self::from_series(name, &series) + Self::from_series(name, fields.iter().map(|c| c.as_materialized_series())) } - pub fn from_series(name: PlSmallStr, fields: &[Series]) -> PolarsResult { + pub fn from_series<'a, I: ExactSizeIterator + Clone>( + name: PlSmallStr, + fields: I, + ) -> PolarsResult { let mut names = PlHashSet::with_capacity(fields.len()); - let first_len = fields.first().map(|s| s.len()).unwrap_or(0); + let first_len = fields.clone().next().map(|s| s.len()).unwrap_or(0); let mut max_len = first_len; let mut all_equal_len = true; let mut is_empty = false; - for s in fields { + for s in fields.clone() { let s_len = s.len(); max_len = std::cmp::max(max_len, s_len); @@ -117,10 +118,10 @@ impl StructChunked { ); } } - constructor(name, &new_fields) - } else if fields.is_empty() { - let fields = &[Series::new_null(PlSmallStr::EMPTY, 0)]; - constructor(name, fields) + constructor(name, new_fields.iter()) + } else if fields.len() == 0 { + let fields = [Series::new_null(PlSmallStr::EMPTY, 0)]; + constructor(name, fields.iter()) } else { constructor(name, fields) } @@ -184,7 +185,7 @@ impl StructChunked { }) .collect::>>()?; - let mut out = Self::from_series(self.name().clone(), &new_fields)?; + let mut out = Self::from_series(self.name().clone(), new_fields.iter())?; if self.null_count > 0 { out.zip_outer_validity(self); } @@ -240,7 +241,7 @@ impl StructChunked { } }) .collect::>>()?; - let mut out = Self::from_series(self.name().clone(), &fields)?; + let mut out = Self::from_series(self.name().clone(), fields.iter())?; if self.null_count > 0 { out.zip_outer_validity(self); } @@ -285,7 +286,7 @@ impl StructChunked { .iter() .map(func) .collect::>>()?; - Self::from_series(self.name().clone(), &fields).map(|mut ca| { + Self::from_series(self.name().clone(), fields.iter()).map(|mut ca| { if self.null_count > 0 { // SAFETY: we don't change types/ lengths. unsafe { diff --git a/crates/polars-core/src/frame/row/av_buffer.rs b/crates/polars-core/src/frame/row/av_buffer.rs index 608d6ec820af..f46332021ef1 100644 --- a/crates/polars-core/src/frame/row/av_buffer.rs +++ b/crates/polars-core/src/frame/row/av_buffer.rs @@ -624,7 +624,7 @@ impl<'a> AnyValueBufferTrusted<'a> { s }) .collect::>(); - StructChunked::from_series(PlSmallStr::EMPTY, &v) + StructChunked::from_series(PlSmallStr::EMPTY, v.iter()) .unwrap() .into_series() }, diff --git a/crates/polars-core/src/serde/series.rs b/crates/polars-core/src/serde/series.rs index 3506a0e9cc89..0ef07e702374 100644 --- a/crates/polars-core/src/serde/series.rs +++ b/crates/polars-core/src/serde/series.rs @@ -277,7 +277,7 @@ impl<'de> Deserialize<'de> for Series { #[cfg(feature = "dtype-struct")] DataType::Struct(_) => { let values: Vec = map.next_value()?; - let ca = StructChunked::from_series(name.clone(), &values).unwrap(); + let ca = StructChunked::from_series(name.clone(), values.iter()).unwrap(); let mut s = ca.into_series(); s.rename(name); Ok(s) diff --git a/crates/polars-core/src/series/any_value.rs b/crates/polars-core/src/series/any_value.rs index aaa4bc753443..30fba0a9cb14 100644 --- a/crates/polars-core/src/series/any_value.rs +++ b/crates/polars-core/src/series/any_value.rs @@ -743,7 +743,7 @@ fn any_values_to_struct( series_fields.push(s) } - let mut out = StructChunked::from_series(PlSmallStr::EMPTY, &series_fields)?; + let mut out = StructChunked::from_series(PlSmallStr::EMPTY, series_fields.iter())?; if has_outer_validity { let mut validity = MutableBitmap::new(); validity.extend_constant(values.len(), true); diff --git a/crates/polars-core/src/series/mod.rs b/crates/polars-core/src/series/mod.rs index cb4a9bb84030..bd46b4c6a3d8 100644 --- a/crates/polars-core/src/series/mod.rs +++ b/crates/polars-core/src/series/mod.rs @@ -630,7 +630,7 @@ impl Series { .iter() .map(|s| s.to_physical_repr().into_owned()) .collect(); - let mut ca = StructChunked::from_series(self.name().clone(), &fields).unwrap(); + let mut ca = StructChunked::from_series(self.name().clone(), fields.iter()).unwrap(); if arr.null_count() > 0 { ca.zip_outer_validity(arr); diff --git a/crates/polars-core/src/series/ops/null.rs b/crates/polars-core/src/series/ops/null.rs index ee33c309687e..edff23e5d31f 100644 --- a/crates/polars-core/src/series/ops/null.rs +++ b/crates/polars-core/src/series/ops/null.rs @@ -55,7 +55,7 @@ impl Series { .iter() .map(|fld| Series::full_null(fld.name().clone(), size, fld.dtype())) .collect::>(); - let ca = StructChunked::from_series(name, &fields).unwrap(); + let ca = StructChunked::from_series(name, fields.iter()).unwrap(); if !fields.is_empty() { ca.with_outer_validity(Some(Bitmap::new_zeroed(size))) diff --git a/crates/polars-expr/src/expressions/aggregation.rs b/crates/polars-expr/src/expressions/aggregation.rs index 297c77b19e00..8e2563e526e0 100644 --- a/crates/polars-expr/src/expressions/aggregation.rs +++ b/crates/polars-expr/src/expressions/aggregation.rs @@ -502,7 +502,7 @@ impl PartitionedAggregation for AggregationExpr { }; let mut count_s = series.agg_valid_count(groups); count_s.rename(PlSmallStr::from_static("__POLARS_COUNT")); - Ok(StructChunked::from_series(new_name, &[agg_s, count_s]) + Ok(StructChunked::from_series(new_name, [agg_s, count_s].iter()) .unwrap() .into_series()) } diff --git a/crates/polars-ops/src/chunked_array/array/to_struct.rs b/crates/polars-ops/src/chunked_array/array/to_struct.rs index b79a9ffcfe9f..b00dbbf4d43b 100644 --- a/crates/polars-ops/src/chunked_array/array/to_struct.rs +++ b/crates/polars-ops/src/chunked_array/array/to_struct.rs @@ -40,7 +40,7 @@ pub trait ToStruct: AsArray { .collect::>>() })?; - StructChunked::from_series(ca.name().clone(), &fields) + StructChunked::from_series(ca.name().clone(), fields.iter()) } } diff --git a/crates/polars-ops/src/chunked_array/hist.rs b/crates/polars-ops/src/chunked_array/hist.rs index 8d7781745531..ca906d12851c 100644 --- a/crates/polars-ops/src/chunked_array/hist.rs +++ b/crates/polars-ops/src/chunked_array/hist.rs @@ -136,7 +136,7 @@ where let out = fields.pop().unwrap(); out.with_name(ca.name().clone()) } else { - StructChunked::from_series(ca.name().clone(), &fields) + StructChunked::from_series(ca.name().clone(), fields.iter()) .unwrap() .into_series() } diff --git a/crates/polars-ops/src/chunked_array/list/to_struct.rs b/crates/polars-ops/src/chunked_array/list/to_struct.rs index 73798163ed48..cdd245bce8b7 100644 --- a/crates/polars-ops/src/chunked_array/list/to_struct.rs +++ b/crates/polars-ops/src/chunked_array/list/to_struct.rs @@ -80,7 +80,7 @@ pub trait ToStruct: AsList { .collect::>>() })?; - StructChunked::from_series(ca.name().clone(), &fields) + StructChunked::from_series(ca.name().clone(), fields.iter()) } } diff --git a/crates/polars-ops/src/chunked_array/strings/extract.rs b/crates/polars-ops/src/chunked_array/strings/extract.rs index 35f38e40d61d..cb26d66f7aff 100644 --- a/crates/polars-ops/src/chunked_array/strings/extract.rs +++ b/crates/polars-ops/src/chunked_array/strings/extract.rs @@ -50,7 +50,7 @@ pub(super) fn extract_groups( if n_fields == 1 { return StructChunked::from_series( ca.name().clone(), - &[Series::new_null(ca.name().clone(), ca.len())], + [Series::new_null(ca.name().clone(), ca.len())].iter(), ) .map(|ca| ca.into_series()); } diff --git a/crates/polars-ops/src/chunked_array/strings/json_path.rs b/crates/polars-ops/src/chunked_array/strings/json_path.rs index a25ce1937332..7aa77ca23e86 100644 --- a/crates/polars-ops/src/chunked_array/strings/json_path.rs +++ b/crates/polars-ops/src/chunked_array/strings/json_path.rs @@ -204,10 +204,10 @@ mod tests { let expected_series = StructChunked::from_series( "".into(), - &[ + [ Series::new("a".into(), &[None, Some(1), Some(2), None]), Series::new("b".into(), &[None, Some("hello"), Some("goodbye"), None]), - ], + ].iter(), ) .unwrap() .with_outer_validity_chunked(BooleanChunked::new("".into(), [false, true, true, false])) diff --git a/crates/polars-ops/src/chunked_array/strings/split.rs b/crates/polars-ops/src/chunked_array/strings/split.rs index d86e0efac2ae..31c15a70cb08 100644 --- a/crates/polars-ops/src/chunked_array/strings/split.rs +++ b/crates/polars-ops/src/chunked_array/strings/split.rs @@ -149,7 +149,7 @@ where }) .collect::>(); - StructChunked::from_series(ca.name().clone(), &fields) + StructChunked::from_series(ca.name().clone(), fields.iter()) } pub fn split_helper<'a, F, I>(ca: &'a StringChunked, by: &'a StringChunked, op: F) -> ListChunked diff --git a/crates/polars-ops/src/frame/join/merge_sorted.rs b/crates/polars-ops/src/frame/join/merge_sorted.rs index 8ab303fae2c1..a180b293ca0f 100644 --- a/crates/polars-ops/src/frame/join/merge_sorted.rs +++ b/crates/polars-ops/src/frame/join/merge_sorted.rs @@ -85,7 +85,7 @@ fn merge_series(lhs: &Series, rhs: &Series, merge_indicator: &[bool]) -> PolarsR .zip(rhs.fields_as_series()) .map(|(lhs, rhs)| merge_series(lhs, &rhs, merge_indicator)) .collect::>>()?; - StructChunked::from_series(PlSmallStr::EMPTY, &new_fields) + StructChunked::from_series(PlSmallStr::EMPTY, new_fields.iter()) .unwrap() .into_series() }, diff --git a/crates/polars-ops/src/frame/pivot/mod.rs b/crates/polars-ops/src/frame/pivot/mod.rs index d681b5db8a90..15753a7c49a7 100644 --- a/crates/polars-ops/src/frame/pivot/mod.rs +++ b/crates/polars-ops/src/frame/pivot/mod.rs @@ -233,15 +233,9 @@ fn pivot_impl( already exists in the DataFrame. Please rename it prior to calling `pivot`.") } // @scalar-opt - let columns_struct = StructChunked::from_series( - column.clone(), - &fields - .iter() - .map(|c| c.as_materialized_series().clone()) - .collect::>(), - ) - .unwrap() - .into_series(); + let columns_struct = StructChunked::from_columns(column.clone(), fields) + .unwrap() + .into_series(); let mut binding = pivot_df.clone(); let pivot_df = unsafe { binding.with_column_unchecked(columns_struct) }; pivot_impl_single_column( diff --git a/crates/polars-ops/src/frame/pivot/positioning.rs b/crates/polars-ops/src/frame/pivot/positioning.rs index 7b19872a1bc3..0e0de1083c5b 100644 --- a/crates/polars-ops/src/frame/pivot/positioning.rs +++ b/crates/polars-ops/src/frame/pivot/positioning.rs @@ -484,14 +484,9 @@ pub(super) fn compute_row_idx( } } else { let binding = pivot_df.select(index.iter().cloned())?; - // @scalar-opt let fields = binding.get_columns(); - let fields = fields - .iter() - .map(|c| c.as_materialized_series().clone()) - .collect::>(); let index_struct_series = - StructChunked::from_series(PlSmallStr::from_static("placeholder"), &fields)? + StructChunked::from_columns(PlSmallStr::from_static("placeholder"), fields)? .into_series(); let index_agg = unsafe { index_struct_series.agg_first(groups) }; let index_agg_physical = index_agg.to_physical_repr(); diff --git a/crates/polars-ops/src/series/ops/cut.rs b/crates/polars-ops/src/series/ops/cut.rs index cba643cf98e9..52cc2ee5a67a 100644 --- a/crates/polars-ops/src/series/ops/cut.rs +++ b/crates/polars-ops/src/series/ops/cut.rs @@ -57,13 +57,13 @@ fn map_cats( }, }); - let outvals = vec![ + let outvals = [ brk_vals.finish().into_series(), bld.finish() ._with_fast_unique(label_has_value.iter().all(bool::clone)) .into_series(), ]; - Ok(StructChunked::from_series(out_name, &outvals)?.into_series()) + Ok(StructChunked::from_series(out_name, outvals.iter())?.into_series()) } else { Ok(bld .drain_iter_and_finish(s_iter.map(|opt| { diff --git a/crates/polars-plan/src/dsl/function_expr/struct_.rs b/crates/polars-plan/src/dsl/function_expr/struct_.rs index 3c72138e6241..acc8020b8e7e 100644 --- a/crates/polars-plan/src/dsl/function_expr/struct_.rs +++ b/crates/polars-plan/src/dsl/function_expr/struct_.rs @@ -176,7 +176,7 @@ pub(super) fn rename_fields(s: &Column, names: Arc<[PlSmallStr]>) -> PolarsResul s }) .collect::>(); - let mut out = StructChunked::from_series(ca.name().clone(), &fields)?; + let mut out = StructChunked::from_series(ca.name().clone(), fields.iter())?; out.zip_outer_validity(ca); Ok(out.into_column()) } @@ -193,7 +193,7 @@ pub(super) fn prefix_fields(s: &Column, prefix: &str) -> PolarsResult { s }) .collect::>(); - let mut out = StructChunked::from_series(ca.name().clone(), &fields)?; + let mut out = StructChunked::from_series(ca.name().clone(), fields.iter())?; out.zip_outer_validity(ca); Ok(out.into_column()) } @@ -210,7 +210,7 @@ pub(super) fn suffix_fields(s: &Column, suffix: &str) -> PolarsResult { s }) .collect::>(); - let mut out = StructChunked::from_series(ca.name().clone(), &fields)?; + let mut out = StructChunked::from_series(ca.name().clone(), fields.iter())?; out.zip_outer_validity(ca); Ok(out.into_column()) } @@ -245,7 +245,7 @@ pub(super) fn with_fields(args: &[Column]) -> PolarsResult { } let new_fields = fields.into_values().cloned().collect::>(); - let mut out = StructChunked::from_series(ca.name().clone(), &new_fields)?; + let mut out = StructChunked::from_series(ca.name().clone(), new_fields.iter())?; out.zip_outer_validity(ca); Ok(out.into_column()) } diff --git a/crates/polars-plan/src/dsl/name.rs b/crates/polars-plan/src/dsl/name.rs index 1df62a767721..1261b4430bec 100644 --- a/crates/polars-plan/src/dsl/name.rs +++ b/crates/polars-plan/src/dsl/name.rs @@ -76,7 +76,7 @@ impl ExprNameNameSpace { fd }) .collect::>(); - let mut out = StructChunked::from_series(s.name().clone(), &fields)?; + let mut out = StructChunked::from_series(s.name().clone(), fields.iter())?; out.zip_outer_validity(s); Ok(Some(out.into_column())) }, diff --git a/crates/polars-python/src/map/mod.rs b/crates/polars-python/src/map/mod.rs index 8f6ed1518fe8..ef1bb4e34507 100644 --- a/crates/polars-python/src/map/mod.rs +++ b/crates/polars-python/src/map/mod.rs @@ -122,7 +122,7 @@ fn iterator_to_struct<'a>( .collect::>() }); - Ok(StructChunked::from_series(name, &fields) + Ok(StructChunked::from_series(name, fields.iter()) .unwrap() .into_series() .into()) diff --git a/crates/polars-stream/src/nodes/parquet_source/row_group_decode.rs b/crates/polars-stream/src/nodes/parquet_source/row_group_decode.rs index 028595c0bfa9..ae32dd38025c 100644 --- a/crates/polars-stream/src/nodes/parquet_source/row_group_decode.rs +++ b/crates/polars-stream/src/nodes/parquet_source/row_group_decode.rs @@ -2,10 +2,10 @@ use std::sync::Arc; use polars_core::frame::DataFrame; use polars_core::prelude::{ - ArrowField, ArrowSchema, BooleanChunked, ChunkFull, IdxCa, StringChunked, - IntColumn, Column, + AnyValue, ArrowField, ArrowSchema, BooleanChunked, Column, DataType, IdxCa, IntoColumn, }; -use polars_core::series::{IntoSeries, IsSorted, Series}; +use polars_core::scalar::Scalar; +use polars_core::series::{IsSorted, Series}; use polars_core::utils::arrow::bitmap::{Bitmap, MutableBitmap}; use polars_error::{polars_bail, PolarsResult}; use polars_io::predicates::PhysicalIoExpr; @@ -138,25 +138,33 @@ impl RowGroupDecoder { let path_index = row_group_data.path_index; let hive_series = if let Some(hp) = self.hive_partitions.as_deref() { - let mut v = hp[path_index].materialize_partition_columns(); - for s in v.iter_mut() { - *s = s.new_from_index(0, row_group_data.file_max_row_group_height); - } - v + let v = hp[path_index].materialize_partition_columns(); + v.into_iter() + .map(|s| { + s.into_column() + .new_from_index(0, row_group_data.file_max_row_group_height) + }) + .collect() } else { vec![] }; + // @scalar-opt let file_path_series = self.include_file_paths.clone().map(|file_path_col| { - StringChunked::full( + Column::new_scalar( file_path_col, - self.scan_sources - .get(path_index) - .unwrap() - .to_include_path_name(), + Scalar::new( + DataType::String, + AnyValue::StringOwned( + self.scan_sources + .get(path_index) + .unwrap() + .to_include_path_name() + .into(), + ), + ), row_group_data.file_max_row_group_height, ) - .into_series() }); SharedFileState { @@ -170,7 +178,7 @@ impl RowGroupDecoder { &self, row_group_data: &RowGroupData, slice_range: core::ops::Range, - ) -> PolarsResult> { + ) -> PolarsResult> { if let Some(RowIndex { name, offset }) = self.row_index.as_ref() { let projection_height = row_group_data.row_group_metadata.num_rows(); @@ -208,7 +216,7 @@ impl RowGroupDecoder { /// `out_vec`. async fn decode_all_columns( &self, - out_vec: &mut Vec, + out_vec: &mut Vec, row_group_data: &Arc, filter: Option, ) -> PolarsResult<()> { @@ -305,7 +313,7 @@ fn decode_column( arrow_field: &ArrowField, row_group_data: &RowGroupData, filter: Option, -) -> PolarsResult { +) -> PolarsResult { let columns_to_deserialize = row_group_data .row_group_metadata .columns_under_root_iter(&arrow_field.name) @@ -331,16 +339,16 @@ fn decode_column( // TODO: Also load in the metadata. - Ok(series) + Ok(series.into()) } /// # Safety /// All series in `cols` have the same length. async unsafe fn filter_cols( - mut cols: Vec, + mut cols: Vec, mask: &BooleanChunked, min_values_per_thread: usize, -) -> PolarsResult> { +) -> PolarsResult> { if cols.is_empty() { return Ok(cols); } @@ -418,8 +426,8 @@ fn calc_cols_per_thread( /// State shared across row groups for a single file. pub(super) struct SharedFileState { path_index: usize, - hive_series: Vec, - file_path_series: Option, + hive_series: Vec, + file_path_series: Option, } /// @@ -567,7 +575,7 @@ fn decode_column_prefiltered( prefilter_setting: &PrefilterMaskSetting, mask: &BooleanChunked, mask_bitmap: &Bitmap, -) -> PolarsResult { +) -> PolarsResult { let columns_to_deserialize = row_group_data .row_group_metadata .columns_under_root_iter(&arrow_field.name) @@ -594,12 +602,12 @@ fn decode_column_prefiltered( deserialize_filter, )?; - let series = Series::try_from((arrow_field, array))?; + let column = Series::try_from((arrow_field, array))?.into_column(); if !prefilter { - series.filter(mask) + column.filter(mask) } else { - Ok(series) + Ok(column) } }