Skip to content

Commit

Permalink
scalar-opt StructArray::from_series
Browse files Browse the repository at this point in the history
  • Loading branch information
coastalwhite committed Sep 13, 2024
1 parent 2d4a0da commit e797f70
Show file tree
Hide file tree
Showing 26 changed files with 92 additions and 96 deletions.
2 changes: 1 addition & 1 deletion crates/polars-core/src/chunked_array/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ fn cast_single_to_struct(
new_fields.push(Series::full_null(fld.name.clone(), length, &fld.dtype));
}

StructChunked::from_series(name, &new_fields).map(|ca| ca.into_series())
StructChunked::from_series(name, new_fields.iter()).map(|ca| ca.into_series())
}

impl<T> ChunkedArray<T>
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-core/src/chunked_array/ops/full.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,8 @@ impl ListChunked {
#[cfg(feature = "dtype-struct")]
impl ChunkFullNull for StructChunked {
fn full_null(name: PlSmallStr, length: usize) -> StructChunked {
let s = vec![Series::new_null(PlSmallStr::EMPTY, length)];
StructChunked::from_series(name, &s)
let s = [Series::new_null(PlSmallStr::EMPTY, length)];
StructChunked::from_series(name, s.iter())
.unwrap()
.with_outer_validity(Some(Bitmap::new_zeroed(length)))
}
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/chunked_array/ops/sort/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -724,7 +724,7 @@ pub(crate) fn convert_sort_column_multi_sort(s: &Series) -> PolarsResult<Series>
.iter()
.map(convert_sort_column_multi_sort)
.collect::<PolarsResult<Vec<_>>>()?;
let mut out = StructChunked::from_series(ca.name().clone(), &new_fields)?;
let mut out = StructChunked::from_series(ca.name().clone(), new_fields.iter())?;
out.zip_outer_validity(ca);
out.into_series()
},
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/chunked_array/ops/zip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ impl ChunkZip<StructType> for StructChunked {
.map(|(lhs, rhs)| lhs.zip_with_same_type(&mask, &rhs))
.collect::<PolarsResult<Vec<_>>>()?;

let mut out = StructChunked::from_series(self.name().clone(), &fields)?;
let mut out = StructChunked::from_series(self.name().clone(), fields.iter())?;

// Zip the validities.
if (l.null_count + r.null_count) > 0 {
Expand Down
4 changes: 1 addition & 3 deletions crates/polars-core/src/chunked_array/struct_/frame.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@ use crate::prelude::StructChunked;

impl DataFrame {
pub fn into_struct(self, name: PlSmallStr) -> StructChunked {
// @scalar-opt
let series = self.materialized_column_iter().cloned().collect::<Vec<_>>();
StructChunked::from_series(name, &series).expect("same invariants")
StructChunked::from_columns(name, &self.columns).expect("same invariants")
}
}
51 changes: 26 additions & 25 deletions crates/polars-core/src/chunked_array/struct_/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,24 @@ use crate::utils::Container;

pub type StructChunked = ChunkedArray<StructType>;

fn constructor(name: PlSmallStr, fields: &[Series]) -> PolarsResult<StructChunked> {
fn constructor<'a, I: ExactSizeIterator<Item = &'a Series> + Clone>(
name: PlSmallStr,
fields: I,
) -> PolarsResult<StructChunked> {
// Different chunk lengths: rechunk and recurse.
if !fields.iter().map(|s| s.n_chunks()).all_equal() {
let fields = fields.iter().map(|s| s.rechunk()).collect::<Vec<_>>();
return constructor(name, &fields);
if !fields.clone().map(|s| s.n_chunks()).all_equal() {
let fields = fields.map(|s| s.rechunk()).collect::<Vec<_>>();
return constructor(name, fields.iter());
}

let n_chunks = fields[0].n_chunks();
let dtype = DataType::Struct(fields.iter().map(|s| s.field().into_owned()).collect());
let n_chunks = fields.clone().next().unwrap().n_chunks();
let dtype = DataType::Struct(fields.clone().map(|s| s.field().into_owned()).collect());
let arrow_dtype = dtype.to_physical().to_arrow(CompatLevel::newest());

let chunks = (0..n_chunks)
.map(|c_i| {
let fields = fields
.iter()
.clone()
.map(|field| field.chunks()[c_i].clone())
.collect::<Vec<_>>();

Expand All @@ -55,30 +58,28 @@ fn constructor(name: PlSmallStr, fields: &[Series]) -> PolarsResult<StructChunke
},
// Different chunk lengths: rechunk and recurse.
Err(_) => {
let fields = fields.iter().map(|s| s.rechunk()).collect::<Vec<_>>();
constructor(name, &fields)
let fields = fields.map(|s| s.rechunk()).collect::<Vec<_>>();
constructor(name, fields.iter())
},
}
}

impl StructChunked {
pub fn from_columns(name: PlSmallStr, fields: &[Column]) -> PolarsResult<Self> {
// @scalar-opt!
let series = fields
.iter()
.map(|c| c.as_materialized_series().clone())
.collect::<Vec<_>>();
Self::from_series(name, &series)
Self::from_series(name, fields.iter().map(|c| c.as_materialized_series()))
}

pub fn from_series(name: PlSmallStr, fields: &[Series]) -> PolarsResult<Self> {
pub fn from_series<'a, I: ExactSizeIterator<Item = &'a Series> + Clone>(
name: PlSmallStr,
fields: I,
) -> PolarsResult<Self> {
let mut names = PlHashSet::with_capacity(fields.len());
let first_len = fields.first().map(|s| s.len()).unwrap_or(0);
let first_len = fields.clone().next().map(|s| s.len()).unwrap_or(0);
let mut max_len = first_len;

let mut all_equal_len = true;
let mut is_empty = false;
for s in fields {
for s in fields.clone() {
let s_len = s.len();
max_len = std::cmp::max(max_len, s_len);

Expand Down Expand Up @@ -117,10 +118,10 @@ impl StructChunked {
);
}
}
constructor(name, &new_fields)
} else if fields.is_empty() {
let fields = &[Series::new_null(PlSmallStr::EMPTY, 0)];
constructor(name, fields)
constructor(name, new_fields.iter())
} else if fields.len() == 0 {
let fields = [Series::new_null(PlSmallStr::EMPTY, 0)];
constructor(name, fields.iter())
} else {
constructor(name, fields)
}
Expand Down Expand Up @@ -184,7 +185,7 @@ impl StructChunked {
})
.collect::<PolarsResult<Vec<_>>>()?;

let mut out = Self::from_series(self.name().clone(), &new_fields)?;
let mut out = Self::from_series(self.name().clone(), new_fields.iter())?;
if self.null_count > 0 {
out.zip_outer_validity(self);
}
Expand Down Expand Up @@ -240,7 +241,7 @@ impl StructChunked {
}
})
.collect::<PolarsResult<Vec<_>>>()?;
let mut out = Self::from_series(self.name().clone(), &fields)?;
let mut out = Self::from_series(self.name().clone(), fields.iter())?;
if self.null_count > 0 {
out.zip_outer_validity(self);
}
Expand Down Expand Up @@ -285,7 +286,7 @@ impl StructChunked {
.iter()
.map(func)
.collect::<PolarsResult<Vec<_>>>()?;
Self::from_series(self.name().clone(), &fields).map(|mut ca| {
Self::from_series(self.name().clone(), fields.iter()).map(|mut ca| {
if self.null_count > 0 {
// SAFETY: we don't change types/ lengths.
unsafe {
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/frame/row/av_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -624,7 +624,7 @@ impl<'a> AnyValueBufferTrusted<'a> {
s
})
.collect::<Vec<_>>();
StructChunked::from_series(PlSmallStr::EMPTY, &v)
StructChunked::from_series(PlSmallStr::EMPTY, v.iter())
.unwrap()
.into_series()
},
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/serde/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ impl<'de> Deserialize<'de> for Series {
#[cfg(feature = "dtype-struct")]
DataType::Struct(_) => {
let values: Vec<Series> = map.next_value()?;
let ca = StructChunked::from_series(name.clone(), &values).unwrap();
let ca = StructChunked::from_series(name.clone(), values.iter()).unwrap();
let mut s = ca.into_series();
s.rename(name);
Ok(s)
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/series/any_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -743,7 +743,7 @@ fn any_values_to_struct(
series_fields.push(s)
}

let mut out = StructChunked::from_series(PlSmallStr::EMPTY, &series_fields)?;
let mut out = StructChunked::from_series(PlSmallStr::EMPTY, series_fields.iter())?;
if has_outer_validity {
let mut validity = MutableBitmap::new();
validity.extend_constant(values.len(), true);
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/series/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -630,7 +630,7 @@ impl Series {
.iter()
.map(|s| s.to_physical_repr().into_owned())
.collect();
let mut ca = StructChunked::from_series(self.name().clone(), &fields).unwrap();
let mut ca = StructChunked::from_series(self.name().clone(), fields.iter()).unwrap();

if arr.null_count() > 0 {
ca.zip_outer_validity(arr);
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/series/ops/null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ impl Series {
.iter()
.map(|fld| Series::full_null(fld.name().clone(), size, fld.dtype()))
.collect::<Vec<_>>();
let ca = StructChunked::from_series(name, &fields).unwrap();
let ca = StructChunked::from_series(name, fields.iter()).unwrap();

if !fields.is_empty() {
ca.with_outer_validity(Some(Bitmap::new_zeroed(size)))
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-expr/src/expressions/aggregation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,7 @@ impl PartitionedAggregation for AggregationExpr {
};
let mut count_s = series.agg_valid_count(groups);
count_s.rename(PlSmallStr::from_static("__POLARS_COUNT"));
Ok(StructChunked::from_series(new_name, &[agg_s, count_s])
Ok(StructChunked::from_series(new_name, [agg_s, count_s].iter())
.unwrap()
.into_series())
}
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-ops/src/chunked_array/array/to_struct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ pub trait ToStruct: AsArray {
.collect::<PolarsResult<Vec<_>>>()
})?;

StructChunked::from_series(ca.name().clone(), &fields)
StructChunked::from_series(ca.name().clone(), fields.iter())
}
}

Expand Down
2 changes: 1 addition & 1 deletion crates/polars-ops/src/chunked_array/hist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ where
let out = fields.pop().unwrap();
out.with_name(ca.name().clone())
} else {
StructChunked::from_series(ca.name().clone(), &fields)
StructChunked::from_series(ca.name().clone(), fields.iter())
.unwrap()
.into_series()
}
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-ops/src/chunked_array/list/to_struct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ pub trait ToStruct: AsList {
.collect::<PolarsResult<Vec<_>>>()
})?;

StructChunked::from_series(ca.name().clone(), &fields)
StructChunked::from_series(ca.name().clone(), fields.iter())
}
}

Expand Down
2 changes: 1 addition & 1 deletion crates/polars-ops/src/chunked_array/strings/extract.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ pub(super) fn extract_groups(
if n_fields == 1 {
return StructChunked::from_series(
ca.name().clone(),
&[Series::new_null(ca.name().clone(), ca.len())],
[Series::new_null(ca.name().clone(), ca.len())].iter(),
)
.map(|ca| ca.into_series());
}
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-ops/src/chunked_array/strings/json_path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,10 +204,10 @@ mod tests {

let expected_series = StructChunked::from_series(
"".into(),
&[
[
Series::new("a".into(), &[None, Some(1), Some(2), None]),
Series::new("b".into(), &[None, Some("hello"), Some("goodbye"), None]),
],
].iter(),
)
.unwrap()
.with_outer_validity_chunked(BooleanChunked::new("".into(), [false, true, true, false]))
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-ops/src/chunked_array/strings/split.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ where
})
.collect::<Vec<_>>();

StructChunked::from_series(ca.name().clone(), &fields)
StructChunked::from_series(ca.name().clone(), fields.iter())
}

pub fn split_helper<'a, F, I>(ca: &'a StringChunked, by: &'a StringChunked, op: F) -> ListChunked
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-ops/src/frame/join/merge_sorted.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ fn merge_series(lhs: &Series, rhs: &Series, merge_indicator: &[bool]) -> PolarsR
.zip(rhs.fields_as_series())
.map(|(lhs, rhs)| merge_series(lhs, &rhs, merge_indicator))
.collect::<PolarsResult<Vec<_>>>()?;
StructChunked::from_series(PlSmallStr::EMPTY, &new_fields)
StructChunked::from_series(PlSmallStr::EMPTY, new_fields.iter())
.unwrap()
.into_series()
},
Expand Down
12 changes: 3 additions & 9 deletions crates/polars-ops/src/frame/pivot/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -233,15 +233,9 @@ fn pivot_impl(
already exists in the DataFrame. Please rename it prior to calling `pivot`.")
}
// @scalar-opt
let columns_struct = StructChunked::from_series(
column.clone(),
&fields
.iter()
.map(|c| c.as_materialized_series().clone())
.collect::<Vec<_>>(),
)
.unwrap()
.into_series();
let columns_struct = StructChunked::from_columns(column.clone(), fields)
.unwrap()
.into_series();
let mut binding = pivot_df.clone();
let pivot_df = unsafe { binding.with_column_unchecked(columns_struct) };
pivot_impl_single_column(
Expand Down
7 changes: 1 addition & 6 deletions crates/polars-ops/src/frame/pivot/positioning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -484,14 +484,9 @@ pub(super) fn compute_row_idx(
}
} else {
let binding = pivot_df.select(index.iter().cloned())?;
// @scalar-opt
let fields = binding.get_columns();
let fields = fields
.iter()
.map(|c| c.as_materialized_series().clone())
.collect::<Vec<_>>();
let index_struct_series =
StructChunked::from_series(PlSmallStr::from_static("placeholder"), &fields)?
StructChunked::from_columns(PlSmallStr::from_static("placeholder"), fields)?
.into_series();
let index_agg = unsafe { index_struct_series.agg_first(groups) };
let index_agg_physical = index_agg.to_physical_repr();
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-ops/src/series/ops/cut.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,13 @@ fn map_cats(
},
});

let outvals = vec![
let outvals = [
brk_vals.finish().into_series(),
bld.finish()
._with_fast_unique(label_has_value.iter().all(bool::clone))
.into_series(),
];
Ok(StructChunked::from_series(out_name, &outvals)?.into_series())
Ok(StructChunked::from_series(out_name, outvals.iter())?.into_series())
} else {
Ok(bld
.drain_iter_and_finish(s_iter.map(|opt| {
Expand Down
8 changes: 4 additions & 4 deletions crates/polars-plan/src/dsl/function_expr/struct_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ pub(super) fn rename_fields(s: &Column, names: Arc<[PlSmallStr]>) -> PolarsResul
s
})
.collect::<Vec<_>>();
let mut out = StructChunked::from_series(ca.name().clone(), &fields)?;
let mut out = StructChunked::from_series(ca.name().clone(), fields.iter())?;
out.zip_outer_validity(ca);
Ok(out.into_column())
}
Expand All @@ -193,7 +193,7 @@ pub(super) fn prefix_fields(s: &Column, prefix: &str) -> PolarsResult<Column> {
s
})
.collect::<Vec<_>>();
let mut out = StructChunked::from_series(ca.name().clone(), &fields)?;
let mut out = StructChunked::from_series(ca.name().clone(), fields.iter())?;
out.zip_outer_validity(ca);
Ok(out.into_column())
}
Expand All @@ -210,7 +210,7 @@ pub(super) fn suffix_fields(s: &Column, suffix: &str) -> PolarsResult<Column> {
s
})
.collect::<Vec<_>>();
let mut out = StructChunked::from_series(ca.name().clone(), &fields)?;
let mut out = StructChunked::from_series(ca.name().clone(), fields.iter())?;
out.zip_outer_validity(ca);
Ok(out.into_column())
}
Expand Down Expand Up @@ -245,7 +245,7 @@ pub(super) fn with_fields(args: &[Column]) -> PolarsResult<Column> {
}

let new_fields = fields.into_values().cloned().collect::<Vec<_>>();
let mut out = StructChunked::from_series(ca.name().clone(), &new_fields)?;
let mut out = StructChunked::from_series(ca.name().clone(), new_fields.iter())?;
out.zip_outer_validity(ca);
Ok(out.into_column())
}
2 changes: 1 addition & 1 deletion crates/polars-plan/src/dsl/name.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ impl ExprNameNameSpace {
fd
})
.collect::<Vec<_>>();
let mut out = StructChunked::from_series(s.name().clone(), &fields)?;
let mut out = StructChunked::from_series(s.name().clone(), fields.iter())?;
out.zip_outer_validity(s);
Ok(Some(out.into_column()))
},
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-python/src/map/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ fn iterator_to_struct<'a>(
.collect::<Vec<_>>()
});

Ok(StructChunked::from_series(name, &fields)
Ok(StructChunked::from_series(name, fields.iter())
.unwrap()
.into_series()
.into())
Expand Down
Loading

0 comments on commit e797f70

Please sign in to comment.