diff --git a/crates/polars-core/src/frame/chunks.rs b/crates/polars-core/src/frame/chunks.rs
index 8ecefbef0d5b..2a371e85ab31 100644
--- a/crates/polars-core/src/frame/chunks.rs
+++ b/crates/polars-core/src/frame/chunks.rs
@@ -29,18 +29,7 @@ impl DataFrame {
             let columns = self
                 .get_columns()
                 .iter()
-                .map(|s| match s.dtype() {
-                    #[cfg(feature = "dtype-struct")]
-                    DataType::Struct(_) => {
-                        let mut ca = s.struct_().unwrap().clone();
-                        for field in ca.fields_mut().iter_mut() {
-                            *field = field.replace_with_chunk(field.chunks()[i].clone())
-                        }
-                        ca.update_chunks(0);
-                        ca.into_series()
-                    },
-                    _ => s.replace_with_chunk(s.chunks()[i].clone()),
-                })
+                .map(|s| s.select_chunk(i))
                 .collect::<Vec<_>>();
 
             DataFrame::new_no_checks(columns)
diff --git a/crates/polars-core/src/frame/mod.rs b/crates/polars-core/src/frame/mod.rs
index 2653eec0939e..a17f6f1ebde9 100644
--- a/crates/polars-core/src/frame/mod.rs
+++ b/crates/polars-core/src/frame/mod.rs
@@ -452,9 +452,10 @@ impl DataFrame {
     /// Aggregate all the chunks in the DataFrame to a single chunk in parallel.
     /// This may lead to more peak memory consumption.
     pub fn as_single_chunk_par(&mut self) -> &mut Self {
-        if self.columns.iter().any(|s| s.n_chunks() > 1) {
-            self.columns = self._apply_columns_par(&|s| s.rechunk());
-        }
+        self.as_single_chunk();
+        // if self.columns.iter().any(|s| s.n_chunks() > 1) {
+        //     self.columns = self._apply_columns_par(&|s| s.rechunk());
+        // }
         self
     }
 
diff --git a/crates/polars-core/src/series/mod.rs b/crates/polars-core/src/series/mod.rs
index 687f4544b4d4..86791a8a06b9 100644
--- a/crates/polars-core/src/series/mod.rs
+++ b/crates/polars-core/src/series/mod.rs
@@ -194,30 +194,29 @@ impl Series {
         ca.chunks_mut()
     }
 
-    /// Create a `Series` of the same data type with all chunks replaced.
-    /// # Safety
-    /// These chunks should align with the data-type
-    pub unsafe fn replace_chunks(&self, chunks: Vec<ArrayRef>) -> Self {
-        let mut new = self.clear();
-        // Assign mut so we go through arc only once.
-        let mut_new = new._get_inner_mut();
-        *mut_new.chunks_mut() = chunks;
-        mut_new.compute_len();
-        new
-    }
-
-    /// Create a `Series` of the same data type with all chunks replaced.
-    /// # Safety
-    /// This chunk should align with the data-type
-    pub unsafe fn replace_with_chunk(&self, chunk: ArrayRef) -> Self {
-        let mut new = self.clear();
-        // Assign mut so we go through arc only once.
-        let mut_new = new._get_inner_mut();
-        let chunks = mut_new.chunks_mut();
-        chunks.clear();
-        chunks.push(chunk);
-        mut_new.compute_len();
-        new
+    pub fn select_chunk(&self, i: usize) -> Self {
+        match self.dtype() {
+            #[cfg(feature = "dtype-struct")]
+            DataType::Struct(_) => {
+                let mut ca = self.struct_().unwrap().clone();
+                for field in ca.fields_mut().iter_mut() {
+                    *field = field.select_chunk(i)
+                }
+                ca.update_chunks(0);
+                ca.into_series()
+            },
+            _ => {
+                let mut new = self.clear();
+                // Assign mut so we go through arc only once.
+                let mut_new = new._get_inner_mut();
+                let chunks = unsafe { mut_new.chunks_mut() };
+                let chunk = self.chunks()[i].clone();
+                chunks.clear();
+                chunks.push(chunk);
+                mut_new.compute_len();
+                new
+            },
+        }
     }
 
     pub fn is_sorted_flag(&self) -> IsSorted {
diff --git a/py-polars/tests/unit/datatypes/test_struct.py b/py-polars/tests/unit/datatypes/test_struct.py
index d399ba52c5ba..d53b22dd7c7e 100644
--- a/py-polars/tests/unit/datatypes/test_struct.py
+++ b/py-polars/tests/unit/datatypes/test_struct.py
@@ -927,3 +927,10 @@ def test_struct_filter_chunked_16498() -> None:
 
 def test_struct_field_dynint_nullable_16243() -> None:
     pl.select(pl.lit(None).fill_null(pl.struct(42)))
+
+
+def test_struct_split_16536() -> None:
+    df = pl.DataFrame({"struct": [{"a": {"a": {"a": 1}}}], "list": [[1]], "int": [1]})
+
+    df = pl.concat([df, df, df, df], rechunk=False)
+    assert df.filter(pl.col("int") == 1).shape == (4, 3)