diff --git a/crates/polars-core/src/frame/mod.rs b/crates/polars-core/src/frame/mod.rs
index 2d264fe50115..52e41cf27c9f 100644
--- a/crates/polars-core/src/frame/mod.rs
+++ b/crates/polars-core/src/frame/mod.rs
@@ -464,6 +464,13 @@ impl DataFrame {
 
     /// Returns true if the chunks of the columns do not align and re-chunking should be done
     pub fn should_rechunk(&self) -> bool {
+        // Fast path: columns whose chunk counts differ can never be aligned. This check is also
+        // needed for correctness, as the code below doesn't compare the number of chunks.
+        if !self.get_columns().iter().map(|s| s.n_chunks()).all_equal() {
+            return true;
+        }
+
+        // From here on, compare the individual chunk lengths across columns.
         let mut chunk_lengths = self.columns.iter().map(|s| s.chunk_lengths());
         match chunk_lengths.next() {
             None => false,
diff --git a/py-polars/tests/unit/test_chunks.py b/py-polars/tests/unit/test_chunks.py
new file mode 100644
index 000000000000..63e7a327f003
--- /dev/null
+++ b/py-polars/tests/unit/test_chunks.py
@@ -0,0 +1,16 @@
+import numpy as np
+
+import polars as pl
+
+
+def test_chunks_align_16830() -> None:
+    n = 2
+    df = pl.DataFrame(
+        {"index_1": np.repeat(np.arange(10), n), "index_2": np.repeat(np.arange(10), n)}
+    )
+    df = pl.concat([df[0:10], df[10:]], rechunk=False)
+    df = df.filter(df["index_1"] == 0)  # filter the concatenated (non-rechunked) frame
+    df = df.with_columns(
+        index_2=pl.Series(values=[0] * n)
+    )  # set a column with a chunk of different size
+    df.set_sorted("index_2")  # triggers `select_chunk`.