Skip to content

Commit

Permalink
perf: Use split_at in split
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jun 11, 2024
1 parent 13d68ae commit 9a05fc7
Showing 1 changed file with 26 additions and 10 deletions.
36 changes: 26 additions & 10 deletions crates/polars-core/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -199,19 +199,24 @@ impl Container for Series {
}

fn split_impl<C: Container>(container: &C, target: usize, chunk_size: usize) -> Vec<C> {
let total_len = container.len();
if target == 1 {
return vec![container.clone()];
}
let mut out = Vec::with_capacity(target);
let chunk_size = chunk_size as i64;

for i in 0..target {
let offset = i * chunk_size;
let len = if i == (target - 1) {
total_len.saturating_sub(offset)
} else {
chunk_size
};
let container = container.slice((i * chunk_size) as i64, len);
out.push(container);
// First split
let (chunk, mut remainder) = container.split_at(chunk_size);
out.push(chunk);

// Take the rest of the splits of exactly chunk size, but skip the last remainder as we won't split that.
for _ in 1..target - 1 {
let (a, b) = remainder.split_at(chunk_size);
out.push(a);
remainder = b
}
// This can be slightly larger than `chunk_size`, but is smaller than `2 * chunk_size`.
out.push(remainder);
out
}

Expand All @@ -223,6 +228,7 @@ pub fn split<C: Container>(container: &C, target: usize) -> Vec<C> {
}

let chunk_size = std::cmp::max(total_len / target, 1);

if container.n_chunks() == target
&& container
.chunk_lengths()
Expand Down Expand Up @@ -1156,6 +1162,16 @@ pub fn coalesce_nulls_series(a: &Series, b: &Series) -> (Series, Series) {
mod test {
use super::*;

#[test]
fn test_split() {
    // Ten values split three ways: the first two pieces hold three values each and
    // the last piece holds the remaining four.
    let ca: Int32Chunked = (0..10).collect_ca("a");

    let pieces = split(&ca, 3);
    let lengths: Vec<usize> = pieces.iter().take(3).map(|piece| piece.len()).collect();
    assert_eq!(lengths, [3, 3, 4]);
}

#[test]
fn test_align_chunks() {
let a = Int32Chunked::new("", &[1, 2, 3, 4]);
Expand Down

0 comments on commit 9a05fc7

Please sign in to comment.