Skip to content

Commit

Permalink
perf: don't reassign chunks back to df owner (pola-rs#14633)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Feb 22, 2024
1 parent 3b065f3 commit 21d4326
Showing 1 changed file with 8 additions and 5 deletions.
13 changes: 8 additions & 5 deletions crates/polars-io/src/parquet/write.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
+use std::borrow::Cow;
 use std::io::Write;
 
 use arrow::array::{Array, ArrayRef};
@@ -192,8 +193,8 @@ where
 df.align_chunks();
 
 let n_splits = df.height() / self.row_group_size.unwrap_or(512 * 512);
-if n_splits > 0 {
-*df = accumulate_dataframes_vertical_unchecked(
+let chunked_df = if n_splits > 0 {
+Cow::Owned(accumulate_dataframes_vertical_unchecked(
 split_df_as_ref(df, n_splits, false)?
 .into_iter()
 .map(|mut df| {
@@ -206,10 +207,12 @@ where
 }
 df
 }),
-);
-}
+))
+} else {
+Cow::Borrowed(df)
+};
 let mut batched = self.batched(&df.schema())?;
-batched.write_batch(df)?;
+batched.write_batch(&chunked_df)?;
 batched.finish()
 }
 }
Expand Down

0 comments on commit 21d4326

Please sign in to comment.