Skip to content

Commit ce03376

Browse files
committed
Optimize `split_files` by using `drain`.
1 parent 5162833 commit ce03376

File tree

1 file changed

+16
-4
lines changed

1 file changed

+16
-4
lines changed

datafusion/core/src/datasource/listing/helpers.rs

+16 -4
Original file line number | Diff line number | Diff line change
@@ -139,10 +139,22 @@ pub fn split_files(
139139

140140
// effectively this is div with rounding up instead of truncating
141141
let chunk_size = (partitioned_files.len() + n - 1) / n;
142-
partitioned_files
143-
.chunks_mut(chunk_size)
144-
.map(|c| c.iter_mut().map(mem::take).collect())
145-
.collect()
142+
let mut chunks = Vec::with_capacity(n);
143+
let mut current_chunk = Vec::with_capacity(chunk_size);
144+
for file in partitioned_files.drain(..) {
145+
current_chunk.push(file);
146+
if current_chunk.len() == chunk_size {
147+
let full_chunk =
148+
mem::replace(&mut current_chunk, Vec::with_capacity(chunk_size));
149+
chunks.push(full_chunk);
150+
}
151+
}
152+
153+
if !current_chunk.is_empty() {
154+
chunks.push(current_chunk)
155+
}
156+
157+
chunks
146158
}
147159

148160
struct Partition {

0 commit comments

Comments
 (0)