Skip to content

Commit e9bf8aa

Browse files
authored
Speed up filter_bytes (#6699)
* Use vec
* Use extend, fix capacity
1 parent e907bf8 commit e9bf8aa

File tree

2 files changed

+12
-11
lines changed

2 files changed

+12
-11
lines changed

arrow-data/src/transform/variable_size.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ fn extend_offset_values<T: ArrowNativeType + AsPrimitive<usize>>(
3434
len: usize,
3535
) {
3636
let start_values = offsets[start].as_();
37-
let end_values = offsets[start + len].as_();
37+
let end_values: usize = offsets[start + len].as_();
3838
let new_values = &values[start_values..end_values];
3939
buffer.extend_from_slice(new_values);
4040
}

arrow-select/src/filter.rs

+11-10
Original file line number | Diff line number | Diff line change
@@ -582,7 +582,6 @@ fn filter_native<T: ArrowNativeType>(values: &[T], predicate: &FilterPredicate)
582582
}
583583
IterationStrategy::Indices(indices) => {
584584
let iter = indices.iter().map(|x| values[*x]);
585-
586585
// SAFETY: `Vec::iter` is trusted length
587586
unsafe { MutableBuffer::from_trusted_len_iter(iter) }
588587
}
@@ -618,8 +617,8 @@ where
618617
struct FilterBytes<'a, OffsetSize> {
619618
src_offsets: &'a [OffsetSize],
620619
src_values: &'a [u8],
621-
dst_offsets: MutableBuffer,
622-
dst_values: MutableBuffer,
620+
dst_offsets: Vec<OffsetSize>,
621+
dst_values: Vec<u8>,
623622
cur_offset: OffsetSize,
624623
}
625624

@@ -631,10 +630,10 @@ where
631630
where
632631
T: ByteArrayType<Offset = OffsetSize>,
633632
{
634-
let num_offsets_bytes = (capacity + 1) * std::mem::size_of::<OffsetSize>();
635-
let mut dst_offsets = MutableBuffer::new(num_offsets_bytes);
636-
let dst_values = MutableBuffer::new(0);
633+
let dst_values = Vec::new();
634+
let mut dst_offsets: Vec<OffsetSize> = Vec::with_capacity(capacity + 1);
637635
let cur_offset = OffsetSize::from_usize(0).unwrap();
636+
638637
dst_offsets.push(cur_offset);
639638

640639
Self {
@@ -664,13 +663,15 @@ where
664663

665664
/// Extends the in-progress array by the indexes in the provided iterator
666665
fn extend_idx(&mut self, iter: impl Iterator<Item = usize>) {
667-
for idx in iter {
668-
let (start, end, len) = self.get_value_range(idx);
666+
self.dst_offsets.extend(iter.map(|idx| {
667+
let start = self.src_offsets[idx].as_usize();
668+
let end = self.src_offsets[idx + 1].as_usize();
669+
let len = OffsetSize::from_usize(end - start).expect("illegal offset range");
669670
self.cur_offset += len;
670-
self.dst_offsets.push(self.cur_offset);
671671
self.dst_values
672672
.extend_from_slice(&self.src_values[start..end]);
673-
}
673+
self.cur_offset
674+
}));
674675
}
675676

676677
/// Extends the in-progress array by the ranges in the provided iterator

0 commit comments

Comments
 (0)