-
Notifications
You must be signed in to change notification settings - Fork 34
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: teach BitPackedEncoding canonical_into #2324
Changes from 13 commits
3a24da2
77b779c
10da4b4
1045a96
e4cbde9
409a98c
715c7df
eeee805
89a4e90
fa243bf
e063b93
348e3c4
05ac017
8308c6a
2639bb1
db8bdfe
801eeeb
32ec159
e6d3435
2fb3a35
8b1f1c7
6dd57c0
549f68e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,116 +1,158 @@ | ||
use divan::Bencher; | ||
use rand::prelude::StdRng; | ||
use rand::{Rng, SeedableRng}; | ||
use rand::SeedableRng; | ||
use vortex_array::array::ChunkedArray; | ||
use vortex_array::builders::{ArrayBuilder, PrimitiveBuilder}; | ||
use vortex_array::{Array, IntoArray, IntoArrayVariant, IntoCanonical}; | ||
use vortex_buffer::BufferMut; | ||
use vortex_dtype::NativePType; | ||
use vortex_error::{VortexExpect, VortexUnwrap}; | ||
use vortex_fastlanes::bitpack_to_best_bit_width; | ||
use vortex_array::{IntoArray, IntoCanonical}; | ||
use vortex_error::{VortexExpect as _, VortexUnwrap}; | ||
use vortex_fastlanes::test_harness::make_array; | ||
|
||
fn main() { | ||
divan::main(); | ||
} | ||
|
||
fn make_array<T: NativePType>(len: usize) -> Array { | ||
#[divan::bench( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. mind extract these into a single shared const, see |
||
args = [ | ||
(100000, 1, 0.10), | ||
(100000, 1, 0.01), | ||
(100000, 1, 0.00), | ||
(100000, 10, 0.10), | ||
(100000, 10, 0.01), | ||
(100000, 10, 0.00), | ||
(100000, 100, 0.10), | ||
(100000, 100, 0.01), | ||
(100000, 100, 0.00), | ||
(100000, 1000, 0.00), | ||
// (1000000, 100, 0.00), | ||
// (1000000, 1000, 0.00), | ||
// (10000000, 100, 0.00), | ||
] | ||
)] | ||
fn into_canonical_non_nullable( | ||
bencher: Bencher, | ||
(chunk_len, chunk_count, fraction_patched): (usize, usize, f64), | ||
) { | ||
let mut rng = StdRng::seed_from_u64(0); | ||
let values = (0..len) | ||
.map(|_| T::from(rng.gen_range(0..100)).vortex_expect("valid value")) | ||
.collect::<BufferMut<T>>() | ||
.into_array() | ||
.into_primitive() | ||
.vortex_unwrap(); | ||
|
||
bitpack_to_best_bit_width(values) | ||
.vortex_unwrap() | ||
.into_array() | ||
} | ||
|
||
#[divan::bench()] | ||
fn test() { | ||
let chunks = (0..10).map(|_| make_array::<i32>(100)).collect::<Vec<_>>(); | ||
let arr = make_array::<i32>(1); | ||
let chunked = ChunkedArray::try_new(chunks, arr.dtype().clone()) | ||
.vortex_unwrap() | ||
.into_array(); | ||
let chunks = (0..chunk_count) | ||
.map(|_| { | ||
make_array(&mut rng, chunk_len, fraction_patched, 0.0).vortex_expect("make_array works") | ||
}) | ||
.collect::<Vec<_>>(); | ||
let chunked = ChunkedArray::from_iter(chunks).into_array(); | ||
|
||
let into_ca = chunked | ||
.clone() | ||
.into_canonical() | ||
.vortex_unwrap() | ||
.into_primitive() | ||
.vortex_unwrap(); | ||
let mut primitive_builder = | ||
PrimitiveBuilder::<i32>::with_capacity(arr.dtype().nullability(), 10 * 100); | ||
chunked | ||
.clone() | ||
.canonicalize_into(&mut primitive_builder) | ||
.vortex_unwrap(); | ||
let ca_into = primitive_builder.finish().vortex_unwrap(); | ||
bencher | ||
.with_inputs(|| chunked.clone()) | ||
.bench_values(|chunked| chunked.into_canonical().vortex_unwrap()); | ||
} | ||
|
||
assert_eq!( | ||
into_ca.as_slice::<i32>(), | ||
ca_into.into_primitive().vortex_unwrap().as_slice::<i32>() | ||
); | ||
#[divan::bench( | ||
args = [ | ||
(100000, 1, 0.10), | ||
(100000, 1, 0.01), | ||
(100000, 1, 0.00), | ||
(100000, 10, 0.10), | ||
(100000, 10, 0.01), | ||
(100000, 10, 0.00), | ||
(100000, 100, 0.10), | ||
(100000, 100, 0.01), | ||
(100000, 100, 0.00), | ||
(100000, 1000, 0.00), | ||
// (1000000, 100, 0.00), | ||
// (1000000, 1000, 0.00), | ||
// (10000000, 100, 0.00), | ||
] | ||
)] | ||
fn canonical_into_non_nullable( | ||
bencher: Bencher, | ||
(chunk_len, chunk_count, fraction_patched): (usize, usize, f64), | ||
) { | ||
let mut rng = StdRng::seed_from_u64(0); | ||
|
||
let mut primitive_builder = | ||
PrimitiveBuilder::<i32>::with_capacity(arr.dtype().nullability(), 10 * 100); | ||
primitive_builder.extend_from_array(chunked).vortex_unwrap(); | ||
let ca_into = primitive_builder.finish().vortex_unwrap(); | ||
let chunks = (0..chunk_count) | ||
.map(|_| { | ||
make_array(&mut rng, chunk_len, fraction_patched, 0.0).vortex_expect("make_array works") | ||
}) | ||
.collect::<Vec<_>>(); | ||
let chunked = ChunkedArray::from_iter(chunks).into_array(); | ||
|
||
assert_eq!( | ||
into_ca.as_slice::<i32>(), | ||
ca_into.into_primitive().vortex_unwrap().as_slice::<i32>() | ||
); | ||
bencher | ||
.with_inputs(|| chunked.clone()) | ||
.bench_values(|chunked| { | ||
let mut primitive_builder = PrimitiveBuilder::<i32>::with_capacity( | ||
chunked.dtype().nullability(), | ||
chunk_len * chunk_count + 1024, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We cannot assume this constant since its not added by the builder or at least we need another bench without this which is the primary one. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I ran the benchmark without the extra 1024 elements, and canonical_into ended up slower in all cases. When the builder can’t hold the full result, it has to resize and copy—just like into_canonical. I can also address your comment about using a temporary buffer for the final chunk and include that in a benchmark. Looking ahead, I see two possible approaches:
I’m inclined toward the second option because data goes exactly where it belongs, although it does waste space for smaller arrays. For example, a 500-element array might need space for 1024 elements. Ultimately, this is a key design decision for how builders should work. Maybe worth a quick sync or a wider discussion? What do you think? |
||
); | ||
chunked | ||
.canonicalize_into(&mut primitive_builder) | ||
.vortex_unwrap(); | ||
primitive_builder.finish().vortex_unwrap() | ||
}); | ||
} | ||
|
||
#[divan::bench( | ||
types = [u32], | ||
args = [ | ||
// (1000, 100), | ||
// (100000, 100), | ||
// (1000000, 100), | ||
// (100000, 1000), | ||
(100000, 3), | ||
(100000, 1, 0.10), | ||
(100000, 1, 0.00), | ||
(100000, 10, 0.10), | ||
(100000, 10, 0.00), | ||
(100000, 100, 0.10), | ||
(100000, 100, 0.00), | ||
] | ||
)] | ||
fn into_canonical<T: NativePType>(bencher: Bencher, (arr_len, chunk_count): (usize, usize)) { | ||
fn into_canonical_nullable( | ||
bencher: Bencher, | ||
(chunk_len, chunk_count, fraction_patched): (usize, usize, f64), | ||
) { | ||
let mut rng = StdRng::seed_from_u64(0); | ||
|
||
let chunks = (0..chunk_count) | ||
.map(|_| make_array::<T>(arr_len)) | ||
.map(|_| { | ||
make_array(&mut rng, chunk_len, fraction_patched, 0.05) | ||
.vortex_expect("make_array works") | ||
}) | ||
.collect::<Vec<_>>(); | ||
let arr = make_array::<T>(1); | ||
let chunked = ChunkedArray::try_new(chunks, arr.dtype().clone()).vortex_unwrap(); | ||
let chunked = ChunkedArray::from_iter(chunks).into_array(); | ||
|
||
bencher.bench(|| chunked.clone().into_canonical().vortex_unwrap().len()); | ||
bencher | ||
.with_inputs(|| chunked.clone()) | ||
.bench_values(|chunked| chunked.into_canonical().vortex_unwrap()); | ||
} | ||
|
||
#[divan::bench( | ||
types = [u32], | ||
args = [ | ||
// (1000, 100), | ||
// (100000, 100), | ||
// (1000000, 100), | ||
// (100000, 1000), | ||
(100000, 3), | ||
(100000, 1, 0.10), | ||
(100000, 1, 0.00), | ||
(100000, 10, 0.10), | ||
(100000, 10, 0.00), | ||
(100000, 100, 0.10), | ||
(100000, 100, 0.00), | ||
] | ||
)] | ||
fn canonical_into<T: NativePType>(bencher: Bencher, (arr_len, chunk_count): (usize, usize)) { | ||
fn canonical_into_nullable( | ||
bencher: Bencher, | ||
(chunk_len, chunk_count, fraction_patched): (usize, usize, f64), | ||
) { | ||
let mut rng = StdRng::seed_from_u64(0); | ||
|
||
let chunks = (0..chunk_count) | ||
.map(|_| make_array::<T>(arr_len)) | ||
.map(|_| { | ||
make_array(&mut rng, chunk_len, fraction_patched, 0.05) | ||
.vortex_expect("make_array works") | ||
}) | ||
.collect::<Vec<_>>(); | ||
let arr = make_array::<T>(1); | ||
let chunked = ChunkedArray::try_new(chunks, arr.dtype().clone()) | ||
.vortex_unwrap() | ||
.into_array(); | ||
let chunked = ChunkedArray::from_iter(chunks).into_array(); | ||
|
||
bencher.bench(|| { | ||
let mut primitive_builder = | ||
PrimitiveBuilder::<T>::with_capacity(arr.dtype().nullability(), arr_len * chunk_count); | ||
chunked | ||
.clone() | ||
.canonicalize_into(&mut primitive_builder) | ||
.vortex_unwrap(); | ||
primitive_builder.finish().vortex_unwrap().len() | ||
}); | ||
bencher | ||
.with_inputs(|| chunked.clone()) | ||
.bench_values(|chunked| { | ||
let mut primitive_builder = PrimitiveBuilder::<i32>::with_capacity( | ||
chunked.dtype().nullability(), | ||
chunk_len * chunk_count + 1024, | ||
); | ||
chunked | ||
.canonicalize_into(&mut primitive_builder) | ||
.vortex_unwrap(); | ||
primitive_builder.finish().vortex_unwrap() | ||
}); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This creates patches