Skip to content

Commit dbf8ba5

Browse files
authored
feat: teach BitPackedEncoding canonical_into (#2324)
Runtime for a 100k-element array is reduced to ~60% of `into_canonical`.

This runtime reduction only holds when the builder avoids all resizes (allocation + `extend_from_slice`)! The builder must be large enough to contain both the entire decoded array and any scratch space needed by the decoder. For BitPacked, that means it needs 1023 empty element slots beyond the decoded length, in case the decoder has to unpack a full 1024-element chunk to retrieve just one element.

```
canonicalize_bench      fastest   │ slowest   │ median    │ mean      │ samples │ iters
├─ test                 43.24 µs  │ 221.6 µs  │ 47.62 µs  │ 47.49 µs  │ 100000  │ 100000
├─ canonical_into                 │           │           │           │         │
│  ╰─ u32                         │           │           │           │         │
│     ╰─ (100000, 3)    24.12 µs  │ 104.7 µs  │ 24.29 µs  │ 24.39 µs  │ 100000  │ 100000
╰─ into_canonical                 │           │           │           │         │
   ╰─ u32                         │           │           │           │         │
      ╰─ (100000, 3)    39.54 µs  │ 249.6 µs  │ 39.91 µs  │ 40.95 µs  │ 100000  │ 100000
```
1 parent 0751488 commit dbf8ba5

File tree

12 files changed

+471
-216
lines changed

12 files changed

+471
-216
lines changed

Cargo.lock

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

encodings/fastlanes/Cargo.toml

+7-1
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ arrow-buffer = { workspace = true }
2222
fastlanes = { workspace = true }
2323
itertools = { workspace = true }
2424
num-traits = { workspace = true }
25+
rand = { workspace = true, optional = true }
2526
rkyv = { workspace = true }
2627
serde = { workspace = true }
2728
vortex-array = { workspace = true }
@@ -36,6 +37,10 @@ criterion = { workspace = true }
3637
divan = { workspace = true }
3738
rand = { workspace = true }
3839
vortex-array = { workspace = true, features = ["test-harness"] }
40+
vortex-fastlanes = { path = ".", features = ["test-harness"] }
41+
42+
[features]
43+
test-harness = ["dep:rand"]
3944

4045
[[bench]]
4146
name = "bitpacking_take"
@@ -47,4 +52,5 @@ harness = false
4752

4853
[[bench]]
4954
name = "canonicalize_bench"
50-
harness = false
55+
harness = false
56+
required-features = ["test-harness"]
Original file line number | Diff line number | Diff line change
@@ -1,116 +1,134 @@
11
use divan::Bencher;
22
use rand::prelude::StdRng;
3-
use rand::{Rng, SeedableRng};
3+
use rand::SeedableRng;
44
use vortex_array::array::ChunkedArray;
55
use vortex_array::builders::{ArrayBuilder, PrimitiveBuilder};
6-
use vortex_array::{Array, IntoArray, IntoArrayVariant, IntoCanonical};
7-
use vortex_buffer::BufferMut;
8-
use vortex_dtype::NativePType;
9-
use vortex_error::{VortexExpect, VortexUnwrap};
10-
use vortex_fastlanes::bitpack_to_best_bit_width;
6+
use vortex_array::{IntoArray, IntoCanonical};
7+
use vortex_error::{VortexExpect as _, VortexUnwrap};
8+
use vortex_fastlanes::test_harness::make_array;
119

1210
fn main() {
1311
divan::main();
1412
}
1513

16-
fn make_array<T: NativePType>(len: usize) -> Array {
14+
const BENCH_ARGS: [(usize, usize, f64); 10] = [
15+
(100000, 1, 0.10),
16+
(100000, 1, 0.01),
17+
(100000, 1, 0.00),
18+
(100000, 10, 0.10),
19+
(100000, 10, 0.01),
20+
(100000, 10, 0.00),
21+
(100000, 100, 0.10),
22+
(100000, 100, 0.01),
23+
(100000, 100, 0.00),
24+
(100000, 1000, 0.00),
25+
// Too slow for 1000 samples. Try 10 samples.
26+
// (1000000, 100, 0.00),
27+
// (1000000, 1000, 0.00),
28+
// (10000000, 100, 0.00),
29+
];
30+
31+
#[divan::bench(args = BENCH_ARGS)]
32+
fn into_canonical_non_nullable(
33+
bencher: Bencher,
34+
(chunk_len, chunk_count, fraction_patched): (usize, usize, f64),
35+
) {
1736
let mut rng = StdRng::seed_from_u64(0);
18-
let values = (0..len)
19-
.map(|_| T::from(rng.gen_range(0..100)).vortex_expect("valid value"))
20-
.collect::<BufferMut<T>>()
21-
.into_array()
22-
.into_primitive()
23-
.vortex_unwrap();
2437

25-
bitpack_to_best_bit_width(values)
26-
.vortex_unwrap()
27-
.into_array()
38+
let chunks = (0..chunk_count)
39+
.map(|_| {
40+
make_array(&mut rng, chunk_len, fraction_patched, 0.0).vortex_expect("make_array works")
41+
})
42+
.collect::<Vec<_>>();
43+
let chunked = ChunkedArray::from_iter(chunks).into_array();
44+
45+
bencher
46+
.with_inputs(|| chunked.clone())
47+
.bench_values(|chunked| chunked.into_canonical().vortex_unwrap());
2848
}
2949

30-
#[divan::bench()]
31-
fn test() {
32-
let chunks = (0..10).map(|_| make_array::<i32>(100)).collect::<Vec<_>>();
33-
let arr = make_array::<i32>(1);
34-
let chunked = ChunkedArray::try_new(chunks, arr.dtype().clone())
35-
.vortex_unwrap()
36-
.into_array();
50+
#[divan::bench(args = BENCH_ARGS)]
51+
fn canonical_into_non_nullable(
52+
bencher: Bencher,
53+
(chunk_len, chunk_count, fraction_patched): (usize, usize, f64),
54+
) {
55+
let mut rng = StdRng::seed_from_u64(0);
3756

38-
let into_ca = chunked
39-
.clone()
40-
.into_canonical()
41-
.vortex_unwrap()
42-
.into_primitive()
43-
.vortex_unwrap();
44-
let mut primitive_builder =
45-
PrimitiveBuilder::<i32>::with_capacity(arr.dtype().nullability(), 10 * 100);
46-
chunked
47-
.clone()
48-
.canonicalize_into(&mut primitive_builder)
49-
.vortex_unwrap();
50-
let ca_into = primitive_builder.finish().vortex_unwrap();
57+
let chunks = (0..chunk_count)
58+
.map(|_| {
59+
make_array(&mut rng, chunk_len, fraction_patched, 0.0).vortex_expect("make_array works")
60+
})
61+
.collect::<Vec<_>>();
62+
let chunked = ChunkedArray::from_iter(chunks).into_array();
5163

52-
assert_eq!(
53-
into_ca.as_slice::<i32>(),
54-
ca_into.into_primitive().vortex_unwrap().as_slice::<i32>()
55-
);
64+
bencher
65+
.with_inputs(|| chunked.clone())
66+
.bench_values(|chunked| {
67+
let mut primitive_builder = PrimitiveBuilder::<i32>::with_capacity(
68+
chunked.dtype().nullability(),
69+
chunk_len * chunk_count,
70+
);
71+
chunked
72+
.canonicalize_into(&mut primitive_builder)
73+
.vortex_unwrap();
74+
primitive_builder.finish().vortex_unwrap()
75+
});
76+
}
5677

57-
let mut primitive_builder =
58-
PrimitiveBuilder::<i32>::with_capacity(arr.dtype().nullability(), 10 * 100);
59-
primitive_builder.extend_from_array(chunked).vortex_unwrap();
60-
let ca_into = primitive_builder.finish().vortex_unwrap();
78+
const NULLABLE_BENCH_ARGS: [(usize, usize, f64); 6] = [
79+
(100000, 1, 0.10),
80+
(100000, 1, 0.00),
81+
(100000, 10, 0.10),
82+
(100000, 10, 0.00),
83+
(100000, 100, 0.10),
84+
(100000, 100, 0.00),
85+
];
6186

62-
assert_eq!(
63-
into_ca.as_slice::<i32>(),
64-
ca_into.into_primitive().vortex_unwrap().as_slice::<i32>()
65-
);
66-
}
87+
#[divan::bench(args = NULLABLE_BENCH_ARGS)]
88+
fn into_canonical_nullable(
89+
bencher: Bencher,
90+
(chunk_len, chunk_count, fraction_patched): (usize, usize, f64),
91+
) {
92+
let mut rng = StdRng::seed_from_u64(0);
6793

68-
#[divan::bench(
69-
types = [u32],
70-
args = [
71-
// (1000, 100),
72-
// (100000, 100),
73-
// (1000000, 100),
74-
// (100000, 1000),
75-
(100000, 3),
76-
]
77-
)]
78-
fn into_canonical<T: NativePType>(bencher: Bencher, (arr_len, chunk_count): (usize, usize)) {
7994
let chunks = (0..chunk_count)
80-
.map(|_| make_array::<T>(arr_len))
95+
.map(|_| {
96+
make_array(&mut rng, chunk_len, fraction_patched, 0.05)
97+
.vortex_expect("make_array works")
98+
})
8199
.collect::<Vec<_>>();
82-
let arr = make_array::<T>(1);
83-
let chunked = ChunkedArray::try_new(chunks, arr.dtype().clone()).vortex_unwrap();
100+
let chunked = ChunkedArray::from_iter(chunks).into_array();
84101

85-
bencher.bench(|| chunked.clone().into_canonical().vortex_unwrap().len());
102+
bencher
103+
.with_inputs(|| chunked.clone())
104+
.bench_values(|chunked| chunked.into_canonical().vortex_unwrap());
86105
}
87106

88-
#[divan::bench(
89-
types = [u32],
90-
args = [
91-
// (1000, 100),
92-
// (100000, 100),
93-
// (1000000, 100),
94-
// (100000, 1000),
95-
(100000, 3),
96-
]
97-
)]
98-
fn canonical_into<T: NativePType>(bencher: Bencher, (arr_len, chunk_count): (usize, usize)) {
107+
#[divan::bench(args = NULLABLE_BENCH_ARGS)]
108+
fn canonical_into_nullable(
109+
bencher: Bencher,
110+
(chunk_len, chunk_count, fraction_patched): (usize, usize, f64),
111+
) {
112+
let mut rng = StdRng::seed_from_u64(0);
113+
99114
let chunks = (0..chunk_count)
100-
.map(|_| make_array::<T>(arr_len))
115+
.map(|_| {
116+
make_array(&mut rng, chunk_len, fraction_patched, 0.05)
117+
.vortex_expect("make_array works")
118+
})
101119
.collect::<Vec<_>>();
102-
let arr = make_array::<T>(1);
103-
let chunked = ChunkedArray::try_new(chunks, arr.dtype().clone())
104-
.vortex_unwrap()
105-
.into_array();
120+
let chunked = ChunkedArray::from_iter(chunks).into_array();
106121

107-
bencher.bench(|| {
108-
let mut primitive_builder =
109-
PrimitiveBuilder::<T>::with_capacity(arr.dtype().nullability(), arr_len * chunk_count);
110-
chunked
111-
.clone()
112-
.canonicalize_into(&mut primitive_builder)
113-
.vortex_unwrap();
114-
primitive_builder.finish().vortex_unwrap().len()
115-
});
122+
bencher
123+
.with_inputs(|| chunked.clone())
124+
.bench_values(|chunked| {
125+
let mut primitive_builder = PrimitiveBuilder::<i32>::with_capacity(
126+
chunked.dtype().nullability(),
127+
chunk_len * chunk_count,
128+
);
129+
chunked
130+
.canonicalize_into(&mut primitive_builder)
131+
.vortex_unwrap();
132+
primitive_builder.finish().vortex_unwrap()
133+
});
116134
}

0 commit comments

Comments
 (0)