Skip to content

Commit bef0ae6

Browse files
authored
feat: faster Dict logical validity (#1034)
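The codes array itself never contains nulls, because a null value is encoded as code `0`. Iterating the codes through `ArrayAccessor` nevertheless wraps every element in a `Some`, and it seems that rustc (reasonably) cannot fully eliminate that overhead. This change instead reads the raw code slice and builds the validity bitmap directly with `BooleanBuffer::collect_bool`.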
1 parent 8d9f620 commit bef0ae6

File tree

3 files changed

+9
-7
lines changed

3 files changed

+9
-7
lines changed

Cargo.lock

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

encodings/dict/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ categories = { workspace = true }
1414
readme = { workspace = true }
1515

1616
[dependencies]
17+
arrow-buffer = { workspace = true }
1718
hashbrown = { workspace = true }
1819
num-traits = { workspace = true }
1920
serde = { workspace = true }

encodings/dict/src/dict.rs

+7-7
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use std::fmt::{Debug, Display};
22

3+
use arrow_buffer::BooleanBuffer;
34
use serde::{Deserialize, Serialize};
4-
use vortex::accessor::ArrayAccessor;
55
use vortex::array::BoolArray;
66
use vortex::compute::take;
77
use vortex::compute::unary::scalar_at;
@@ -90,12 +90,12 @@ impl ArrayValidity for DictArray {
9090
.into_primitive()
9191
.vortex_expect("Failed to convert DictArray codes to primitive array");
9292
match_each_integer_ptype!(primitive_codes.ptype(), |$P| {
93-
ArrayAccessor::<$P>::with_iterator(&primitive_codes, |iter| {
94-
LogicalValidity::Array(
95-
BoolArray::from(iter.flatten().map(|c| *c != 0).collect::<Vec<_>>())
96-
.into_array(),
97-
)
98-
}).vortex_expect("Failed to convert DictArray codes into logical validity")
93+
let is_valid = primitive_codes
94+
.maybe_null_slice::<$P>();
95+
let is_valid_buffer = BooleanBuffer::collect_bool(is_valid.len(), |idx| {
96+
is_valid[idx] != 0
97+
});
98+
LogicalValidity::Array(BoolArray::from(is_valid_buffer).into_array())
9999
})
100100
} else {
101101
LogicalValidity::AllValid(self.len())

0 commit comments

Comments
 (0)