Skip to content

Commit d292c65

Browse files
authored
Add BinaryView::make_view method (#2664)
1 parent 6b135ac commit d292c65

File tree

5 files changed

+93
-51
lines changed

5 files changed

+93
-51
lines changed

encodings/dict/src/builders/bytes.rs

+6-9
Original file line number | Diff line number | Diff line change
@@ -53,15 +53,12 @@ impl BytesDictBuilder {
5353
)
5454
.or_insert_with(|| {
5555
let next_code = self.views.len() as u64;
56-
if val.len() <= BinaryView::MAX_INLINED_SIZE {
57-
self.views.push(BinaryView::new_inlined(val));
58-
} else {
59-
self.views.push(BinaryView::new_view(
60-
u32::try_from(val.len()).vortex_unwrap(),
61-
val[0..4].try_into().vortex_unwrap(),
62-
0,
63-
u32::try_from(self.values.len()).vortex_unwrap(),
64-
));
56+
self.views.push(BinaryView::make_view(
57+
val,
58+
0,
59+
u32::try_from(self.values.len()).vortex_unwrap(),
60+
));
61+
if val.len() > BinaryView::MAX_INLINED_SIZE {
6562
self.values.extend_from_slice(val);
6663
}
6764
next_code

encodings/fsst/src/canonical.rs

+1-2
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,3 @@
1-
use arrow_array::builder::make_view;
21
use fsst::Decompressor;
32
use vortex_array::arrays::{BinaryView, VarBinArray, VarBinViewArray};
43
use vortex_array::builders::{ArrayBuilder, VarBinViewBuilder};
@@ -78,7 +77,7 @@ fn fsst_into_varbin_view(
7877
let mut offset = 0;
7978
for len in uncompressed_lens_array.as_slice::<$P>() {
8079
let len = *len as usize;
81-
let view = make_view(
80+
let view = BinaryView::make_view(
8281
&uncompressed_bytes[offset..][..len],
8382
block_offset,
8483
offset as u32,

vortex-array/src/arrays/constant/canonical.rs

+1-2
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,3 @@
1-
use arrow_array::builder::make_view;
21
use arrow_buffer::BooleanBuffer;
32
use vortex_buffer::{Buffer, BufferMut, buffer};
43
use vortex_dtype::{DType, Nullability, PType, match_each_native_ptype};
@@ -114,7 +113,7 @@ fn canonical_byte_view(
114113
Some(scalar_bytes) => {
115114
// Create a view to hold the scalar bytes.
116115
// If the scalar cannot be inlined, allocate a single buffer large enough to hold it.
117-
let view = BinaryView::from(make_view(scalar_bytes, 0, 0));
116+
let view = BinaryView::make_view(scalar_bytes, 0, 0);
118117
let mut buffers = Vec::new();
119118
if scalar_bytes.len() >= BinaryView::MAX_INLINED_SIZE {
120119
buffers.push(Buffer::copy_from(scalar_bytes));

vortex-array/src/arrays/varbinview/mod.rs

+77-27
Original file line number | Diff line number | Diff line change
@@ -38,17 +38,12 @@ pub struct Inlined {
3838
}
3939

4040
impl Inlined {
41-
pub fn new(value: &[u8]) -> Self {
42-
assert!(
43-
value.len() <= BinaryView::MAX_INLINED_SIZE,
44-
"Inlined strings must be shorter than 13 characters, {} given",
45-
value.len()
46-
);
41+
fn new<const N: usize>(value: &[u8]) -> Self {
4742
let mut inlined = Self {
48-
size: value.len().try_into().vortex_unwrap(),
43+
size: N.try_into().vortex_unwrap(),
4944
data: [0u8; BinaryView::MAX_INLINED_SIZE],
5045
};
51-
inlined.data[..value.len()].copy_from_slice(value);
46+
inlined.data[..N].copy_from_slice(&value[..N]);
5247
inlined
5348
}
5449

@@ -120,29 +115,82 @@ assert_eq_align!(BinaryView, u128);
120115
impl BinaryView {
121116
pub const MAX_INLINED_SIZE: usize = 12;
122117

123-
pub fn empty_view() -> Self {
124-
Self {
125-
inlined: Inlined::new(&[]),
118+
/// Create a view from a value, block and offset
119+
///
120+
/// Depending on the length of the provided value either a new inlined
121+
/// or a reference view will be constructed.
122+
///
123+
/// Adapted from arrow-rs <https://github.com/apache/arrow-rs/blob/f4fde769ab6e1a9b75f890b7f8b47bc22800830b/arrow-array/src/builder/generic_bytes_view_builder.rs#L524>
124+
/// Explicitly enumerating each inlined length produces code that avoids calling the generic `ptr::copy_nonoverlapping`, which is slower than explicit stores.
125+
#[inline(never)]
126+
pub fn make_view(value: &[u8], block: u32, offset: u32) -> Self {
127+
match value.len() {
128+
0 => Self {
129+
inlined: Inlined::new::<0>(value),
130+
},
131+
1 => Self {
132+
inlined: Inlined::new::<1>(value),
133+
},
134+
2 => Self {
135+
inlined: Inlined::new::<2>(value),
136+
},
137+
3 => Self {
138+
inlined: Inlined::new::<3>(value),
139+
},
140+
4 => Self {
141+
inlined: Inlined::new::<4>(value),
142+
},
143+
5 => Self {
144+
inlined: Inlined::new::<5>(value),
145+
},
146+
6 => Self {
147+
inlined: Inlined::new::<6>(value),
148+
},
149+
7 => Self {
150+
inlined: Inlined::new::<7>(value),
151+
},
152+
8 => Self {
153+
inlined: Inlined::new::<8>(value),
154+
},
155+
9 => Self {
156+
inlined: Inlined::new::<9>(value),
157+
},
158+
10 => Self {
159+
inlined: Inlined::new::<10>(value),
160+
},
161+
11 => Self {
162+
inlined: Inlined::new::<11>(value),
163+
},
164+
12 => Self {
165+
inlined: Inlined::new::<12>(value),
166+
},
167+
_ => Self {
168+
_ref: Ref::new(
169+
u32::try_from(value.len()).vortex_unwrap(),
170+
value[0..4].try_into().vortex_unwrap(),
171+
block,
172+
offset,
173+
),
174+
},
126175
}
127176
}
128177

178+
/// Create a new empty view
179+
#[inline]
180+
pub fn empty_view() -> Self {
181+
Self::new_inlined(&[])
182+
}
183+
184+
/// Create a new inlined binary view
185+
#[inline]
129186
pub fn new_inlined(value: &[u8]) -> Self {
130187
assert!(
131188
value.len() <= Self::MAX_INLINED_SIZE,
132189
"expected inlined value to be <= 12 bytes, was {}",
133190
value.len()
134191
);
135192

136-
Self {
137-
inlined: Inlined::new(value),
138-
}
139-
}
140-
141-
/// Create a new view over bytes stored in a block.
142-
pub fn new_view(len: u32, prefix: [u8; 4], block: u32, offset: u32) -> Self {
143-
Self {
144-
_ref: Ref::new(len, prefix, block, offset),
145-
}
193+
Self::make_view(value, 0, 0)
146194
}
147195

148196
#[inline]
@@ -183,12 +231,14 @@ impl BinaryView {
183231
} else {
184232
// Referencing views must have their buffer_index adjusted with new offsets
185233
let view_ref = self.as_view();
186-
BinaryView::new_view(
187-
self.len(),
188-
*view_ref.prefix(),
189-
offset + view_ref.buffer_index(),
190-
view_ref.offset(),
191-
)
234+
Self {
235+
_ref: Ref::new(
236+
self.len(),
237+
*view_ref.prefix(),
238+
offset + view_ref.buffer_index(),
239+
view_ref.offset(),
240+
),
241+
}
192242
}
193243
}
194244
}

vortex-array/src/builders/varbinview.rs

+8-11
Original file line number | Diff line number | Diff line change
@@ -40,27 +40,24 @@ impl VarBinViewBuilder {
4040
}
4141

4242
fn append_value_view(&mut self, value: &[u8]) {
43-
let v: &[u8] = value;
4443
let length =
45-
u32::try_from(v.len()).vortex_expect("cannot have a single string >2^32 in length");
44+
u32::try_from(value.len()).vortex_expect("cannot have a single string >2^32 in length");
4645
if length <= 12 {
47-
self.views_builder.push(BinaryView::new_inlined(v));
46+
self.views_builder.push(BinaryView::make_view(value, 0, 0));
4847
return;
4948
}
5049

51-
let required_cap = self.in_progress.len() + v.len();
50+
let required_cap = self.in_progress.len() + value.len();
5251
if self.in_progress.capacity() < required_cap {
5352
self.flush_in_progress();
54-
let to_reserve = max(v.len(), VarBinViewBuilder::BLOCK_SIZE as usize);
53+
let to_reserve = max(value.len(), VarBinViewBuilder::BLOCK_SIZE as usize);
5554
self.in_progress.reserve(to_reserve);
5655
};
57-
let offset = u32::try_from(self.in_progress.len()).vortex_expect("too many buffers");
58-
self.in_progress.extend_from_slice(v);
5956

60-
let view = BinaryView::new_view(
61-
length,
62-
// inline the first 4 bytes of the view
63-
v[0..4].try_into().vortex_expect("length already checked"),
57+
let offset = u32::try_from(self.in_progress.len()).vortex_expect("too many buffers");
58+
self.in_progress.extend_from_slice(value);
59+
let view = BinaryView::make_view(
60+
value,
6461
// buffer index (the block this view points into)
6562
u32::try_from(self.completed.len()).vortex_expect("too many buffers"),
6663
offset,

0 commit comments

Comments
 (0)