Skip to content

Commit cfdabe0

Browse files
doki23tustvold
authored andcommitted
Add OffsetsBuilder
1 parent f41c2a4 commit cfdabe0

File tree

5 files changed

+192
-4
lines changed

5 files changed

+192
-4
lines changed

arrow-buffer/Cargo.toml

+4
Original file line numberDiff line numberDiff line change
@@ -46,4 +46,8 @@ rand = { version = "0.8", default-features = false, features = ["std", "std_rng"
4646

4747
[[bench]]
4848
name = "i256"
49+
harness = false
50+
51+
[[bench]]
52+
name = "offset"
4953
harness = false

arrow-buffer/benches/offset.rs

+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use arrow_buffer::{OffsetBuffer, OffsetBufferBuilder};
19+
use criterion::*;
20+
use rand::rngs::StdRng;
21+
use rand::{Rng, SeedableRng};
22+
23+
const SIZE: usize = 1024;
24+
25+
fn criterion_benchmark(c: &mut Criterion) {
26+
let mut rng = StdRng::seed_from_u64(42);
27+
let lengths: Vec<usize> = black_box((0..SIZE).map(|_| rng.gen_range(0..40)).collect());
28+
29+
c.bench_function("OffsetBuffer::from_lengths", |b| {
30+
b.iter(|| OffsetBuffer::<i32>::from_lengths(lengths.iter().copied()));
31+
});
32+
33+
c.bench_function("OffsetBufferBuilder::push_length", |b| {
34+
b.iter(|| {
35+
let mut builder = OffsetBufferBuilder::<i32>::new(lengths.len());
36+
lengths.iter().for_each(|x| builder.push_length(*x));
37+
builder.finish()
38+
});
39+
});
40+
41+
let offsets = OffsetBuffer::<i32>::from_lengths(lengths.iter().copied()).into_inner();
42+
43+
c.bench_function("OffsetBuffer::new", |b| {
44+
b.iter(|| OffsetBuffer::new(black_box(offsets.clone())));
45+
});
46+
}
47+
48+
criterion_group!(benches, criterion_benchmark);
49+
criterion_main!(benches);

arrow-buffer/src/buffer/offset.rs

+10-3
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616
// under the License.
1717

1818
use crate::buffer::ScalarBuffer;
19-
use crate::{ArrowNativeType, MutableBuffer};
20-
use std::ops::Deref;
19+
use crate::{ArrowNativeType, MutableBuffer, OffsetBufferBuilder};
20+
use std::ops::{Add, Deref, Sub};
2121

2222
/// A non-empty buffer of monotonically increasing, non-negative integers.
2323
///
@@ -55,7 +55,6 @@ use std::ops::Deref;
5555
/// (offsets[i],
5656
/// offsets[i+1])
5757
/// ```
58-
5958
#[derive(Debug, Clone)]
6059
pub struct OffsetBuffer<O: ArrowNativeType>(ScalarBuffer<O>);
6160

@@ -174,6 +173,14 @@ impl<T: ArrowNativeType> AsRef<[T]> for OffsetBuffer<T> {
174173
}
175174
}
176175

176+
impl<O: ArrowNativeType + Add<Output = O> + Sub<Output = O>> From<OffsetBufferBuilder<O>>
177+
for OffsetBuffer<O>
178+
{
179+
fn from(value: OffsetBufferBuilder<O>) -> Self {
180+
value.finish()
181+
}
182+
}
183+
177184
#[cfg(test)]
178185
mod tests {
179186
use super::*;

arrow-buffer/src/builder/mod.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,12 @@
1818
//! Buffer builders
1919
2020
mod boolean;
21-
pub use boolean::*;
2221
mod null;
22+
mod offset;
23+
24+
pub use boolean::*;
2325
pub use null::*;
26+
pub use offset::*;
2427

2528
use crate::{ArrowNativeType, Buffer, MutableBuffer};
2629
use std::{iter, marker::PhantomData};

arrow-buffer/src/builder/offset.rs

+125
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use std::ops::Deref;
19+
20+
use crate::{ArrowNativeType, OffsetBuffer};
21+
22+
#[derive(Debug)]
23+
pub struct OffsetBufferBuilder<O: ArrowNativeType> {
24+
offsets: Vec<O>,
25+
last_offset: usize,
26+
}
27+
28+
/// Builder of [`OffsetBuffer`]
29+
impl<O: ArrowNativeType> OffsetBufferBuilder<O> {
30+
/// Create a new builder with space for `capacity + 1` offsets
31+
pub fn new(capacity: usize) -> Self {
32+
let mut offsets = Vec::with_capacity(capacity + 1);
33+
offsets.push(O::usize_as(0));
34+
Self {
35+
offsets,
36+
last_offset: 0,
37+
}
38+
}
39+
40+
/// Push a slice of `length` bytes
41+
///
42+
/// # Panics
43+
///
44+
/// Panics if adding `length` would overflow `usize`
45+
#[inline]
46+
pub fn push_length(&mut self, length: usize) {
47+
self.last_offset = self.last_offset.checked_add(length).expect("overflow");
48+
self.offsets.push(O::usize_as(self.last_offset))
49+
}
50+
51+
/// Reserve space for at least `additional` further offsets
52+
#[inline]
53+
pub fn reserve(&mut self, additional: usize) {
54+
self.offsets.reserve(additional);
55+
}
56+
57+
/// Takes the builder itself and returns an [`OffsetBuffer`]
58+
///
59+
/// # Panics
60+
///
61+
/// Panics if the final offset cannot be represented by the offset type `O`
62+
pub fn finish(self) -> OffsetBuffer<O> {
63+
O::from_usize(self.last_offset).expect("overflow");
64+
unsafe { OffsetBuffer::new_unchecked(self.offsets.into()) }
65+
}
66+
67+
/// Builds an [`OffsetBuffer`] from a copy of the offsets, leaving the builder usable.
68+
///
69+
/// # Panics
70+
///
71+
/// Panics if the final offset cannot be represented by the offset type `O`
72+
pub fn finish_cloned(&self) -> OffsetBuffer<O> {
73+
O::from_usize(self.last_offset).expect("overflow");
74+
unsafe { OffsetBuffer::new_unchecked(self.offsets.clone().into()) }
75+
}
76+
}
77+
78+
impl<O: ArrowNativeType> Deref for OffsetBufferBuilder<O> {
79+
type Target = [O];
80+
81+
fn deref(&self) -> &Self::Target {
82+
self.offsets.as_ref()
83+
}
84+
}
85+
86+
#[cfg(test)]
87+
mod tests {
88+
use crate::OffsetBufferBuilder;
89+
90+
#[test]
91+
fn test_basic() {
92+
let mut builder = OffsetBufferBuilder::<i32>::new(5);
93+
assert_eq!(builder.len(), 1);
94+
assert_eq!(&*builder, &[0]);
95+
let finished = builder.finish_cloned();
96+
assert_eq!(finished.len(), 1);
97+
assert_eq!(&*finished, &[0]);
98+
99+
builder.push_length(2);
100+
builder.push_length(6);
101+
builder.push_length(0);
102+
builder.push_length(13);
103+
104+
let finished = builder.finish();
105+
assert_eq!(&*finished, &[0, 2, 8, 8, 21]);
106+
}
107+
108+
#[test]
109+
#[should_panic(expected = "overflow")]
110+
fn test_usize_overflow() {
111+
let mut builder = OffsetBufferBuilder::<i32>::new(5);
112+
builder.push_length(1);
113+
builder.push_length(usize::MAX);
114+
builder.finish();
115+
}
116+
117+
#[test]
118+
#[should_panic(expected = "overflow")]
119+
fn test_i32_overflow() {
120+
let mut builder = OffsetBufferBuilder::<i32>::new(5);
121+
builder.push_length(1);
122+
builder.push_length(i32::MAX as usize);
123+
builder.finish();
124+
}
125+
}

0 commit comments

Comments
 (0)