Skip to content

Commit 495682a

Browse files
authored
Add ArrayDataLayout, port validation (#1799) (#3818)
* Add ArrayDataLayout (#1799) * Fix ArrayData::buffer * Don't export macros, yet * Fix doc * Review feedback * Further review feedback
1 parent 2a3fd96 commit 495682a

File tree

16 files changed

+1424
-189
lines changed

16 files changed

+1424
-189
lines changed

arrow-buffer/src/buffer/boolean.rs

+5
Original file line number | Diff line number | Diff line change
@@ -139,4 +139,9 @@ impl BooleanBuffer {
139139
pub fn inner(&self) -> &Buffer {
140140
&self.buffer
141141
}
142+
143+
/// Returns the inner [`Buffer`], consuming self
144+
pub fn into_inner(self) -> Buffer {
145+
self.buffer
146+
}
142147
}

arrow-buffer/src/buffer/offset.rs

+15
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,21 @@ impl<O: ArrowNativeType> OffsetBuffer<O> {
3939
let buffer = MutableBuffer::from_len_zeroed(std::mem::size_of::<O>());
4040
Self(buffer.into_buffer().into())
4141
}
42+
43+
/// Returns the inner [`ScalarBuffer`]
44+
pub fn inner(&self) -> &ScalarBuffer<O> {
45+
&self.0
46+
}
47+
48+
/// Returns the inner [`ScalarBuffer`], consuming self
49+
pub fn into_inner(self) -> ScalarBuffer<O> {
50+
self.0
51+
}
52+
53+
/// Returns a zero-copy slice of this buffer with length `len` and starting at `offset`
54+
pub fn slice(&self, offset: usize, len: usize) -> Self {
55+
Self(self.0.slice(offset, len.saturating_add(1)))
56+
}
4257
}
4358

4459
impl<T: ArrowNativeType> Deref for OffsetBuffer<T> {

arrow-buffer/src/buffer/run.rs

+10
Original file line number | Diff line number | Diff line change
@@ -198,6 +198,16 @@ where
198198
len,
199199
}
200200
}
201+
202+
/// Returns the inner [`ScalarBuffer`]
203+
pub fn inner(&self) -> &ScalarBuffer<E> {
204+
&self.run_ends
205+
}
206+
207+
/// Returns the inner [`ScalarBuffer`], consuming self
208+
pub fn into_inner(self) -> ScalarBuffer<E> {
209+
self.run_ends
210+
}
201211
}
202212

203213
#[cfg(test)]

arrow-buffer/src/buffer/scalar.rs

+15
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,21 @@ impl<T: ArrowNativeType> ScalarBuffer<T> {
5050
let byte_len = len.checked_mul(size).expect("length overflow");
5151
buffer.slice_with_length(byte_offset, byte_len).into()
5252
}
53+
54+
/// Returns a zero-copy slice of this buffer with length `len` and starting at `offset`
55+
pub fn slice(&self, offset: usize, len: usize) -> Self {
56+
Self::new(self.buffer.clone(), offset, len)
57+
}
58+
59+
/// Returns the inner [`Buffer`]
60+
pub fn inner(&self) -> &Buffer {
61+
&self.buffer
62+
}
63+
64+
/// Returns the inner [`Buffer`], consuming self
65+
pub fn into_inner(self) -> Buffer {
66+
self.buffer
67+
}
5368
}
5469

5570
impl<T: ArrowNativeType> Deref for ScalarBuffer<T> {

arrow-data/src/data/boolean.rs

+139
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,139 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use crate::data::types::PhysicalType;
19+
use crate::data::ArrayDataLayout;
20+
use crate::{ArrayDataBuilder, Buffers};
21+
use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
22+
use arrow_schema::DataType;
23+
24+
#[derive(Debug, Clone)]
25+
pub struct BooleanArrayData {
26+
data_type: DataType,
27+
values: BooleanBuffer,
28+
nulls: Option<NullBuffer>,
29+
}
30+
31+
impl BooleanArrayData {
32+
/// Create a new [`BooleanArrayData`]
33+
///
34+
/// # Panics
35+
///
36+
/// Panics if
37+
/// - `nulls` and `values` are different lengths
38+
/// - `PhysicalType::from(&data_type) != PhysicalType::Boolean`
39+
pub fn new(
40+
data_type: DataType,
41+
values: BooleanBuffer,
42+
nulls: Option<NullBuffer>,
43+
) -> Self {
44+
let physical = PhysicalType::from(&data_type);
45+
assert_eq!(
46+
physical, PhysicalType::Boolean,
47+
"Illegal physical type for BooleanArrayData of datatype {:?}, expected {:?} got {:?}",
48+
data_type,
49+
PhysicalType::Boolean,
50+
physical
51+
);
52+
53+
if let Some(n) = nulls.as_ref() {
54+
assert_eq!(values.len(), n.len())
55+
}
56+
Self {
57+
data_type,
58+
values,
59+
nulls,
60+
}
61+
}
62+
63+
/// Create a new [`BooleanArrayData`] without validating its arguments
64+
///
65+
/// # Safety
66+
///
67+
/// - `nulls` and `values` are the same length
68+
/// - `PhysicalType::from(&data_type) == PhysicalType::Boolean`
69+
pub unsafe fn new_unchecked(
70+
data_type: DataType,
71+
values: BooleanBuffer,
72+
nulls: Option<NullBuffer>,
73+
) -> Self {
74+
Self {
75+
data_type,
76+
values,
77+
nulls,
78+
}
79+
}
80+
81+
/// Creates a new [`BooleanArrayData`] from raw buffers
82+
///
83+
/// # Safety
84+
///
85+
/// See [`BooleanArrayData::new_unchecked`]
86+
pub(crate) unsafe fn from_raw(builder: ArrayDataBuilder) -> Self {
87+
let values = builder.buffers.into_iter().next().unwrap();
88+
let values = BooleanBuffer::new(values, builder.offset, builder.len);
89+
Self {
90+
values,
91+
data_type: builder.data_type,
92+
nulls: builder.nulls,
93+
}
94+
}
95+
96+
/// Returns the null buffer if any
97+
#[inline]
98+
pub fn nulls(&self) -> Option<&NullBuffer> {
99+
self.nulls.as_ref()
100+
}
101+
102+
/// Returns the boolean values
103+
#[inline]
104+
pub fn values(&self) -> &BooleanBuffer {
105+
&self.values
106+
}
107+
108+
/// Returns the data type of this array
109+
#[inline]
110+
pub fn data_type(&self) -> &DataType {
111+
&self.data_type
112+
}
113+
114+
/// Returns the underlying parts of this [`BooleanArrayData`]
115+
pub fn into_parts(self) -> (DataType, BooleanBuffer, Option<NullBuffer>) {
116+
(self.data_type, self.values, self.nulls)
117+
}
118+
119+
/// Returns a zero-copy slice of this array
120+
pub fn slice(&self, offset: usize, len: usize) -> Self {
121+
Self {
122+
data_type: self.data_type.clone(),
123+
values: self.values.slice(offset, len),
124+
nulls: self.nulls.as_ref().map(|x| x.slice(offset, len)),
125+
}
126+
}
127+
128+
/// Returns an [`ArrayDataLayout`] representation of this array
129+
pub(crate) fn layout(&self) -> ArrayDataLayout<'_> {
130+
ArrayDataLayout {
131+
data_type: &self.data_type,
132+
len: self.values.len(),
133+
offset: self.values.offset(),
134+
nulls: self.nulls.as_ref(),
135+
buffers: Buffers::one(self.values().inner()),
136+
child_data: &[],
137+
}
138+
}
139+
}

arrow-data/src/data/buffers.rs

+10-1
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,6 @@ pub struct Buffers<'a>([Option<&'a Buffer>; 2]);
2525

2626
impl<'a> Buffers<'a> {
2727
/// Temporary; will be removed once `ArrayData` no longer stores `Vec<Buffer>` directly (#3769)
28-
#[inline]
2928
pub(crate) fn from_slice(a: &'a [Buffer]) -> Self {
3029
match a.len() {
3130
0 => Self([None, None]),
@@ -34,6 +33,16 @@ impl<'a> Buffers<'a> {
3433
}
3534
}
3635

36+
#[inline]
37+
pub(crate) fn one(b: &'a Buffer) -> Self {
38+
Self([Some(b), None])
39+
}
40+
41+
#[inline]
42+
pub(crate) fn two(a: &'a Buffer, b: &'a Buffer) -> Self {
43+
Self([Some(a), Some(b)])
44+
}
45+
3746
/// Returns the number of [`Buffer`] in this collection
3847
#[inline]
3948
pub fn len(&self) -> usize {

0 commit comments

Comments
 (0)