Skip to content

Commit eff058f

Browse files
authored
Use NullBuffer in ArrayData (#3775) (#3778)
* Use NullBuffer in ArrayData (#3775)
* Clippy
* Format
* Doc
* Tweaks
* Review feedback
1 parent 7852e76 commit eff058f

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

56 files changed

+577
-719
lines changed

arrow-arith/src/aggregate.rs

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -117,8 +117,8 @@ where
117117
.map(|i| unsafe { array.value_unchecked(i) })
118118
.reduce(|acc, item| if cmp(&acc, &item) { item } else { acc })
119119
} else {
120-
let null_buffer = array.data_ref().null_buffer().unwrap();
121-
let iter = BitIndexIterator::new(null_buffer, array.offset(), array.len());
120+
let nulls = array.data().nulls().unwrap();
121+
let iter = BitIndexIterator::new(nulls.validity(), nulls.offset(), nulls.len());
122122
unsafe {
123123
let idx = iter.reduce(|acc_idx, idx| {
124124
let acc = array.value_unchecked(acc_idx);
@@ -288,20 +288,20 @@ where
288288

289289
let data: &[T::Native] = array.values();
290290

291-
match array.data().null_buffer() {
291+
match array.data().nulls() {
292292
None => {
293293
let sum = data.iter().fold(T::default_value(), |accumulator, value| {
294294
accumulator.add_wrapping(*value)
295295
});
296296

297297
Some(sum)
298298
}
299-
Some(buffer) => {
299+
Some(nulls) => {
300300
let mut sum = T::default_value();
301301
let data_chunks = data.chunks_exact(64);
302302
let remainder = data_chunks.remainder();
303303

304-
let bit_chunks = buffer.bit_chunks(array.offset(), array.len());
304+
let bit_chunks = nulls.inner().bit_chunks();
305305
data_chunks
306306
.zip(bit_chunks.iter())
307307
.for_each(|(chunk, mask)| {
@@ -347,7 +347,7 @@ where
347347

348348
let data: &[T::Native] = array.values();
349349

350-
match array.data().null_buffer() {
350+
match array.data().nulls() {
351351
None => {
352352
let sum = data
353353
.iter()
@@ -357,14 +357,14 @@ where
357357

358358
Ok(Some(sum))
359359
}
360-
Some(buffer) => {
360+
Some(nulls) => {
361361
let mut sum = T::default_value();
362362

363363
try_for_each_valid_idx(
364-
array.len(),
365-
array.offset(),
366-
null_count,
367-
Some(buffer.as_slice()),
364+
nulls.len(),
365+
nulls.offset(),
366+
nulls.null_count(),
367+
Some(nulls.validity()),
368368
|idx| {
369369
unsafe { sum = sum.add_checked(array.value_unchecked(idx))? };
370370
Ok::<_, ArrowError>(())
@@ -665,7 +665,7 @@ mod simd {
665665
let mut chunk_acc = A::init_accumulator_chunk();
666666
let mut rem_acc = A::init_accumulator_scalar();
667667

668-
match array.data().null_buffer() {
668+
match array.data().nulls() {
669669
None => {
670670
let data_chunks = data.chunks_exact(64);
671671
let remainder = data_chunks.remainder();
@@ -681,12 +681,12 @@ mod simd {
681681
A::accumulate_scalar(&mut rem_acc, *value);
682682
});
683683
}
684-
Some(buffer) => {
684+
Some(nulls) => {
685685
// process data in chunks of 64 elements since we also get 64 bits of validity information at a time
686686
let data_chunks = data.chunks_exact(64);
687687
let remainder = data_chunks.remainder();
688688

689-
let bit_chunks = buffer.bit_chunks(array.offset(), array.len());
689+
let bit_chunks = nulls.inner().bit_chunks();
690690
let remainder_bits = bit_chunks.remainder_bits();
691691

692692
data_chunks.zip(bit_chunks).for_each(|(chunk, mut mask)| {

arrow-arith/src/arithmetic.rs

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1572,6 +1572,7 @@ mod tests {
15721572
use arrow_array::builder::{
15731573
BooleanBufferBuilder, BufferBuilder, PrimitiveDictionaryBuilder,
15741574
};
1575+
use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
15751576
use arrow_buffer::i256;
15761577
use arrow_data::ArrayDataBuilder;
15771578
use chrono::NaiveDate;
@@ -3057,15 +3058,19 @@ mod tests {
30573058
// `count_set_bits_offset` takes len in bits as parameter.
30583059
assert_eq!(null_buffer.count_set_bits_offset(0, 13), 0);
30593060

3061+
let nulls = BooleanBuffer::new(null_buffer, 0, 13);
3062+
assert_eq!(nulls.count_set_bits(), 0);
3063+
let nulls = NullBuffer::new(nulls);
3064+
assert_eq!(nulls.null_count(), 13);
3065+
30603066
let mut data_buffer_builder = BufferBuilder::<i32>::new(13);
30613067
data_buffer_builder.append_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]);
30623068
let data_buffer = data_buffer_builder.finish();
30633069

30643070
let arg1: Int32Array = ArrayDataBuilder::new(DataType::Int32)
30653071
.len(13)
3066-
.null_count(13)
3072+
.nulls(Some(nulls))
30673073
.buffers(vec![data_buffer])
3068-
.null_bit_buffer(Some(null_buffer))
30693074
.build()
30703075
.unwrap()
30713076
.into();
@@ -3078,9 +3083,7 @@ mod tests {
30783083

30793084
let arg2: Int32Array = ArrayDataBuilder::new(DataType::Int32)
30803085
.len(13)
3081-
.null_count(0)
30823086
.buffers(vec![data_buffer])
3083-
.null_bit_buffer(None)
30843087
.build()
30853088
.unwrap()
30863089
.into();

arrow-arith/src/arity.rs

Lines changed: 3 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
use arrow_array::builder::BufferBuilder;
2121
use arrow_array::iterator::ArrayIter;
2222
use arrow_array::*;
23+
use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
2324
use arrow_buffer::{Buffer, MutableBuffer};
2425
use arrow_data::bit_iterator::try_for_each_valid_idx;
2526
use arrow_data::bit_mask::combine_option_bitmap;
@@ -276,10 +277,7 @@ where
276277
let len = a.len();
277278

278279
let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len);
279-
let null_count = null_buffer
280-
.as_ref()
281-
.map(|x| len - x.count_set_bits_offset(0, len))
282-
.unwrap_or_default();
280+
let nulls = null_buffer.map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, len)));
283281

284282
let mut builder = a.into_builder()?;
285283

@@ -289,13 +287,7 @@ where
289287
.zip(b.values())
290288
.for_each(|(l, r)| *l = op(*l, *r));
291289

292-
let array_builder = builder
293-
.finish()
294-
.data()
295-
.clone()
296-
.into_builder()
297-
.null_bit_buffer(null_buffer)
298-
.null_count(null_count);
290+
let array_builder = builder.finish().into_data().into_builder().nulls(nulls);
299291

300292
let array_data = unsafe { array_builder.build_unchecked() };
301293
Ok(Ok(PrimitiveArray::<T>::from(array_data)))

0 commit comments

Comments
 (0)