Skip to content

Commit

Permalink
Refactor BooleanArrayDecoder to use PrimitiveValueDecoder
Browse files Browse the repository at this point in the history
  • Loading branch information
Jefffrey committed Sep 25, 2024
1 parent 6c58290 commit 8de74e7
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 35 deletions.
42 changes: 11 additions & 31 deletions src/array_decoder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

use std::sync::Arc;

use arrow::array::{ArrayRef, BooleanArray, BooleanBuilder, PrimitiveArray};
use arrow::array::{ArrayRef, BooleanArray, PrimitiveArray};
use arrow::buffer::NullBuffer;
use arrow::datatypes::ArrowNativeTypeOp;
use arrow::datatypes::{ArrowPrimitiveType, Decimal128Type};
Expand Down Expand Up @@ -152,13 +152,13 @@ impl ArrayBatchDecoder for DecimalArrayDecoder {
}

/// Batch decoder that turns a stream of decoded `bool` values, plus an optional
/// stream of presence flags, into a `BooleanArray`.
///
/// NOTE(review): this listing is a rendered diff with the +/- markers stripped, so two
/// declarations of `iter` appear below. Only one of them exists in the real file at any
/// given revision; judging by the commit title, the `PrimitiveValueDecoder` one is the
/// post-refactor version.
struct BooleanArrayDecoder {
// Presumably the removed (pre-refactor) declaration: a boxed iterator of fallible values.
iter: Box<dyn Iterator<Item = Result<bool>> + Send>,
// Presumably the added (post-refactor) declaration: a boxed value decoder for `bool`.
iter: Box<dyn PrimitiveValueDecoder<bool> + Send>,
// Optional iterator of presence flags; it is handed to `derive_present_vec` together with
// the parent's presence slice when a batch is decoded.
present: Option<Box<dyn Iterator<Item = bool> + Send>>,
}

impl BooleanArrayDecoder {
pub fn new(
iter: Box<dyn Iterator<Item = Result<bool>> + Send>,
iter: Box<dyn PrimitiveValueDecoder<bool> + Send>,
present: Option<Box<dyn Iterator<Item = bool> + Send>>,
) -> Self {
Self { iter, present }
Expand All @@ -172,38 +172,18 @@ impl ArrayBatchDecoder for BooleanArrayDecoder {
parent_present: Option<&[bool]>,
) -> Result<ArrayRef> {
let present = derive_present_vec(&mut self.present, parent_present, batch_size);

match present {
let mut data = vec![false; batch_size];
let array = match present {
Some(present) => {
let mut builder = BooleanBuilder::with_capacity(batch_size);
for is_present in present {
if is_present {
// TODO: return as error instead
let val = self
.iter
.next()
.transpose()?
.expect("array less than expected length");
builder.append_value(val);
} else {
builder.append_null();
}
}
let array = builder.finish();
let array = Arc::new(array) as ArrayRef;
Ok(array)
self.iter.decode_spaced(data.as_mut_slice(), &present)?;
BooleanArray::new(data.into(), Some(present.into()))
}
None => {
let data = self
.iter
.by_ref()
.take(batch_size)
.collect::<Result<Vec<_>>>()?;
let array = BooleanArray::from(data);
let array = Arc::new(array) as ArrayRef;
Ok(array)
self.iter.decode(data.as_mut_slice())?;
BooleanArray::from(data)
}
}
};
Ok(Arc::new(array))
}
}

Expand Down
10 changes: 6 additions & 4 deletions src/encoding/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,19 @@ use crate::{error::Result, memory::EstimateMemory};

use super::{
byte::{ByteRleDecoder, ByteRleEncoder},
PrimitiveValueEncoder,
PrimitiveValueDecoder, PrimitiveValueEncoder,
};

/// Decoder that yields booleans from bytes produced by a wrapped `ByteRleDecoder`.
///
/// NOTE(review): this listing is a rendered diff with the +/- markers stripped. `iter` and
/// `decoder` are the two sides of a field rename, so only one of them exists in the real
/// file; the commit appears to rename `iter` to `decoder`.
pub struct BooleanDecoder<R: Read> {
// Presumably the removed (old) name of the wrapped Byte RLE decoder.
iter: ByteRleDecoder<R>,
// Presumably the added (new) name of the wrapped Byte RLE decoder.
decoder: ByteRleDecoder<R>,
// Byte most recently pulled from the wrapped decoder.
data: u8,
// Set to 8 when a fresh byte is loaded; a value of 0 triggers loading the next byte.
bits_in_data: usize,
}

impl<R: Read> BooleanDecoder<R> {
pub fn new(reader: R) -> Self {
Self {
iter: ByteRleDecoder::new(reader),
decoder: ByteRleDecoder::new(reader),
bits_in_data: 0,
data: 0,
}
Expand All @@ -61,7 +61,7 @@ impl<R: Read> Iterator for BooleanDecoder<R> {
fn next(&mut self) -> Option<Self::Item> {
// read more data if necessary
if self.bits_in_data == 0 {
match self.iter.next() {
match self.decoder.next() {
Some(Ok(data)) => {
self.data = data as u8;
self.bits_in_data = 8;
Expand All @@ -76,6 +76,8 @@ impl<R: Read> Iterator for BooleanDecoder<R> {
}
}

// Empty impl: no trait items are overridden here, so `BooleanDecoder` relies entirely on
// whatever `PrimitiveValueDecoder` provides by default (presumably built on the `Iterator`
// impl above — TODO confirm against the trait definition).
impl<R: Read> PrimitiveValueDecoder<bool> for BooleanDecoder<R> {}

/// ORC encodes validity starting from MSB, whilst Arrow encodes it
/// from LSB. After bytes are filled with the present bits, they are
/// further encoded via Byte RLE.
Expand Down

0 comments on commit 8de74e7

Please sign in to comment.