Skip to content

Commit

Permalink
Refactor string column handling to read contents directly to StringArrays
Browse files Browse the repository at this point in the history
  • Loading branch information
Jefffrey committed Mar 5, 2024
1 parent dd536d6 commit 93b37e6
Show file tree
Hide file tree
Showing 3 changed files with 162 additions and 162 deletions.
1 change: 1 addition & 0 deletions src/arrow_reader/column/present.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ pub fn new_present_iter(
column: &Column,
stripe: &Stripe,
) -> Result<Box<dyn Iterator<Item = Result<bool>>>> {
// TODO: return None if no present stream
let rows = column.number_of_rows as usize;
let iter = stripe
.stream_map
Expand Down
29 changes: 22 additions & 7 deletions src/arrow_reader/decoder/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use std::sync::Arc;

use arrow::array::{Array, ArrayRef, BinaryBuilder, BooleanBuilder, PrimitiveBuilder};
use arrow::datatypes::ArrowPrimitiveType;
use arrow::array::{
Array, ArrayRef, BinaryBuilder, BooleanBuilder, PrimitiveArray, PrimitiveBuilder,
};
use arrow::datatypes::{ArrowPrimitiveType, UInt64Type};
use arrow::datatypes::{
Date32Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, SchemaRef,
TimestampNanosecondType,
Expand Down Expand Up @@ -40,14 +42,12 @@ impl<T: ArrowPrimitiveType> PrimitiveArrayDecoder<T> {
/// Wraps `inner` in a new decoder; batches are later pulled from this iterator.
pub fn new(inner: NullableIterator<T::Native>) -> Self {
Self { inner }
}
}

impl<T: ArrowPrimitiveType> ArrayBatchDecoder for PrimitiveArrayDecoder<T> {
fn next_batch(
fn next_primitive_batch(
&mut self,
batch_size: usize,
parent_present: Option<&[bool]>,
) -> Result<Option<ArrayRef>> {
) -> Result<Option<PrimitiveArray<T>>> {
let mut builder = PrimitiveBuilder::<T>::with_capacity(batch_size);

let mut iter = self.inner.by_ref().take(batch_size);
Expand Down Expand Up @@ -77,7 +77,7 @@ impl<T: ArrowPrimitiveType> ArrayBatchDecoder for PrimitiveArrayDecoder<T> {
}
};

let array = Arc::new(builder.finish());
let array = builder.finish();
if array.is_empty() {
Ok(None)
} else {
Expand All @@ -86,6 +86,19 @@ impl<T: ArrowPrimitiveType> ArrayBatchDecoder for PrimitiveArrayDecoder<T> {
}
}

impl<T: ArrowPrimitiveType> ArrayBatchDecoder for PrimitiveArrayDecoder<T> {
    /// Decodes the next batch and hands it back as a type-erased [`ArrayRef`].
    ///
    /// All of the decoding is delegated to `next_primitive_batch`; this method
    /// only moves the resulting typed array behind an `Arc`. Errors and a
    /// `None` result from the delegate are passed through unchanged.
    fn next_batch(
        &mut self,
        batch_size: usize,
        parent_present: Option<&[bool]>,
    ) -> Result<Option<ArrayRef>> {
        self.next_primitive_batch(batch_size, parent_present)
            .map(|batch| batch.map(|primitive| Arc::new(primitive) as ArrayRef))
    }
}

// Shorthand aliases: `PrimitiveArrayDecoder` instantiated for specific Arrow primitive types.
type UInt64ArrayDecoder = PrimitiveArrayDecoder<UInt64Type>;
type Int64ArrayDecoder = PrimitiveArrayDecoder<Int64Type>;
type Int32ArrayDecoder = PrimitiveArrayDecoder<Int32Type>;
type Int16ArrayDecoder = PrimitiveArrayDecoder<Int16Type>;
Expand Down Expand Up @@ -256,6 +269,8 @@ pub trait ArrayBatchDecoder: Send {
/// then the child doesn't have a value (similar to other nullability). So we need
/// to take care to insert these null values as Arrow requires the child to hold
/// data in the null slot of the child.
// TODO: reconsider returning Option - array already encodes emptiness, this causes
// more boilerplate?
fn next_batch(
&mut self,
batch_size: usize,
Expand Down
Loading

0 comments on commit 93b37e6

Please sign in to comment.