Skip to content

Implement Extend for ArrayBuilder (#1841) #3563

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 1 addition & 12 deletions arrow-array/src/array/dictionary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -481,18 +481,7 @@ impl<'a, T: ArrowDictionaryKeyType> FromIterator<Option<&'a str>> for Dictionary
let it = iter.into_iter();
let (lower, _) = it.size_hint();
let mut builder = StringDictionaryBuilder::with_capacity(lower, 256, 1024);
it.for_each(|i| {
if let Some(i) = i {
// Note: impl ... for Result<DictionaryArray<T>> fails with
// error[E0117]: only traits defined in the current crate can be implemented for arbitrary types
builder
.append(i)
.expect("Unable to append a value to a dictionary array.");
} else {
builder.append_null();
}
});

builder.extend(it);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that is certainly nicer

builder.finish()
}
}
Expand Down
22 changes: 22 additions & 0 deletions arrow-array/src/builder/boolean_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,15 @@ impl ArrayBuilder for BooleanBuilder {
}
}

impl Extend<Option<bool>> for BooleanBuilder {
#[inline]
fn extend<T: IntoIterator<Item = Option<bool>>>(&mut self, iter: T) {
for v in iter {
self.append_option(v)
}
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -304,4 +313,17 @@ mod tests {
assert_eq!(0, array.null_count());
assert!(array.data().null_buffer().is_none());
}

#[test]
fn test_extend() {
let mut builder = BooleanBuilder::new();
builder.extend([false, false, true, false, false].into_iter().map(Some));
builder.extend([true, true, false].into_iter().map(Some));
let array = builder.finish();
let values = array.iter().map(|x| x.unwrap()).collect::<Vec<_>>();
assert_eq!(
&values,
&[false, false, true, false, false, true, true, false]
)
}
}
23 changes: 23 additions & 0 deletions arrow-array/src/builder/generic_bytes_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
}

/// Appends a value into the builder.
///
/// # Panics
///
/// Panics if the resulting length of [`Self::values_slice`] would exceed `T::Offset::MAX`
#[inline]
pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
self.value_builder.append_slice(value.as_ref().as_ref());
Expand Down Expand Up @@ -216,6 +220,15 @@ impl<T: ByteArrayType> ArrayBuilder for GenericByteBuilder<T> {
}
}

impl<T: ByteArrayType, V: AsRef<T::Native>> Extend<Option<V>> for GenericByteBuilder<T> {
#[inline]
fn extend<I: IntoIterator<Item = Option<V>>>(&mut self, iter: I) {
for v in iter {
self.append_option(v)
}
}
}

/// Array builder for [`GenericStringArray`][crate::GenericStringArray]
pub type GenericStringBuilder<O> = GenericByteBuilder<GenericStringType<O>>;

Expand Down Expand Up @@ -417,4 +430,14 @@ mod tests {
fn test_large_string_array_builder_finish_cloned() {
_test_generic_string_array_builder_finish_cloned::<i64>()
}

#[test]
fn test_extend() {
let mut builder = GenericStringBuilder::<i32>::new();
builder.extend(["a", "b", "c", "", "a", "b", "c"].into_iter().map(Some));
builder.extend(["d", "cupcakes", "hello"].into_iter().map(Some));
let array = builder.finish();
assert_eq!(array.value_offsets(), &[0, 1, 2, 3, 3, 4, 5, 6, 7, 15, 20]);
assert_eq!(array.value_data(), b"abcabcdcupcakeshello");
}
}
47 changes: 45 additions & 2 deletions arrow-array/src/builder/generic_bytes_dictionary_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ where
K: ArrowDictionaryKeyType,
T: ByteArrayType,
{
/// Append a primitive value to the array. Return an existing index
/// Append a value to the array. Return an existing index
/// if already present in the values array or a new index if the
/// value is appended to the values array.
///
Expand Down Expand Up @@ -255,12 +255,34 @@ where
Ok(key)
}

/// Infallibly append a value to this builder
///
/// # Panics
///
/// Panics if the resulting length of the dictionary values array would exceed `T::Native::MAX`
pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are the new APIs for #3562

self.append(value).expect("dictionary key overflow");
}

/// Appends a null slot into the builder
#[inline]
pub fn append_null(&mut self) {
self.keys_builder.append_null()
}

/// Append an `Option` value into the builder
///
/// # Panics
///
/// Panics if the resulting length of the dictionary values array would exceed `T::Native::MAX`
#[inline]
pub fn append_option(&mut self, value: Option<impl AsRef<T::Native>>) {
match value {
None => self.append_null(),
Some(v) => self.append_value(v),
};
}

/// Builds the `DictionaryArray` and reset this builder.
pub fn finish(&mut self) -> DictionaryArray<K> {
self.dedup.clear();
Expand Down Expand Up @@ -297,6 +319,17 @@ where
}
}

impl<K: ArrowDictionaryKeyType, T: ByteArrayType, V: AsRef<T::Native>> Extend<Option<V>>
for GenericByteDictionaryBuilder<K, T>
{
#[inline]
fn extend<I: IntoIterator<Item = Option<V>>>(&mut self, iter: I) {
for v in iter {
self.append_option(v)
}
}
}

fn get_bytes<'a, K: ArrowNativeType, T: ByteArrayType>(
values: &'a GenericByteBuilder<T>,
key: &K,
Expand Down Expand Up @@ -405,7 +438,7 @@ mod tests {

use crate::array::Array;
use crate::array::Int8Array;
use crate::types::{Int16Type, Int8Type};
use crate::types::{Int16Type, Int32Type, Int8Type, Utf8Type};
use crate::{BinaryArray, StringArray};

fn test_bytes_dictionary_builder<T>(values: Vec<&T::Native>)
Expand Down Expand Up @@ -622,4 +655,14 @@ mod tests {
vec![b"abc", b"def"],
);
}

#[test]
fn test_extend() {
let mut builder = GenericByteDictionaryBuilder::<Int32Type, Utf8Type>::new();
builder.extend(["a", "b", "c", "a", "b", "c"].into_iter().map(Some));
builder.extend(["c", "d", "a"].into_iter().map(Some));
let dict = builder.finish();
assert_eq!(dict.keys().values(), &[0, 1, 2, 0, 1, 2, 2, 3, 0]);
assert_eq!(dict.values().len(), 4);
}
}
47 changes: 47 additions & 0 deletions arrow-array/src/builder/generic_list_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,10 @@ where
}

/// Finish the current variable-length list array slot
///
/// # Panics
///
/// Panics if the length of [`Self::values`] exceeds `OffsetSize::MAX`
#[inline]
pub fn append(&mut self, is_valid: bool) {
self.offsets_builder
Expand Down Expand Up @@ -178,10 +182,32 @@ where
}
}

impl<O, B, V, E> Extend<Option<V>> for GenericListBuilder<O, B>
where
O: OffsetSizeTrait,
B: ArrayBuilder + Extend<E>,
V: IntoIterator<Item = E>,
{
#[inline]
fn extend<T: IntoIterator<Item = Option<V>>>(&mut self, iter: T) {
for v in iter {
match v {
Some(elements) => {
self.values_builder.extend(elements);
self.append(true);
}
None => self.append(false),
}
}
}
}

#[cfg(test)]
mod tests {
use super::*;
use crate::builder::{Int32Builder, ListBuilder};
use crate::cast::as_primitive_array;
use crate::types::Int32Type;
use crate::{Array, Int32Array};
use arrow_buffer::Buffer;
use arrow_schema::DataType;
Expand Down Expand Up @@ -364,4 +390,25 @@ mod tests {
list_array.values().data().child_data()[0].buffers()[0].clone()
);
}

#[test]
fn test_extend() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 thank you for this test

let mut builder = ListBuilder::new(Int32Builder::new());
builder.extend([
Some(vec![Some(1), Some(2), Some(7), None]),
Some(vec![]),
Some(vec![Some(4), Some(5)]),
None,
]);

let array = builder.finish();
assert_eq!(array.value_offsets(), [0, 4, 4, 6, 6]);
assert_eq!(array.null_count(), 1);
assert!(array.is_null(3));
let a_values = array.values();
let elements = as_primitive_array::<Int32Type>(a_values.as_ref());
assert_eq!(elements.values(), &[1, 2, 7, 0, 4, 5]);
assert_eq!(elements.null_count(), 1);
assert!(elements.is_null(3));
}
}
22 changes: 22 additions & 0 deletions arrow-array/src/builder/primitive_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,10 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
}

/// Appends values from a slice of type `T` and a validity boolean slice
///
/// # Panics
///
/// Panics if `values` and `is_valid` have different lengths
#[inline]
pub fn append_values(&mut self, values: &[T::Native], is_valid: &[bool]) {
assert_eq!(
Expand Down Expand Up @@ -328,6 +332,15 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
}
}

impl<P: ArrowPrimitiveType> Extend<Option<P::Native>> for PrimitiveBuilder<P> {
#[inline]
fn extend<T: IntoIterator<Item = Option<P::Native>>>(&mut self, iter: T) {
for v in iter {
self.append_option(v)
}
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -578,4 +591,13 @@ mod tests {
fn test_invalid_with_data_type() {
Int32Builder::new().with_data_type(DataType::Int64);
}

#[test]
fn test_extend() {
let mut builder = PrimitiveBuilder::<Int16Type>::new();
builder.extend([1, 2, 3, 5, 2, 4, 4].into_iter().map(Some));
builder.extend([2, 4, 6, 2].into_iter().map(Some));
let array = builder.finish();
assert_eq!(array.values(), &[1, 2, 3, 5, 2, 4, 4, 2, 4, 6, 2]);
}
}
48 changes: 47 additions & 1 deletion arrow-array/src/builder/primitive_dictionary_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,12 +193,34 @@ where
Ok(key)
}

/// Infallibly append a value to this builder
///
/// # Panics
///
/// Panics if the resulting length of the dictionary values array would exceed `T::Native::MAX`
pub fn append_value(&mut self, value: V::Native) {
self.append(value).expect("dictionary key overflow");
}

/// Appends a null slot into the builder
#[inline]
pub fn append_null(&mut self) {
self.keys_builder.append_null()
}

/// Append an `Option` value into the builder
///
/// # Panics
///
/// Panics if the resulting length of the dictionary values array would exceed `T::Native::MAX`
#[inline]
pub fn append_option(&mut self, value: Option<V::Native>) {
match value {
None => self.append_null(),
Some(v) => self.append_value(v),
};
}

/// Builds the `DictionaryArray` and reset this builder.
pub fn finish(&mut self) -> DictionaryArray<K> {
self.map.clear();
Expand Down Expand Up @@ -235,14 +257,25 @@ where
}
}

impl<K: ArrowPrimitiveType, P: ArrowPrimitiveType> Extend<Option<P::Native>>
for PrimitiveDictionaryBuilder<K, P>
{
#[inline]
fn extend<T: IntoIterator<Item = Option<P::Native>>>(&mut self, iter: T) {
for v in iter {
self.append_option(v)
}
}
}

#[cfg(test)]
mod tests {
use super::*;

use crate::array::Array;
use crate::array::UInt32Array;
use crate::array::UInt8Array;
use crate::types::{UInt32Type, UInt8Type};
use crate::types::{Int32Type, UInt32Type, UInt8Type};

#[test]
fn test_primitive_dictionary_builder() {
Expand Down Expand Up @@ -270,6 +303,19 @@ mod tests {
assert_eq!(avs, &[12345678, 22345678]);
}

#[test]
fn test_extend() {
let mut builder = PrimitiveDictionaryBuilder::<Int32Type, Int32Type>::new();
builder.extend([1, 2, 3, 1, 2, 3, 1, 2, 3].into_iter().map(Some));
builder.extend([4, 5, 1, 3, 1].into_iter().map(Some));
let dict = builder.finish();
assert_eq!(
dict.keys().values(),
&[0, 1, 2, 0, 1, 2, 0, 1, 2, 3, 4, 0, 2, 0]
);
assert_eq!(dict.values().len(), 5);
}

#[test]
#[should_panic(expected = "DictionaryKeyOverflowError")]
fn test_primitive_dictionary_overflow() {
Expand Down