diff --git a/arrow-array/src/array/dictionary_array.rs b/arrow-array/src/array/dictionary_array.rs index 6cff5bfdc9f6..fb2868c2778f 100644 --- a/arrow-array/src/array/dictionary_array.rs +++ b/arrow-array/src/array/dictionary_array.rs @@ -481,18 +481,7 @@ impl<'a, T: ArrowDictionaryKeyType> FromIterator> for Dictionary let it = iter.into_iter(); let (lower, _) = it.size_hint(); let mut builder = StringDictionaryBuilder::with_capacity(lower, 256, 1024); - it.for_each(|i| { - if let Some(i) = i { - // Note: impl ... for Result> fails with - // error[E0117]: only traits defined in the current crate can be implemented for arbitrary types - builder - .append(i) - .expect("Unable to append a value to a dictionary array."); - } else { - builder.append_null(); - } - }); - + builder.extend(it); builder.finish() } } diff --git a/arrow-array/src/builder/boolean_builder.rs b/arrow-array/src/builder/boolean_builder.rs index 96f436253c5a..06709e5f375d 100644 --- a/arrow-array/src/builder/boolean_builder.rs +++ b/arrow-array/src/builder/boolean_builder.rs @@ -211,6 +211,15 @@ impl ArrayBuilder for BooleanBuilder { } } +impl Extend> for BooleanBuilder { + #[inline] + fn extend>>(&mut self, iter: T) { + for v in iter { + self.append_option(v) + } + } +} + #[cfg(test)] mod tests { use super::*; @@ -304,4 +313,17 @@ mod tests { assert_eq!(0, array.null_count()); assert!(array.data().null_buffer().is_none()); } + + #[test] + fn test_extend() { + let mut builder = BooleanBuilder::new(); + builder.extend([false, false, true, false, false].into_iter().map(Some)); + builder.extend([true, true, false].into_iter().map(Some)); + let array = builder.finish(); + let values = array.iter().map(|x| x.unwrap()).collect::>(); + assert_eq!( + &values, + &[false, false, true, false, false, true, true, false] + ) + } } diff --git a/arrow-array/src/builder/generic_bytes_builder.rs b/arrow-array/src/builder/generic_bytes_builder.rs index 195628f4712f..032e6af48460 100644 --- 
a/arrow-array/src/builder/generic_bytes_builder.rs +++ b/arrow-array/src/builder/generic_bytes_builder.rs @@ -82,6 +82,10 @@ impl GenericByteBuilder { } /// Appends a value into the builder. + /// + /// # Panics + /// + /// Panics if the resulting length of [`Self::values_slice`] would exceed `T::Offset::MAX` #[inline] pub fn append_value(&mut self, value: impl AsRef) { self.value_builder.append_slice(value.as_ref().as_ref()); @@ -216,6 +220,15 @@ impl ArrayBuilder for GenericByteBuilder { } } +impl> Extend> for GenericByteBuilder { + #[inline] + fn extend>>(&mut self, iter: I) { + for v in iter { + self.append_option(v) + } + } +} + /// Array builder for [`GenericStringArray`][crate::GenericStringArray] pub type GenericStringBuilder = GenericByteBuilder>; @@ -417,4 +430,14 @@ mod tests { fn test_large_string_array_builder_finish_cloned() { _test_generic_string_array_builder_finish_cloned::() } + + #[test] + fn test_extend() { + let mut builder = GenericStringBuilder::::new(); + builder.extend(["a", "b", "c", "", "a", "b", "c"].into_iter().map(Some)); + builder.extend(["d", "cupcakes", "hello"].into_iter().map(Some)); + let array = builder.finish(); + assert_eq!(array.value_offsets(), &[0, 1, 2, 3, 3, 4, 5, 6, 7, 15, 20]); + assert_eq!(array.value_data(), b"abcabcdcupcakeshello"); + } } diff --git a/arrow-array/src/builder/generic_bytes_dictionary_builder.rs b/arrow-array/src/builder/generic_bytes_dictionary_builder.rs index 4a920f3ee43e..449100da1e0e 100644 --- a/arrow-array/src/builder/generic_bytes_dictionary_builder.rs +++ b/arrow-array/src/builder/generic_bytes_dictionary_builder.rs @@ -214,7 +214,7 @@ where K: ArrowDictionaryKeyType, T: ByteArrayType, { - /// Append a primitive value to the array. Return an existing index + /// Append a value to the array. Return an existing index /// if already present in the values array or a new index if the /// value is appended to the values array. 
/// @@ -255,12 +255,34 @@ where Ok(key) } + /// Infallibly append a value to this builder + /// + /// # Panics + /// + /// Panics if the resulting length of the dictionary values array would exceed the maximum value of the key type (`K::Native::MAX`) + pub fn append_value(&mut self, value: impl AsRef) { + self.append(value).expect("dictionary key overflow"); + } + /// Appends a null slot into the builder #[inline] pub fn append_null(&mut self) { self.keys_builder.append_null() } + /// Append an `Option` value into the builder + /// + /// # Panics + /// + /// Panics if the resulting length of the dictionary values array would exceed the maximum value of the key type (`K::Native::MAX`) + #[inline] + pub fn append_option(&mut self, value: Option>) { + match value { + None => self.append_null(), + Some(v) => self.append_value(v), + }; + } + /// Builds the `DictionaryArray` and reset this builder. pub fn finish(&mut self) -> DictionaryArray { self.dedup.clear(); @@ -297,6 +319,17 @@ where } } +impl> Extend> + for GenericByteDictionaryBuilder +{ + #[inline] + fn extend>>(&mut self, iter: I) { + for v in iter { + self.append_option(v) + } + } +} + fn get_bytes<'a, K: ArrowNativeType, T: ByteArrayType>( values: &'a GenericByteBuilder, key: &K, @@ -405,7 +438,7 @@ mod tests { use crate::array::Array; use crate::array::Int8Array; - use crate::types::{Int16Type, Int8Type}; + use crate::types::{Int16Type, Int32Type, Int8Type, Utf8Type}; use crate::{BinaryArray, StringArray}; fn test_bytes_dictionary_builder(values: Vec<&T::Native>) @@ -622,4 +655,14 @@ mod tests { vec![b"abc", b"def"], ); } + + #[test] + fn test_extend() { + let mut builder = GenericByteDictionaryBuilder::::new(); + builder.extend(["a", "b", "c", "a", "b", "c"].into_iter().map(Some)); + builder.extend(["c", "d", "a"].into_iter().map(Some)); + let dict = builder.finish(); + assert_eq!(dict.keys().values(), &[0, 1, 2, 0, 1, 2, 2, 3, 0]); + assert_eq!(dict.values().len(), 4); + } } diff --git a/arrow-array/src/builder/generic_list_builder.rs 
b/arrow-array/src/builder/generic_list_builder.rs index 8f3f881c4b32..6228475542bd 100644 --- a/arrow-array/src/builder/generic_list_builder.rs +++ b/arrow-array/src/builder/generic_list_builder.rs @@ -111,6 +111,10 @@ where } /// Finish the current variable-length list array slot + /// + /// # Panics + /// + /// Panics if the length of [`Self::values`] exceeds `OffsetSize::MAX` #[inline] pub fn append(&mut self, is_valid: bool) { self.offsets_builder @@ -178,10 +182,32 @@ where } } +impl Extend> for GenericListBuilder +where + O: OffsetSizeTrait, + B: ArrayBuilder + Extend, + V: IntoIterator, +{ + #[inline] + fn extend>>(&mut self, iter: T) { + for v in iter { + match v { + Some(elements) => { + self.values_builder.extend(elements); + self.append(true); + } + None => self.append(false), + } + } + } +} + #[cfg(test)] mod tests { use super::*; use crate::builder::{Int32Builder, ListBuilder}; + use crate::cast::as_primitive_array; + use crate::types::Int32Type; use crate::{Array, Int32Array}; use arrow_buffer::Buffer; use arrow_schema::DataType; @@ -364,4 +390,25 @@ mod tests { list_array.values().data().child_data()[0].buffers()[0].clone() ); } + + #[test] + fn test_extend() { + let mut builder = ListBuilder::new(Int32Builder::new()); + builder.extend([ + Some(vec![Some(1), Some(2), Some(7), None]), + Some(vec![]), + Some(vec![Some(4), Some(5)]), + None, + ]); + + let array = builder.finish(); + assert_eq!(array.value_offsets(), [0, 4, 4, 6, 6]); + assert_eq!(array.null_count(), 1); + assert!(array.is_null(3)); + let a_values = array.values(); + let elements = as_primitive_array::(a_values.as_ref()); + assert_eq!(elements.values(), &[1, 2, 7, 0, 4, 5]); + assert_eq!(elements.null_count(), 1); + assert!(elements.is_null(3)); + } } diff --git a/arrow-array/src/builder/primitive_builder.rs b/arrow-array/src/builder/primitive_builder.rs index a969e121808b..2d88ea50f257 100644 --- a/arrow-array/src/builder/primitive_builder.rs +++ 
b/arrow-array/src/builder/primitive_builder.rs @@ -238,6 +238,10 @@ impl PrimitiveBuilder { } /// Appends values from a slice of type `T` and a validity boolean slice + /// + /// # Panics + /// + /// Panics if `values` and `is_valid` have different lengths #[inline] pub fn append_values(&mut self, values: &[T::Native], is_valid: &[bool]) { assert_eq!( @@ -328,6 +332,15 @@ impl PrimitiveBuilder { } } +impl Extend> for PrimitiveBuilder

{ + #[inline] + fn extend>>(&mut self, iter: T) { + for v in iter { + self.append_option(v) + } + } +} + #[cfg(test)] mod tests { use super::*; @@ -578,4 +591,13 @@ mod tests { fn test_invalid_with_data_type() { Int32Builder::new().with_data_type(DataType::Int64); } + + #[test] + fn test_extend() { + let mut builder = PrimitiveBuilder::::new(); + builder.extend([1, 2, 3, 5, 2, 4, 4].into_iter().map(Some)); + builder.extend([2, 4, 6, 2].into_iter().map(Some)); + let array = builder.finish(); + assert_eq!(array.values(), &[1, 2, 3, 5, 2, 4, 4, 2, 4, 6, 2]); + } } diff --git a/arrow-array/src/builder/primitive_dictionary_builder.rs b/arrow-array/src/builder/primitive_dictionary_builder.rs index 4640902d870f..f44f0e30602e 100644 --- a/arrow-array/src/builder/primitive_dictionary_builder.rs +++ b/arrow-array/src/builder/primitive_dictionary_builder.rs @@ -193,12 +193,34 @@ where Ok(key) } + /// Infallibly append a value to this builder + /// + /// # Panics + /// + /// Panics if the resulting length of the dictionary values array would exceed the maximum value of the dictionary key type + pub fn append_value(&mut self, value: V::Native) { + self.append(value).expect("dictionary key overflow"); + } + /// Appends a null slot into the builder #[inline] pub fn append_null(&mut self) { self.keys_builder.append_null() } + /// Append an `Option` value into the builder + /// + /// # Panics + /// + /// Panics if the resulting length of the dictionary values array would exceed the maximum value of the dictionary key type + #[inline] + pub fn append_option(&mut self, value: Option) { + match value { + None => self.append_null(), + Some(v) => self.append_value(v), + }; + } + /// Builds the `DictionaryArray` and reset this builder. 
pub fn finish(&mut self) -> DictionaryArray { self.map.clear(); @@ -235,6 +257,17 @@ where } } +impl Extend> + for PrimitiveDictionaryBuilder +{ + #[inline] + fn extend>>(&mut self, iter: T) { + for v in iter { + self.append_option(v) + } + } +} + #[cfg(test)] mod tests { use super::*; @@ -242,7 +275,7 @@ mod tests { use crate::array::Array; use crate::array::UInt32Array; use crate::array::UInt8Array; - use crate::types::{UInt32Type, UInt8Type}; + use crate::types::{Int32Type, UInt32Type, UInt8Type}; #[test] fn test_primitive_dictionary_builder() { @@ -270,6 +303,19 @@ mod tests { assert_eq!(avs, &[12345678, 22345678]); } + #[test] + fn test_extend() { + let mut builder = PrimitiveDictionaryBuilder::::new(); + builder.extend([1, 2, 3, 1, 2, 3, 1, 2, 3].into_iter().map(Some)); + builder.extend([4, 5, 1, 3, 1].into_iter().map(Some)); + let dict = builder.finish(); + assert_eq!( + dict.keys().values(), + &[0, 1, 2, 0, 1, 2, 0, 1, 2, 3, 4, 0, 2, 0] + ); + assert_eq!(dict.values().len(), 5); + } + #[test] #[should_panic(expected = "DictionaryKeyOverflowError")] fn test_primitive_dictionary_overflow() {