Skip to content

Add Array::with_nulls #6659

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion arrow-array/src/array/boolean_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::array::print_long_array;
use crate::array::{print_long_array, replace_nulls};
use crate::builder::BooleanBuilder;
use crate::iterator::BooleanIter;
use crate::{Array, ArrayAccessor, ArrayRef, Scalar};
Expand Down Expand Up @@ -316,6 +316,10 @@ impl Array for BooleanArray {
self.nulls.as_ref()
}

fn with_nulls(self, nulls: Option<NullBuffer>) -> ArrayRef {
replace_nulls(self.to_data(), nulls)
}

fn logical_null_count(&self) -> usize {
self.null_count()
}
Expand Down
6 changes: 5 additions & 1 deletion arrow-array/src/array/byte_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::array::{get_offsets, print_long_array};
use crate::array::{get_offsets, print_long_array, replace_nulls};
use crate::builder::GenericByteBuilder;
use crate::iterator::ArrayIter;
use crate::types::bytes::ByteArrayNativeType;
Expand Down Expand Up @@ -461,6 +461,10 @@ impl<T: ByteArrayType> Array for GenericByteArray<T> {
self.nulls.as_ref()
}

fn with_nulls(self, nulls: Option<NullBuffer>) -> ArrayRef {
replace_nulls(self.to_data(), nulls)
}

fn get_buffer_memory_size(&self) -> usize {
let mut sum = self.value_offsets.inner().inner().capacity();
sum += self.value_data.capacity();
Expand Down
6 changes: 5 additions & 1 deletion arrow-array/src/array/byte_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::array::print_long_array;
use crate::array::{print_long_array, replace_nulls};
use crate::builder::{ArrayBuilder, GenericByteViewBuilder};
use crate::iterator::ArrayIter;
use crate::types::bytes::ByteArrayNativeType;
Expand Down Expand Up @@ -549,6 +549,10 @@ impl<T: ByteViewType + ?Sized> Array for GenericByteViewArray<T> {
self.nulls.as_ref()
}

fn with_nulls(self, nulls: Option<NullBuffer>) -> ArrayRef {
replace_nulls(self.to_data(), nulls)
}

fn get_buffer_memory_size(&self) -> usize {
let mut sum = self.buffers.iter().map(|b| b.capacity()).sum::<usize>();
sum += self.views.inner().capacity();
Expand Down
9 changes: 9 additions & 0 deletions arrow-array/src/array/dictionary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::array::replace_nulls;
use crate::builder::{PrimitiveDictionaryBuilder, StringDictionaryBuilder};
use crate::cast::AsArray;
use crate::iterator::ArrayIter;
Expand Down Expand Up @@ -728,6 +729,10 @@ impl<T: ArrowDictionaryKeyType> Array for DictionaryArray<T> {
self.keys.nulls()
}

fn with_nulls(self, nulls: Option<NullBuffer>) -> ArrayRef {
replace_nulls(self.to_data(), nulls)
}

fn logical_nulls(&self) -> Option<NullBuffer> {
match self.values.nulls() {
None => self.nulls().cloned(),
Expand Down Expand Up @@ -862,6 +867,10 @@ impl<K: ArrowDictionaryKeyType, V: Sync> Array for TypedDictionaryArray<'_, K, V
self.dictionary.nulls()
}

fn with_nulls(self, nulls: Option<NullBuffer>) -> ArrayRef {
replace_nulls(self.to_data(), nulls)
}

fn logical_nulls(&self) -> Option<NullBuffer> {
self.dictionary.logical_nulls()
}
Expand Down
6 changes: 5 additions & 1 deletion arrow-array/src/array/fixed_size_binary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::array::print_long_array;
use crate::array::{print_long_array, replace_nulls};
use crate::iterator::FixedSizeBinaryIter;
use crate::{Array, ArrayAccessor, ArrayRef, FixedSizeListArray, Scalar};
use arrow_buffer::buffer::NullBuffer;
Expand Down Expand Up @@ -610,6 +610,10 @@ impl Array for FixedSizeBinaryArray {
self.nulls.as_ref()
}

fn with_nulls(self, nulls: Option<NullBuffer>) -> ArrayRef {
replace_nulls(self.to_data(), nulls)
}

fn get_buffer_memory_size(&self) -> usize {
let mut sum = self.value_data.capacity();
if let Some(n) = &self.nulls {
Expand Down
6 changes: 5 additions & 1 deletion arrow-array/src/array/fixed_size_list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::array::print_long_array;
use crate::array::{print_long_array, replace_nulls};
use crate::builder::{FixedSizeListBuilder, PrimitiveBuilder};
use crate::iterator::FixedSizeListIter;
use crate::{make_array, Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType};
Expand Down Expand Up @@ -409,6 +409,10 @@ impl Array for FixedSizeListArray {
self.nulls.as_ref()
}

fn with_nulls(self, nulls: Option<NullBuffer>) -> ArrayRef {
replace_nulls(self.to_data(), nulls)
}

fn get_buffer_memory_size(&self) -> usize {
let mut size = self.values.get_buffer_memory_size();
if let Some(n) = self.nulls.as_ref() {
Expand Down
6 changes: 5 additions & 1 deletion arrow-array/src/array/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::array::{get_offsets, make_array, print_long_array};
use crate::array::{get_offsets, make_array, print_long_array, replace_nulls};
use crate::builder::{GenericListBuilder, PrimitiveBuilder};
use crate::{
iterator::GenericListArrayIter, new_empty_array, Array, ArrayAccessor, ArrayRef,
Expand Down Expand Up @@ -493,6 +493,10 @@ impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
self.nulls.as_ref()
}

fn with_nulls(self, nulls: Option<NullBuffer>) -> ArrayRef {
replace_nulls(self.to_data(), nulls)
}

fn get_buffer_memory_size(&self) -> usize {
let mut size = self.values.get_buffer_memory_size();
size += self.value_offsets.inner().inner().capacity();
Expand Down
6 changes: 5 additions & 1 deletion arrow-array/src/array/list_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use std::any::Any;
use std::ops::Add;
use std::sync::Arc;

use crate::array::{make_array, print_long_array};
use crate::array::{make_array, print_long_array, replace_nulls};
use crate::iterator::GenericListViewArrayIter;
use crate::{new_empty_array, Array, ArrayAccessor, ArrayRef, FixedSizeListArray, OffsetSizeTrait};

Expand Down Expand Up @@ -334,6 +334,10 @@ impl<OffsetSize: OffsetSizeTrait> Array for GenericListViewArray<OffsetSize> {
self.nulls.as_ref()
}

fn with_nulls(self, nulls: Option<NullBuffer>) -> ArrayRef {
replace_nulls(self.to_data(), nulls)
}

fn get_buffer_memory_size(&self) -> usize {
let mut size = self.values.get_buffer_memory_size();
size += self.value_offsets.inner().capacity();
Expand Down
6 changes: 5 additions & 1 deletion arrow-array/src/array/map_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::array::{get_offsets, print_long_array};
use crate::array::{get_offsets, print_long_array, replace_nulls};
use crate::iterator::MapArrayIter;
use crate::{make_array, Array, ArrayAccessor, ArrayRef, ListArray, StringArray, StructArray};
use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, OffsetBuffer, ToByteSlice};
Expand Down Expand Up @@ -380,6 +380,10 @@ impl Array for MapArray {
self.nulls.as_ref()
}

fn with_nulls(self, nulls: Option<NullBuffer>) -> ArrayRef {
replace_nulls(self.to_data(), nulls)
}

fn get_buffer_memory_size(&self) -> usize {
let mut size = self.entries.get_buffer_memory_size();
size += self.value_offsets.inner().inner().capacity();
Expand Down
56 changes: 56 additions & 0 deletions arrow-array/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ pub trait Array: std::fmt::Debug + Send + Sync {
fn as_any(&self) -> &dyn Any;

/// Returns the underlying data of this array
///
/// See [`Self::into_data`] for a version that consumes self
fn to_data(&self) -> ArrayData;

/// Returns the underlying data of this array
Expand Down Expand Up @@ -196,6 +198,28 @@ pub trait Array: std::fmt::Debug + Send + Sync {
/// use the slower [`Array::logical_nulls`] to obtain a computed mask.
fn nulls(&self) -> Option<&NullBuffer>;

/// Replaces the nulls of this array.
///
/// # Panics
/// Panics if the length of the null buffer is not equal to the length of the array.
///
/// # Example:
/// ```
/// # use arrow_array::{Array, Int32Array};
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here is the new API and an example. I am not sure this is a good idea due to @tustvold 's comment here: #6528 (comment) but figured I would make a PR for discussion (and only now do I really understand what he is talking about)

/// # use arrow_array::cast::AsArray;
/// # use arrow_buffer::NullBuffer;
/// // Create an array with values [1, null, 3, 4, 5]
/// let array = Int32Array::from(vec![Some(1), None, Some(3), Some(4), Some(5)]);
/// // Set the first, third, and fifth elements to null, others to valid
/// let nulls = Some(NullBuffer::from(vec![false, true, false, true, false]));
/// let array_with_nulls = array.with_nulls(nulls);
/// assert_eq!(
/// array_with_nulls.as_primitive(),
/// &Int32Array::from(vec![None, Some(0), None, Some(4), None]),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the fact that this is now Some(0) (whatever value was created for the null element in the original array) could have potential issues 🤔

/// );
/// ```
fn with_nulls(self, nulls: Option<NullBuffer>) -> ArrayRef;

/// Returns a potentially computed [`NullBuffer`] that represents the logical
/// null values of this array, if any.
///
Expand Down Expand Up @@ -372,6 +396,10 @@ impl Array for ArrayRef {
self.as_ref().nulls()
}

fn with_nulls(self, nulls: Option<NullBuffer>) -> ArrayRef {
replace_nulls(self.to_data(), nulls)
}

fn logical_nulls(&self) -> Option<NullBuffer> {
self.as_ref().logical_nulls()
}
Expand Down Expand Up @@ -442,6 +470,10 @@ impl<T: Array> Array for &T {
T::nulls(self)
}

fn with_nulls(self, nulls: Option<NullBuffer>) -> ArrayRef {
replace_nulls(self.to_data(), nulls)
}

fn logical_nulls(&self) -> Option<NullBuffer> {
T::logical_nulls(self)
}
Expand Down Expand Up @@ -843,6 +875,30 @@ where
Ok(())
}

/// Helper function to replace the nulls in an array with a new null buffer
///
/// See [`Array::with_nulls`] for more information
pub(crate) fn replace_nulls(array_data: ArrayData, nulls: Option<NullBuffer>) -> ArrayRef {
let Some(nulls) = nulls else {
return make_array(array_data);
};

if nulls.len() != array_data.len() {
panic!(
"Null buffer length must be equal to the array length. \
Expected: {}, got: {}",
array_data.len(),
nulls.len()
);
}

let data = array_data.into_builder().nulls(Some(nulls));

// SAFETY:
// Checked that the null buffer has the same length as the array
make_array(unsafe { data.build_unchecked() })
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
5 changes: 5 additions & 0 deletions arrow-array/src/array/null_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

//! Contains the `NullArray` type.

use crate::array::replace_nulls;
use crate::builder::NullBuilder;
use crate::{Array, ArrayRef};
use arrow_buffer::buffer::NullBuffer;
Expand Down Expand Up @@ -113,6 +114,10 @@ impl Array for NullArray {
None
}

fn with_nulls(self, nulls: Option<NullBuffer>) -> ArrayRef {
replace_nulls(self.to_data(), nulls)
}

fn logical_nulls(&self) -> Option<NullBuffer> {
(self.len != 0).then(|| NullBuffer::new_null(self.len))
}
Expand Down
6 changes: 5 additions & 1 deletion arrow-array/src/array/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::array::print_long_array;
use crate::array::{print_long_array, replace_nulls};
use crate::builder::{BooleanBufferBuilder, BufferBuilder, PrimitiveBuilder};
use crate::iterator::PrimitiveIter;
use crate::temporal_conversions::{
Expand Down Expand Up @@ -1160,6 +1160,10 @@ impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
self.nulls.as_ref()
}

fn with_nulls(self, nulls: Option<NullBuffer>) -> ArrayRef {
replace_nulls(self.to_data(), nulls)
}

fn logical_null_count(&self) -> usize {
self.null_count()
}
Expand Down
9 changes: 9 additions & 0 deletions arrow-array/src/array/run_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, RunEndBuff
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::{ArrowError, DataType, Field};

use crate::array::replace_nulls;
use crate::{
builder::StringRunBuilder,
make_array,
Expand Down Expand Up @@ -338,6 +339,10 @@ impl<T: RunEndIndexType> Array for RunArray<T> {
None
}

fn with_nulls(self, nulls: Option<NullBuffer>) -> ArrayRef {
replace_nulls(self.to_data(), nulls)
}

fn logical_nulls(&self) -> Option<NullBuffer> {
let len = self.len();
let nulls = self.values.logical_nulls()?;
Expand Down Expand Up @@ -592,6 +597,10 @@ impl<R: RunEndIndexType, V: Sync> Array for TypedRunArray<'_, R, V> {
self.run_array.nulls()
}

fn with_nulls(self, nulls: Option<NullBuffer>) -> ArrayRef {
replace_nulls(self.to_data(), nulls)
}

fn logical_nulls(&self) -> Option<NullBuffer> {
self.run_array.logical_nulls()
}
Expand Down
5 changes: 5 additions & 0 deletions arrow-array/src/array/struct_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::array::replace_nulls;
use crate::{make_array, new_null_array, Array, ArrayRef, RecordBatch};
use arrow_buffer::{BooleanBuffer, Buffer, NullBuffer};
use arrow_data::{ArrayData, ArrayDataBuilder};
Expand Down Expand Up @@ -377,6 +378,10 @@ impl Array for StructArray {
self.nulls.as_ref()
}

fn with_nulls(self, nulls: Option<NullBuffer>) -> ArrayRef {
replace_nulls(self.to_data(), nulls)
}

fn get_buffer_memory_size(&self) -> usize {
let mut size = self.fields.iter().map(|a| a.get_buffer_memory_size()).sum();
if let Some(n) = self.nulls.as_ref() {
Expand Down
5 changes: 5 additions & 0 deletions arrow-array/src/array/union_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
// under the License.
#![allow(clippy::enum_clike_unportable_variant)]

use crate::array::replace_nulls;
use crate::{make_array, Array, ArrayRef};
use arrow_buffer::bit_chunk_iterator::{BitChunkIterator, BitChunks};
use arrow_buffer::buffer::NullBuffer;
Expand Down Expand Up @@ -752,6 +753,10 @@ impl Array for UnionArray {
None
}

fn with_nulls(self, nulls: Option<NullBuffer>) -> ArrayRef {
replace_nulls(self.to_data(), nulls)
}

fn logical_nulls(&self) -> Option<NullBuffer> {
let fields = match self.data_type() {
DataType::Union(fields, _) => fields,
Expand Down
Loading
Loading