From 46d161270eacf4301b81abeee2333b3537f70e91 Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Sat, 8 Mar 2025 11:35:43 +0200 Subject: [PATCH] feat: use `force_validate` feature flag when creating an arrays (#7241) --- arrow-array/src/array/byte_array.rs | 3 +++ arrow-array/src/array/byte_view_array.rs | 4 ++++ arrow-array/src/array/dictionary_array.rs | 4 ++++ arrow-array/src/array/struct_array.rs | 4 ++++ arrow-array/src/ffi.rs | 8 ++++++-- 5 files changed, 21 insertions(+), 2 deletions(-) diff --git a/arrow-array/src/array/byte_array.rs b/arrow-array/src/array/byte_array.rs index f2b22507081d..b368f273cf46 100644 --- a/arrow-array/src/array/byte_array.rs +++ b/arrow-array/src/array/byte_array.rs @@ -164,6 +164,9 @@ impl GenericByteArray { values: Buffer, nulls: Option, ) -> Self { + if cfg!(feature = "force_validate") { + return Self::new(offsets, values, nulls); + } Self { data_type: T::DATA_TYPE, value_offsets: offsets, diff --git a/arrow-array/src/array/byte_view_array.rs b/arrow-array/src/array/byte_view_array.rs index 8c78dec8e1c5..e837512ed064 100644 --- a/arrow-array/src/array/byte_view_array.rs +++ b/arrow-array/src/array/byte_view_array.rs @@ -232,6 +232,10 @@ impl GenericByteViewArray { buffers: Vec, nulls: Option, ) -> Self { + if cfg!(feature = "force_validate") { + return Self::new(views, buffers, nulls); + } + Self { data_type: T::DATA_TYPE, phantom: Default::default(), diff --git a/arrow-array/src/array/dictionary_array.rs b/arrow-array/src/array/dictionary_array.rs index f852b57fb65e..38c4e019206b 100644 --- a/arrow-array/src/array/dictionary_array.rs +++ b/arrow-array/src/array/dictionary_array.rs @@ -327,6 +327,10 @@ impl DictionaryArray { /// /// Safe provided [`Self::try_new`] would not return an error pub unsafe fn new_unchecked(keys: PrimitiveArray, values: ArrayRef) -> Self { + if cfg!(feature = "force_validate") { + return Self::new(keys, values); + } + let data_type = DataType::Dictionary( Box::new(keys.data_type().clone()), Box::new(values.data_type().clone()), diff --git a/arrow-array/src/array/struct_array.rs b/arrow-array/src/array/struct_array.rs index de6d9c699d22..7ecee683350a 100644 --- a/arrow-array/src/array/struct_array.rs +++ b/arrow-array/src/array/struct_array.rs @@ -189,6 +189,10 @@ impl StructArray { arrays: Vec, nulls: Option, ) -> Self { + if cfg!(feature = "force_validate") { + return Self::new(fields, arrays, nulls); + } + let len = arrays.first().map(|x| x.len()).unwrap_or_default(); Self { len, diff --git a/arrow-array/src/ffi.rs b/arrow-array/src/ffi.rs index 144f2a21afec..ac28289e652b 100644 --- a/arrow-array/src/ffi.rs +++ b/arrow-array/src/ffi.rs @@ -1298,12 +1298,12 @@ mod tests_to_then_from_ffi { mod tests_from_ffi { use std::sync::Arc; - use arrow_buffer::{bit_util, buffer::Buffer, MutableBuffer, OffsetBuffer}; + use arrow_buffer::{bit_util, buffer::Buffer}; use arrow_data::transform::MutableArrayData; use arrow_data::ArrayData; use arrow_schema::{DataType, Field}; - use super::{ImportedArrowArray, Result}; + use super::Result; use crate::builder::GenericByteViewBuilder; use crate::types::{BinaryViewType, ByteViewType, Int32Type, StringViewType}; use crate::{ @@ -1507,7 +1507,11 @@ mod tests_from_ffi { } #[test] + #[cfg(not(feature = "force_validate"))] fn test_empty_string_with_non_zero_offset() -> Result<()> { + use super::ImportedArrowArray; + use arrow_buffer::{MutableBuffer, OffsetBuffer}; + // Simulate an empty string array with a non-zero offset from a producer let data: Buffer = MutableBuffer::new(0).into(); let offsets = OffsetBuffer::new(vec![123].into());