Skip to content

Commit 4491b17

Browse files
authored
refactor!: do not default the struct array length to 0 in Struct::try_new (#7247)
* Do not default the struct array length to 0 in Struct::try_new if there are no child arrays. * Extend testing for new_empty_fields * Add try_new_with_length
1 parent 880be2f commit 4491b17

File tree

1 file changed

+85
-5
lines changed

1 file changed

+85
-5
lines changed

arrow-array/src/array/struct_array.rs

Lines changed: 85 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,28 @@ impl StructArray {
9191
Self::try_new(fields, arrays, nulls).unwrap()
9292
}
9393

94+
/// Create a new [`StructArray`] from the provided parts, returning an error on failure
95+
///
96+
/// The length will be inferred from the length of the child arrays. Returns an error if
97+
/// there are no child arrays. Consider using [`Self::try_new_with_length`] if the length
98+
/// is known to avoid this.
99+
///
100+
/// # Errors
101+
///
102+
/// Errors if
103+
///
104+
/// * `fields.len() == 0`
105+
/// * Any reason that [`Self::try_new_with_length`] would error
106+
pub fn try_new(
107+
fields: Fields,
108+
arrays: Vec<ArrayRef>,
109+
nulls: Option<NullBuffer>,
110+
) -> Result<Self, ArrowError> {
111+
let len = arrays.first().map(|x| x.len()).ok_or_else(||ArrowError::InvalidArgumentError("use StructArray::try_new_with_length or StructArray::new_empty to create a struct array with no fields so that the length can be set correctly".to_string()))?;
112+
113+
Self::try_new_with_length(fields, arrays, nulls, len)
114+
}
115+
94116
/// Create a new [`StructArray`] from the provided parts, returning an error on failure
95117
///
96118
/// # Errors
@@ -102,10 +124,11 @@ impl StructArray {
102124
/// * `arrays[i].len() != arrays[j].len()`
103125
/// * `arrays[i].len() != nulls.len()`
104126
/// * `!fields[i].is_nullable() && !nulls.contains(arrays[i].nulls())`
105-
pub fn try_new(
127+
pub fn try_new_with_length(
106128
fields: Fields,
107129
arrays: Vec<ArrayRef>,
108130
nulls: Option<NullBuffer>,
131+
len: usize,
109132
) -> Result<Self, ArrowError> {
110133
if fields.len() != arrays.len() {
111134
return Err(ArrowError::InvalidArgumentError(format!(
@@ -114,7 +137,6 @@ impl StructArray {
114137
arrays.len()
115138
)));
116139
}
117-
let len = arrays.first().map(|x| x.len()).unwrap_or_default();
118140

119141
if let Some(n) = nulls.as_ref() {
120142
if n.len() != len {
@@ -181,6 +203,10 @@ impl StructArray {
181203

182204
/// Create a new [`StructArray`] from the provided parts without validation
183205
///
206+
/// The length will be inferred from the length of the child arrays. Panics if there are no
207+
/// child arrays. Consider using [`Self::new_unchecked_with_length`] if the length is known
208+
/// to avoid this.
209+
///
184210
/// # Safety
185211
///
186212
/// Safe if [`Self::new`] would not panic with the given arguments
@@ -193,7 +219,32 @@ impl StructArray {
193219
return Self::new(fields, arrays, nulls);
194220
}
195221

196-
let len = arrays.first().map(|x| x.len()).unwrap_or_default();
222+
let len = arrays.first().map(|x| x.len()).expect(
223+
"cannot use StructArray::new_unchecked if there are no fields, length is unknown",
224+
);
225+
Self {
226+
len,
227+
data_type: DataType::Struct(fields),
228+
nulls,
229+
fields: arrays,
230+
}
231+
}
232+
233+
/// Create a new [`StructArray`] from the provided parts without validation
234+
///
235+
/// # Safety
236+
///
237+
/// Safe if [`Self::new`] would not panic with the given arguments
238+
pub unsafe fn new_unchecked_with_length(
239+
fields: Fields,
240+
arrays: Vec<ArrayRef>,
241+
nulls: Option<NullBuffer>,
242+
len: usize,
243+
) -> Self {
244+
if cfg!(feature = "force_validate") {
245+
return Self::try_new_with_length(fields, arrays, nulls, len).unwrap();
246+
}
247+
197248
Self {
198249
len,
199250
data_type: DataType::Struct(fields),
@@ -817,9 +868,38 @@ mod tests {
817868
}
818869

819870
#[test]
871+
#[should_panic(expected = "use StructArray::try_new_with_length")]
820872
fn test_struct_array_from_empty() {
821-
let sa = StructArray::from(vec![]);
822-
assert!(sa.is_empty())
873+
// This can't work because we don't know how many rows the array should have. Previously we inferred 0 but
874+
// that often led to bugs.
875+
let _ = StructArray::from(vec![]);
876+
}
877+
878+
#[test]
879+
fn test_empty_struct_array() {
880+
assert!(StructArray::try_new(Fields::empty(), vec![], None).is_err());
881+
882+
let arr = StructArray::new_empty_fields(10, None);
883+
assert_eq!(arr.len(), 10);
884+
assert_eq!(arr.null_count(), 0);
885+
assert_eq!(arr.num_columns(), 0);
886+
887+
let arr2 = StructArray::try_new_with_length(Fields::empty(), vec![], None, 10).unwrap();
888+
assert_eq!(arr2.len(), 10);
889+
890+
let arr = StructArray::new_empty_fields(10, Some(NullBuffer::new_null(10)));
891+
assert_eq!(arr.len(), 10);
892+
assert_eq!(arr.null_count(), 10);
893+
assert_eq!(arr.num_columns(), 0);
894+
895+
let arr2 = StructArray::try_new_with_length(
896+
Fields::empty(),
897+
vec![],
898+
Some(NullBuffer::new_null(10)),
899+
10,
900+
)
901+
.unwrap();
902+
assert_eq!(arr2.len(), 10);
823903
}
824904

825905
#[test]

0 commit comments

Comments
 (0)