Skip to content

Commit 755616f

Browse files
authored
Use FixedSizeListArray::new in FixedSizeListBuilder (#5612)
* Use FixedSizeListArray::new in FixedSizeListBuilder * Fix handling of entirely null zero-sized list array (#5614) * Fix
1 parent 12c0d00 commit 755616f

File tree

2 files changed

+52
-106
lines changed

2 files changed

+52
-106
lines changed

arrow-array/src/array/fixed_size_list_array.rs

+26-11
Original file line numberDiff line numberDiff line change
@@ -152,16 +152,22 @@ impl FixedSizeListArray {
152152
ArrowError::InvalidArgumentError(format!("Size cannot be negative, got {}", size))
153153
})?;
154154

155-
let len = values.len() / s.max(1);
156-
if let Some(n) = nulls.as_ref() {
157-
if n.len() != len {
158-
return Err(ArrowError::InvalidArgumentError(format!(
159-
"Incorrect length of null buffer for FixedSizeListArray, expected {} got {}",
160-
len,
161-
n.len(),
162-
)));
155+
let len = match s {
156+
0 => nulls.as_ref().map(|x| x.len()).unwrap_or_default(),
157+
_ => {
158+
let len = values.len() / s.max(1);
159+
if let Some(n) = nulls.as_ref() {
160+
if n.len() != len {
161+
return Err(ArrowError::InvalidArgumentError(format!(
162+
"Incorrect length of null buffer for FixedSizeListArray, expected {} got {}",
163+
len,
164+
n.len(),
165+
)));
166+
}
167+
}
168+
len
163169
}
164-
}
170+
};
165171

166172
if field.data_type() != values.data_type() {
167173
return Err(ArrowError::InvalidArgumentError(format!(
@@ -460,7 +466,7 @@ mod tests {
460466

461467
use crate::cast::AsArray;
462468
use crate::types::Int32Type;
463-
use crate::Int32Array;
469+
use crate::{new_empty_array, Int32Array};
464470

465471
use super::*;
466472

@@ -656,7 +662,7 @@ mod tests {
656662
);
657663

658664
let list = FixedSizeListArray::new(field.clone(), 0, values.clone(), None);
659-
assert_eq!(list.len(), 6);
665+
assert_eq!(list.len(), 0);
660666

661667
let nulls = NullBuffer::new_null(2);
662668
let err = FixedSizeListArray::try_new(field, 2, values.clone(), Some(nulls)).unwrap_err();
@@ -674,4 +680,13 @@ mod tests {
674680
let err = FixedSizeListArray::try_new(field, 2, values, None).unwrap_err();
675681
assert_eq!(err.to_string(), "Invalid argument error: FixedSizeListArray expected data type Int64 got Int32 for \"item\"");
676682
}
683+
684+
#[test]
685+
fn empty_fixed_size_list() {
686+
let field = Arc::new(Field::new("item", DataType::Int32, true));
687+
let nulls = NullBuffer::new_null(2);
688+
let values = new_empty_array(&DataType::Int32);
689+
let list = FixedSizeListArray::new(field.clone(), 0, values, Some(nulls));
690+
assert_eq!(list.len(), 2);
691+
}
677692
}

arrow-array/src/builder/fixed_size_list_builder.rs

+26-95
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,7 @@
1818
use crate::builder::ArrayBuilder;
1919
use crate::{ArrayRef, FixedSizeListArray};
2020
use arrow_buffer::NullBufferBuilder;
21-
use arrow_data::ArrayData;
22-
use arrow_schema::{DataType, Field, FieldRef};
21+
use arrow_schema::{Field, FieldRef};
2322
use std::any::Any;
2423
use std::sync::Arc;
2524

@@ -94,9 +93,9 @@ impl<T: ArrayBuilder> FixedSizeListBuilder<T> {
9493
}
9594
}
9695

97-
/// Override the field passed to [`ArrayData::builder`]
96+
/// Override the field passed to [`FixedSizeListArray::new`]
9897
///
99-
/// By default a nullable field is created with the name `item`
98+
/// By default, a nullable field is created with the name `item`
10099
///
101100
/// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the
102101
/// field's data type does not match that of `T`
@@ -169,116 +168,52 @@ where
169168
/// Builds the [`FixedSizeListBuilder`] and reset this builder.
170169
pub fn finish(&mut self) -> FixedSizeListArray {
171170
let len = self.len();
172-
let values_arr = self.values_builder.finish();
173-
let values_data = values_arr.to_data();
171+
let values = self.values_builder.finish();
172+
let nulls = self.null_buffer_builder.finish();
174173

175174
assert_eq!(
176-
values_data.len(), len * self.list_len as usize,
175+
values.len(), len * self.list_len as usize,
177176
"Length of the child array ({}) must be the multiple of the value length ({}) and the array length ({}).",
178-
values_data.len(),
177+
values.len(),
179178
self.list_len,
180179
len,
181180
);
182181

183-
let nulls = self.null_buffer_builder.finish();
182+
let field = self
183+
.field
184+
.clone()
185+
.unwrap_or_else(|| Arc::new(Field::new("item", values.data_type().clone(), true)));
184186

185-
let field = match &self.field {
186-
Some(f) => {
187-
let size = self.value_length();
188-
assert_eq!(
189-
f.data_type(),
190-
values_data.data_type(),
191-
"DataType of field ({}) should be the same as the values_builder DataType ({})",
192-
f.data_type(),
193-
values_data.data_type()
194-
);
195-
196-
if let Some(a) = values_arr.logical_nulls() {
197-
let nulls_valid = f.is_nullable()
198-
|| nulls
199-
.as_ref()
200-
.map(|n| n.expand(size as _).contains(&a))
201-
.unwrap_or_default();
202-
203-
assert!(
204-
nulls_valid,
205-
"Found unmasked nulls for non-nullable FixedSizeListBuilder field {:?}",
206-
f.name()
207-
);
208-
}
209-
f.clone()
210-
}
211-
None => Arc::new(Field::new("item", values_data.data_type().clone(), true)),
212-
};
213-
214-
let array_data = ArrayData::builder(DataType::FixedSizeList(field, self.list_len))
215-
.len(len)
216-
.add_child_data(values_data)
217-
.nulls(nulls);
218-
219-
let array_data = unsafe { array_data.build_unchecked() };
220-
221-
FixedSizeListArray::from(array_data)
187+
FixedSizeListArray::new(field, self.list_len, values, nulls)
222188
}
223189

224190
/// Builds the [`FixedSizeListBuilder`] without resetting the builder.
225191
pub fn finish_cloned(&self) -> FixedSizeListArray {
226192
let len = self.len();
227-
let values_arr = self.values_builder.finish_cloned();
228-
let values_data = values_arr.to_data();
193+
let values = self.values_builder.finish_cloned();
194+
let nulls = self.null_buffer_builder.finish_cloned();
229195

230196
assert_eq!(
231-
values_data.len(), len * self.list_len as usize,
197+
values.len(), len * self.list_len as usize,
232198
"Length of the child array ({}) must be the multiple of the value length ({}) and the array length ({}).",
233-
values_data.len(),
199+
values.len(),
234200
self.list_len,
235201
len,
236202
);
237203

238-
let nulls = self.null_buffer_builder.finish_cloned();
204+
let field = self
205+
.field
206+
.clone()
207+
.unwrap_or_else(|| Arc::new(Field::new("item", values.data_type().clone(), true)));
239208

240-
let field = match &self.field {
241-
Some(f) => {
242-
let size = self.value_length();
243-
assert_eq!(
244-
f.data_type(),
245-
values_data.data_type(),
246-
"DataType of field ({}) should be the same as the values_builder DataType ({})",
247-
f.data_type(),
248-
values_data.data_type()
249-
);
250-
if let Some(a) = values_arr.logical_nulls() {
251-
let nulls_valid = f.is_nullable()
252-
|| nulls
253-
.as_ref()
254-
.map(|n| n.expand(size as _).contains(&a))
255-
.unwrap_or_default();
256-
257-
assert!(
258-
nulls_valid,
259-
"Found unmasked nulls for non-nullable FixedSizeListBuilder field {:?}",
260-
f.name()
261-
);
262-
}
263-
f.clone()
264-
}
265-
None => Arc::new(Field::new("item", values_data.data_type().clone(), true)),
266-
};
267-
268-
let array_data = ArrayData::builder(DataType::FixedSizeList(field, self.list_len))
269-
.len(len)
270-
.add_child_data(values_data)
271-
.nulls(nulls);
272-
273-
let array_data = unsafe { array_data.build_unchecked() };
274-
275-
FixedSizeListArray::from(array_data)
209+
FixedSizeListArray::new(field, self.list_len, values, nulls)
276210
}
277211
}
278212

279213
#[cfg(test)]
280214
mod tests {
281215
use super::*;
216+
use arrow_schema::DataType;
282217

283218
use crate::builder::Int32Builder;
284219
use crate::Array;
@@ -368,7 +303,7 @@ mod tests {
368303
}
369304

370305
#[test]
371-
#[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListBuilder field")]
306+
#[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListArray")]
372307
fn test_fixed_size_list_array_builder_with_field_null_panic() {
373308
let builder = make_list_builder(true, true);
374309
let mut builder = builder.with_field(Field::new("list_item", DataType::Int32, false));
@@ -377,9 +312,7 @@ mod tests {
377312
}
378313

379314
#[test]
380-
#[should_panic(
381-
expected = "DataType of field (Int64) should be the same as the values_builder DataType (Int32)"
382-
)]
315+
#[should_panic(expected = "FixedSizeListArray expected data type Int64 got Int32")]
383316
fn test_fixed_size_list_array_builder_with_field_type_panic() {
384317
let values_builder = Int32Builder::new();
385318
let builder = FixedSizeListBuilder::new(values_builder, 3);
@@ -417,7 +350,7 @@ mod tests {
417350
}
418351

419352
#[test]
420-
#[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListBuilder field")]
353+
#[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListArray")]
421354
fn test_fixed_size_list_array_builder_cloned_with_field_null_panic() {
422355
let builder = make_list_builder(true, true);
423356
let builder = builder.with_field(Field::new("list_item", DataType::Int32, false));
@@ -439,9 +372,7 @@ mod tests {
439372
}
440373

441374
#[test]
442-
#[should_panic(
443-
expected = "DataType of field (Int64) should be the same as the values_builder DataType (Int32)"
444-
)]
375+
#[should_panic(expected = "FixedSizeListArray expected data type Int64 got Int32")]
445376
fn test_fixed_size_list_array_builder_cloned_with_field_type_panic() {
446377
let builder = make_list_builder(false, false);
447378
let builder = builder.with_field(Field::new("list_item", DataType::Int64, true));

0 commit comments

Comments
 (0)