Skip to content

Commit 1b3d1a9

Browse files
authored
feat: implemented with_field() for FixedSizeListBuilder (#5541)
* feat: implemented with_field() for FixedSizeListBuilder * Switched back to build_unchecked, assertions on array_data * decreased code repetition * Fixed null logic * Added unit tests for empty with field * Fixed clippy issue
1 parent 51c1b4b commit 1b3d1a9

File tree

1 file changed

+249
-33
lines changed

1 file changed

+249
-33
lines changed

arrow-array/src/builder/fixed_size_list_builder.rs

+249-33
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use crate::builder::ArrayBuilder;
1919
use crate::{ArrayRef, FixedSizeListArray};
2020
use arrow_buffer::NullBufferBuilder;
2121
use arrow_data::ArrayData;
22-
use arrow_schema::{DataType, Field};
22+
use arrow_schema::{DataType, Field, FieldRef};
2323
use std::any::Any;
2424
use std::sync::Arc;
2525

@@ -67,6 +67,7 @@ pub struct FixedSizeListBuilder<T: ArrayBuilder> {
6767
null_buffer_builder: NullBufferBuilder,
6868
values_builder: T,
6969
list_len: i32,
70+
field: Option<FieldRef>,
7071
}
7172

7273
impl<T: ArrayBuilder> FixedSizeListBuilder<T> {
@@ -89,6 +90,20 @@ impl<T: ArrayBuilder> FixedSizeListBuilder<T> {
8990
null_buffer_builder: NullBufferBuilder::new(capacity),
9091
values_builder,
9192
list_len: value_length,
93+
field: None,
94+
}
95+
}
96+
97+
/// Override the field passed to [`ArrayData::builder`]
98+
///
99+
/// By default a nullable field is created with the name `item`
100+
///
101+
/// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the
102+
/// field's data type does not match that of `T`
103+
pub fn with_field(self, field: impl Into<FieldRef>) -> Self {
104+
Self {
105+
field: Some(field.into()),
106+
..self
92107
}
93108
}
94109
}
@@ -166,13 +181,40 @@ where
166181
);
167182

168183
let nulls = self.null_buffer_builder.finish();
169-
let array_data = ArrayData::builder(DataType::FixedSizeList(
170-
Arc::new(Field::new("item", values_data.data_type().clone(), true)),
171-
self.list_len,
172-
))
173-
.len(len)
174-
.add_child_data(values_data)
175-
.nulls(nulls);
184+
185+
let field = match &self.field {
186+
Some(f) => {
187+
let size = self.value_length();
188+
assert_eq!(
189+
f.data_type(),
190+
values_data.data_type(),
191+
"DataType of field ({}) should be the same as the values_builder DataType ({})",
192+
f.data_type(),
193+
values_data.data_type()
194+
);
195+
196+
if let Some(a) = values_arr.logical_nulls() {
197+
let nulls_valid = f.is_nullable()
198+
|| nulls
199+
.as_ref()
200+
.map(|n| n.expand(size as _).contains(&a))
201+
.unwrap_or_default();
202+
203+
assert!(
204+
nulls_valid,
205+
"Found unmasked nulls for non-nullable FixedSizeListBuilder field {:?}",
206+
f.name()
207+
);
208+
}
209+
f.clone()
210+
}
211+
None => Arc::new(Field::new("item", values_data.data_type().clone(), true)),
212+
};
213+
214+
let array_data = ArrayData::builder(DataType::FixedSizeList(field, self.list_len))
215+
.len(len)
216+
.add_child_data(values_data)
217+
.nulls(nulls);
176218

177219
let array_data = unsafe { array_data.build_unchecked() };
178220

@@ -194,13 +236,39 @@ where
194236
);
195237

196238
let nulls = self.null_buffer_builder.finish_cloned();
197-
let array_data = ArrayData::builder(DataType::FixedSizeList(
198-
Arc::new(Field::new("item", values_data.data_type().clone(), true)),
199-
self.list_len,
200-
))
201-
.len(len)
202-
.add_child_data(values_data)
203-
.nulls(nulls);
239+
240+
let field = match &self.field {
241+
Some(f) => {
242+
let size = self.value_length();
243+
assert_eq!(
244+
f.data_type(),
245+
values_data.data_type(),
246+
"DataType of field ({}) should be the same as the values_builder DataType ({})",
247+
f.data_type(),
248+
values_data.data_type()
249+
);
250+
if let Some(a) = values_arr.logical_nulls() {
251+
let nulls_valid = f.is_nullable()
252+
|| nulls
253+
.as_ref()
254+
.map(|n| n.expand(size as _).contains(&a))
255+
.unwrap_or_default();
256+
257+
assert!(
258+
nulls_valid,
259+
"Found unmasked nulls for non-nullable FixedSizeListBuilder field {:?}",
260+
f.name()
261+
);
262+
}
263+
f.clone()
264+
}
265+
None => Arc::new(Field::new("item", values_data.data_type().clone(), true)),
266+
};
267+
268+
let array_data = ArrayData::builder(DataType::FixedSizeList(field, self.list_len))
269+
.len(len)
270+
.add_child_data(values_data)
271+
.nulls(nulls);
204272

205273
let array_data = unsafe { array_data.build_unchecked() };
206274

@@ -216,28 +284,54 @@ mod tests {
216284
use crate::Array;
217285
use crate::Int32Array;
218286

219-
#[test]
220-
fn test_fixed_size_list_array_builder() {
287+
fn make_list_builder(
288+
include_null_element: bool,
289+
include_null_in_values: bool,
290+
) -> FixedSizeListBuilder<crate::builder::PrimitiveBuilder<crate::types::Int32Type>> {
221291
let values_builder = Int32Builder::new();
222292
let mut builder = FixedSizeListBuilder::new(values_builder, 3);
223293

224-
// [[0, 1, 2], null, [3, null, 5], [6, 7, null]]
225294
builder.values().append_value(0);
226295
builder.values().append_value(1);
227296
builder.values().append_value(2);
228297
builder.append(true);
229-
builder.values().append_null();
230-
builder.values().append_null();
231-
builder.values().append_null();
232-
builder.append(false);
298+
299+
builder.values().append_value(2);
233300
builder.values().append_value(3);
234-
builder.values().append_null();
235-
builder.values().append_value(5);
236-
builder.append(true);
237-
builder.values().append_value(6);
238-
builder.values().append_value(7);
239-
builder.values().append_null();
301+
builder.values().append_value(4);
240302
builder.append(true);
303+
304+
if include_null_element {
305+
builder.values().append_null();
306+
builder.values().append_null();
307+
builder.values().append_null();
308+
builder.append(false);
309+
} else {
310+
builder.values().append_value(2);
311+
builder.values().append_value(3);
312+
builder.values().append_value(4);
313+
builder.append(true);
314+
}
315+
316+
if include_null_in_values {
317+
builder.values().append_value(3);
318+
builder.values().append_null();
319+
builder.values().append_value(5);
320+
builder.append(true);
321+
} else {
322+
builder.values().append_value(3);
323+
builder.values().append_value(4);
324+
builder.values().append_value(5);
325+
builder.append(true);
326+
}
327+
328+
builder
329+
}
330+
331+
#[test]
332+
fn test_fixed_size_list_array_builder() {
333+
let mut builder = make_list_builder(true, true);
334+
241335
let list_array = builder.finish();
242336

243337
assert_eq!(DataType::Int32, list_array.value_type());
@@ -248,9 +342,48 @@ mod tests {
248342
}
249343

250344
#[test]
251-
fn test_fixed_size_list_array_builder_finish_cloned() {
345+
fn test_fixed_size_list_array_builder_with_field() {
346+
let builder = make_list_builder(false, false);
347+
let mut builder = builder.with_field(Field::new("list_element", DataType::Int32, false));
348+
let list_array = builder.finish();
349+
350+
assert_eq!(DataType::Int32, list_array.value_type());
351+
assert_eq!(4, list_array.len());
352+
assert_eq!(0, list_array.null_count());
353+
assert_eq!(6, list_array.value_offset(2));
354+
assert_eq!(3, list_array.value_length());
355+
}
356+
357+
#[test]
358+
fn test_fixed_size_list_array_builder_with_field_and_null() {
359+
let builder = make_list_builder(true, false);
360+
let mut builder = builder.with_field(Field::new("list_element", DataType::Int32, false));
361+
let list_array = builder.finish();
362+
363+
assert_eq!(DataType::Int32, list_array.value_type());
364+
assert_eq!(4, list_array.len());
365+
assert_eq!(1, list_array.null_count());
366+
assert_eq!(6, list_array.value_offset(2));
367+
assert_eq!(3, list_array.value_length());
368+
}
369+
370+
#[test]
371+
#[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListBuilder field")]
372+
fn test_fixed_size_list_array_builder_with_field_null_panic() {
373+
let builder = make_list_builder(true, true);
374+
let mut builder = builder.with_field(Field::new("list_item", DataType::Int32, false));
375+
376+
builder.finish();
377+
}
378+
379+
#[test]
380+
#[should_panic(
381+
expected = "DataType of field (Int64) should be the same as the values_builder DataType (Int32)"
382+
)]
383+
fn test_fixed_size_list_array_builder_with_field_type_panic() {
252384
let values_builder = Int32Builder::new();
253-
let mut builder = FixedSizeListBuilder::new(values_builder, 3);
385+
let builder = FixedSizeListBuilder::new(values_builder, 3);
386+
let mut builder = builder.with_field(Field::new("list_item", DataType::Int64, true));
254387

255388
// [[0, 1, 2], null, [3, 4, 5]]
256389
builder.values().append_value(0);
@@ -262,13 +395,68 @@ mod tests {
262395
builder.values().append_null();
263396
builder.append(false);
264397
builder.values().append_value(3);
265-
builder.values().append_null();
398+
builder.values().append_value(4);
266399
builder.values().append_value(5);
267400
builder.append(true);
401+
402+
builder.finish();
403+
}
404+
405+
#[test]
406+
fn test_fixed_size_list_array_builder_cloned_with_field() {
407+
let builder = make_list_builder(true, true);
408+
let builder = builder.with_field(Field::new("list_element", DataType::Int32, true));
409+
410+
let list_array = builder.finish_cloned();
411+
412+
assert_eq!(DataType::Int32, list_array.value_type());
413+
assert_eq!(4, list_array.len());
414+
assert_eq!(1, list_array.null_count());
415+
assert_eq!(6, list_array.value_offset(2));
416+
assert_eq!(3, list_array.value_length());
417+
}
418+
419+
#[test]
420+
#[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListBuilder field")]
421+
fn test_fixed_size_list_array_builder_cloned_with_field_null_panic() {
422+
let builder = make_list_builder(true, true);
423+
let builder = builder.with_field(Field::new("list_item", DataType::Int32, false));
424+
425+
builder.finish_cloned();
426+
}
427+
428+
#[test]
429+
fn test_fixed_size_list_array_builder_cloned_with_field_and_null() {
430+
let builder = make_list_builder(true, false);
431+
let mut builder = builder.with_field(Field::new("list_element", DataType::Int32, false));
432+
let list_array = builder.finish();
433+
434+
assert_eq!(DataType::Int32, list_array.value_type());
435+
assert_eq!(4, list_array.len());
436+
assert_eq!(1, list_array.null_count());
437+
assert_eq!(6, list_array.value_offset(2));
438+
assert_eq!(3, list_array.value_length());
439+
}
440+
441+
#[test]
442+
#[should_panic(
443+
expected = "DataType of field (Int64) should be the same as the values_builder DataType (Int32)"
444+
)]
445+
fn test_fixed_size_list_array_builder_cloned_with_field_type_panic() {
446+
let builder = make_list_builder(false, false);
447+
let builder = builder.with_field(Field::new("list_item", DataType::Int64, true));
448+
449+
builder.finish_cloned();
450+
}
451+
452+
#[test]
453+
fn test_fixed_size_list_array_builder_finish_cloned() {
454+
let mut builder = make_list_builder(true, true);
455+
268456
let mut list_array = builder.finish_cloned();
269457

270458
assert_eq!(DataType::Int32, list_array.value_type());
271-
assert_eq!(3, list_array.len());
459+
assert_eq!(4, list_array.len());
272460
assert_eq!(1, list_array.null_count());
273461
assert_eq!(3, list_array.value_length());
274462

@@ -283,12 +471,40 @@ mod tests {
283471
list_array = builder.finish();
284472

285473
assert_eq!(DataType::Int32, list_array.value_type());
286-
assert_eq!(5, list_array.len());
474+
assert_eq!(6, list_array.len());
287475
assert_eq!(2, list_array.null_count());
288476
assert_eq!(6, list_array.value_offset(2));
289477
assert_eq!(3, list_array.value_length());
290478
}
291479

480+
#[test]
481+
fn test_fixed_size_list_array_builder_with_field_empty() {
482+
let values_builder = Int32Array::builder(0);
483+
let mut builder = FixedSizeListBuilder::new(values_builder, 3).with_field(Field::new(
484+
"list_item",
485+
DataType::Int32,
486+
false,
487+
));
488+
assert!(builder.is_empty());
489+
let arr = builder.finish();
490+
assert_eq!(0, arr.len());
491+
assert_eq!(0, builder.len());
492+
}
493+
494+
#[test]
495+
fn test_fixed_size_list_array_builder_cloned_with_field_empty() {
496+
let values_builder = Int32Array::builder(0);
497+
let builder = FixedSizeListBuilder::new(values_builder, 3).with_field(Field::new(
498+
"list_item",
499+
DataType::Int32,
500+
false,
501+
));
502+
assert!(builder.is_empty());
503+
let arr = builder.finish_cloned();
504+
assert_eq!(0, arr.len());
505+
assert_eq!(0, builder.len());
506+
}
507+
292508
#[test]
293509
fn test_fixed_size_list_array_builder_empty() {
294510
let values_builder = Int32Array::builder(5);

0 commit comments

Comments
 (0)