Skip to content

Commit 3b90fc9

Browse files
authored
bugfix: correct offsets when serializing a list of fixed sized list and non-zero start offset (#7318)
* When serializing fixed length arrays, adjust the offsets for writing out * Add unit test * clippy warnings * Add unit test for nulls * Update unit test to account for which schema had nulls
1 parent 87ff531 commit 3b90fc9

File tree

1 file changed

+231
-0
lines changed

1 file changed

+231
-0
lines changed

arrow-ipc/src/writer.rs

Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1727,6 +1727,26 @@ fn write_array_data(
17271727
write_options,
17281728
)?;
17291729
return Ok(offset);
1730+
} else if let DataType::FixedSizeList(_, fixed_size) = data_type {
1731+
assert_eq!(array_data.child_data().len(), 1);
1732+
let fixed_size = *fixed_size as usize;
1733+
1734+
let child_offset = array_data.offset() * fixed_size;
1735+
let child_length = array_data.len() * fixed_size;
1736+
let child_data = array_data.child_data()[0].slice(child_offset, child_length);
1737+
1738+
offset = write_array_data(
1739+
&child_data,
1740+
buffers,
1741+
arrow_data,
1742+
nodes,
1743+
offset,
1744+
child_data.len(),
1745+
child_data.null_count(),
1746+
compression_codec,
1747+
write_options,
1748+
)?;
1749+
return Ok(offset);
17301750
} else {
17311751
for buffer in array_data.buffers() {
17321752
offset = write_buffer(
@@ -1837,6 +1857,9 @@ mod tests {
18371857
use std::io::Cursor;
18381858
use std::io::Seek;
18391859

1860+
use arrow_array::builder::FixedSizeListBuilder;
1861+
use arrow_array::builder::Float32Builder;
1862+
use arrow_array::builder::Int64Builder;
18401863
use arrow_array::builder::MapBuilder;
18411864
use arrow_array::builder::UnionBuilder;
18421865
use arrow_array::builder::{GenericListBuilder, ListBuilder, StringBuilder};
@@ -3075,4 +3098,212 @@ mod tests {
30753098
assert_eq!(stream_bytes_written_on_flush, expected_stream_flushed_bytes);
30763099
assert_eq!(file_bytes_written_on_flush, expected_file_flushed_bytes);
30773100
}
3101+
3102+
#[test]
3103+
fn test_roundtrip_list_of_fixed_list() -> Result<(), ArrowError> {
3104+
let l1_type =
3105+
DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float32, false)), 3);
3106+
let l2_type = DataType::List(Arc::new(Field::new("item", l1_type.clone(), false)));
3107+
3108+
let l0_builder = Float32Builder::new();
3109+
let l1_builder = FixedSizeListBuilder::new(l0_builder, 3).with_field(Arc::new(Field::new(
3110+
"item",
3111+
DataType::Float32,
3112+
false,
3113+
)));
3114+
let mut l2_builder =
3115+
ListBuilder::new(l1_builder).with_field(Arc::new(Field::new("item", l1_type, false)));
3116+
3117+
for point in [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]] {
3118+
l2_builder.values().values().append_value(point[0]);
3119+
l2_builder.values().values().append_value(point[1]);
3120+
l2_builder.values().values().append_value(point[2]);
3121+
3122+
l2_builder.values().append(true);
3123+
}
3124+
l2_builder.append(true);
3125+
3126+
let point = [10., 11., 12.];
3127+
l2_builder.values().values().append_value(point[0]);
3128+
l2_builder.values().values().append_value(point[1]);
3129+
l2_builder.values().values().append_value(point[2]);
3130+
3131+
l2_builder.values().append(true);
3132+
l2_builder.append(true);
3133+
3134+
let array = Arc::new(l2_builder.finish()) as ArrayRef;
3135+
3136+
let schema = Arc::new(Schema::new_with_metadata(
3137+
vec![Field::new("points", l2_type, false)],
3138+
HashMap::default(),
3139+
));
3140+
3141+
// Test a variety of combinations that include 0 and non-zero offsets
3142+
// and also portions or the rest of the array
3143+
test_slices(&array, &schema, 0, 1)?;
3144+
test_slices(&array, &schema, 0, 2)?;
3145+
test_slices(&array, &schema, 1, 1)?;
3146+
3147+
Ok(())
3148+
}
3149+
3150+
#[test]
3151+
fn test_roundtrip_list_of_fixed_list_w_nulls() -> Result<(), ArrowError> {
3152+
let l0_builder = Float32Builder::new();
3153+
let l1_builder = FixedSizeListBuilder::new(l0_builder, 3);
3154+
let mut l2_builder = ListBuilder::new(l1_builder);
3155+
3156+
for point in [
3157+
[Some(1.0), Some(2.0), None],
3158+
[Some(4.0), Some(5.0), Some(6.0)],
3159+
[None, Some(8.0), Some(9.0)],
3160+
] {
3161+
for p in point {
3162+
match p {
3163+
Some(p) => l2_builder.values().values().append_value(p),
3164+
None => l2_builder.values().values().append_null(),
3165+
}
3166+
}
3167+
3168+
l2_builder.values().append(true);
3169+
}
3170+
l2_builder.append(true);
3171+
3172+
let point = [Some(10.), None, None];
3173+
for p in point {
3174+
match p {
3175+
Some(p) => l2_builder.values().values().append_value(p),
3176+
None => l2_builder.values().values().append_null(),
3177+
}
3178+
}
3179+
3180+
l2_builder.values().append(true);
3181+
l2_builder.append(true);
3182+
3183+
let array = Arc::new(l2_builder.finish()) as ArrayRef;
3184+
3185+
let schema = Arc::new(Schema::new_with_metadata(
3186+
vec![Field::new(
3187+
"points",
3188+
DataType::List(Arc::new(Field::new(
3189+
"item",
3190+
DataType::FixedSizeList(
3191+
Arc::new(Field::new("item", DataType::Float32, true)),
3192+
3,
3193+
),
3194+
true,
3195+
))),
3196+
true,
3197+
)],
3198+
HashMap::default(),
3199+
));
3200+
3201+
// Test a variety of combinations that include 0 and non-zero offsets
3202+
// and also portions or the rest of the array
3203+
test_slices(&array, &schema, 0, 1)?;
3204+
test_slices(&array, &schema, 0, 2)?;
3205+
test_slices(&array, &schema, 1, 1)?;
3206+
3207+
Ok(())
3208+
}
3209+
3210+
fn test_slices(
3211+
parent_array: &ArrayRef,
3212+
schema: &SchemaRef,
3213+
offset: usize,
3214+
length: usize,
3215+
) -> Result<(), ArrowError> {
3216+
let subarray = parent_array.slice(offset, length);
3217+
let original_batch = RecordBatch::try_new(schema.clone(), vec![subarray])?;
3218+
3219+
let mut bytes = Vec::new();
3220+
let mut writer = StreamWriter::try_new(&mut bytes, schema)?;
3221+
writer.write(&original_batch)?;
3222+
writer.finish()?;
3223+
3224+
let mut cursor = std::io::Cursor::new(bytes);
3225+
let mut reader = StreamReader::try_new(&mut cursor, None)?;
3226+
let returned_batch = reader.next().unwrap()?;
3227+
3228+
assert_eq!(original_batch, returned_batch);
3229+
3230+
Ok(())
3231+
}
3232+
3233+
#[test]
3234+
fn test_roundtrip_fixed_list() -> Result<(), ArrowError> {
3235+
let int_builder = Int64Builder::new();
3236+
let mut fixed_list_builder = FixedSizeListBuilder::new(int_builder, 3)
3237+
.with_field(Arc::new(Field::new("item", DataType::Int64, false)));
3238+
3239+
for point in [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]] {
3240+
fixed_list_builder.values().append_value(point[0]);
3241+
fixed_list_builder.values().append_value(point[1]);
3242+
fixed_list_builder.values().append_value(point[2]);
3243+
3244+
fixed_list_builder.append(true);
3245+
}
3246+
3247+
let array = Arc::new(fixed_list_builder.finish()) as ArrayRef;
3248+
3249+
let schema = Arc::new(Schema::new_with_metadata(
3250+
vec![Field::new(
3251+
"points",
3252+
DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int64, false)), 3),
3253+
false,
3254+
)],
3255+
HashMap::default(),
3256+
));
3257+
3258+
// Test a variety of combinations that include 0 and non-zero offsets
3259+
// and also portions or the rest of the array
3260+
test_slices(&array, &schema, 0, 4)?;
3261+
test_slices(&array, &schema, 0, 2)?;
3262+
test_slices(&array, &schema, 1, 3)?;
3263+
test_slices(&array, &schema, 2, 1)?;
3264+
3265+
Ok(())
3266+
}
3267+
3268+
#[test]
3269+
fn test_roundtrip_fixed_list_w_nulls() -> Result<(), ArrowError> {
3270+
let int_builder = Int64Builder::new();
3271+
let mut fixed_list_builder = FixedSizeListBuilder::new(int_builder, 3);
3272+
3273+
for point in [
3274+
[Some(1), Some(2), None],
3275+
[Some(4), Some(5), Some(6)],
3276+
[None, Some(8), Some(9)],
3277+
[Some(10), None, None],
3278+
] {
3279+
for p in point {
3280+
match p {
3281+
Some(p) => fixed_list_builder.values().append_value(p),
3282+
None => fixed_list_builder.values().append_null(),
3283+
}
3284+
}
3285+
3286+
fixed_list_builder.append(true);
3287+
}
3288+
3289+
let array = Arc::new(fixed_list_builder.finish()) as ArrayRef;
3290+
3291+
let schema = Arc::new(Schema::new_with_metadata(
3292+
vec![Field::new(
3293+
"points",
3294+
DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int64, true)), 3),
3295+
true,
3296+
)],
3297+
HashMap::default(),
3298+
));
3299+
3300+
// Test a variety of combinations that include 0 and non-zero offsets
3301+
// and also portions or the rest of the array
3302+
test_slices(&array, &schema, 0, 4)?;
3303+
test_slices(&array, &schema, 0, 2)?;
3304+
test_slices(&array, &schema, 1, 3)?;
3305+
test_slices(&array, &schema, 2, 1)?;
3306+
3307+
Ok(())
3308+
}
30783309
}

0 commit comments

Comments
 (0)