Skip to content

Commit ad3b4c9

Browse files
Kikkon and alamb authored
Add DataType::ListView and DataType::LargeListView (#5493)
* Add DataType::ListView and DataType::LargeListView
* revert some file to main
* revert some file to main
* revert some file to main
* fix: listview buffer init
* cargo clippy
* cargo fmt
* fix buffer init
* Update arrow-schema/src/datatype.rs
  Co-authored-by: Andrew Lamb <[email protected]>
* Update arrow-schema/src/datatype.rs
  Co-authored-by: Andrew Lamb <[email protected]>
---------
Co-authored-by: Andrew Lamb <[email protected]>
1 parent 19a3bb0 commit ad3b4c9

File tree

8 files changed

+55
-1
lines changed

8 files changed

+55
-1
lines changed

arrow-data/src/data.rs

+11
Original file line numberDiff line numberDiff line change
@@ -118,12 +118,20 @@ pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuff
118118
buffer.push(0i32);
119119
[buffer, empty_buffer]
120120
}
121+
DataType::ListView(_) => [
122+
MutableBuffer::new(capacity * mem::size_of::<i32>()),
123+
MutableBuffer::new(capacity * mem::size_of::<i32>()),
124+
],
121125
DataType::LargeList(_) => {
122126
// offset buffer always starts with a zero
123127
let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i64>());
124128
buffer.push(0i64);
125129
[buffer, empty_buffer]
126130
}
131+
DataType::LargeListView(_) => [
132+
MutableBuffer::new(capacity * mem::size_of::<i64>()),
133+
MutableBuffer::new(capacity * mem::size_of::<i64>()),
134+
],
127135
DataType::FixedSizeBinary(size) => {
128136
[MutableBuffer::new(capacity * *size as usize), empty_buffer]
129137
}
@@ -1549,6 +1557,9 @@ pub fn layout(data_type: &DataType) -> DataTypeLayout {
15491557
}
15501558
DataType::FixedSizeList(_, _) => DataTypeLayout::new_empty(), // all in child data
15511559
DataType::List(_) => DataTypeLayout::new_fixed_width::<i32>(),
1560+
DataType::ListView(_) | DataType::LargeListView(_) => {
1561+
unimplemented!("ListView/LargeListView not implemented")
1562+
}
15521563
DataType::LargeList(_) => DataTypeLayout::new_fixed_width::<i64>(),
15531564
DataType::Map(_, _) => DataTypeLayout::new_fixed_width::<i32>(),
15541565
DataType::Struct(_) => DataTypeLayout::new_empty(), // all in child data,

arrow-data/src/equal/mod.rs

+3
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,9 @@ fn equal_values(
100100
unimplemented!("BinaryView/Utf8View not yet implemented")
101101
}
102102
DataType::List(_) => list_equal::<i32>(lhs, rhs, lhs_start, rhs_start, len),
103+
DataType::ListView(_) | DataType::LargeListView(_) => {
104+
unimplemented!("ListView/LargeListView not yet implemented")
105+
}
103106
DataType::LargeList(_) => list_equal::<i64>(lhs, rhs, lhs_start, rhs_start, len),
104107
DataType::FixedSizeList(_, _) => fixed_list_equal(lhs, rhs, lhs_start, rhs_start, len),
105108
DataType::Struct(_) => struct_equal(lhs, rhs, lhs_start, rhs_start, len),

arrow-data/src/transform/mod.rs

+9
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,9 @@ fn build_extend(array: &ArrayData) -> Extend {
228228
unimplemented!("BinaryView/Utf8View not implemented")
229229
}
230230
DataType::Map(_, _) | DataType::List(_) => list::build_extend::<i32>(array),
231+
DataType::ListView(_) | DataType::LargeListView(_) => {
232+
unimplemented!("ListView/LargeListView not implemented")
233+
}
231234
DataType::LargeList(_) => list::build_extend::<i64>(array),
232235
DataType::Dictionary(_, _) => unreachable!("should use build_extend_dictionary"),
233236
DataType::Struct(_) => structure::build_extend(array),
@@ -273,6 +276,9 @@ fn build_extend_nulls(data_type: &DataType) -> ExtendNulls {
273276
unimplemented!("BinaryView/Utf8View not implemented")
274277
}
275278
DataType::Map(_, _) | DataType::List(_) => list::extend_nulls::<i32>,
279+
DataType::ListView(_) | DataType::LargeListView(_) => {
280+
unimplemented!("ListView/LargeListView not implemented")
281+
}
276282
DataType::LargeList(_) => list::extend_nulls::<i64>,
277283
DataType::Dictionary(child_data_type, _) => match child_data_type.as_ref() {
278284
DataType::UInt8 => primitive::extend_nulls::<u8>,
@@ -428,6 +434,9 @@ impl<'a> MutableArrayData<'a> {
428434
DataType::BinaryView | DataType::Utf8View => {
429435
unimplemented!("BinaryView/Utf8View not implemented")
430436
}
437+
DataType::ListView(_) | DataType::LargeListView(_) => {
438+
unimplemented!("ListView/LargeListView not implemented")
439+
}
431440
DataType::Map(_, _) | DataType::List(_) | DataType::LargeList(_) => {
432441
let children = arrays
433442
.iter()

arrow-integration-test/src/datatype.rs

+3
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,9 @@ pub fn data_type_to_json(data_type: &DataType) -> serde_json::Value {
281281
DataType::Union(_, _) => json!({"name": "union"}),
282282
DataType::List(_) => json!({ "name": "list"}),
283283
DataType::LargeList(_) => json!({ "name": "largelist"}),
284+
DataType::ListView(_) | DataType::LargeListView(_) => {
285+
unimplemented!("ListView/LargeListView not implemented")
286+
}
284287
DataType::FixedSizeList(_, length) => {
285288
json!({"name":"fixedsizelist", "listSize": length})
286289
}

arrow-ipc/src/convert.rs

+1
Original file line numberDiff line numberDiff line change
@@ -664,6 +664,7 @@ pub(crate) fn get_fb_field_type<'a>(
664664
children: Some(fbb.create_vector(&[child])),
665665
}
666666
}
667+
ListView(_) | LargeListView(_) => unimplemented!("ListView/LargeListView not implemented"),
667668
LargeList(ref list_type) => {
668669
let child = build_field(fbb, list_type);
669670
FBFieldType {

arrow-schema/src/datatype.rs

+25-1
Original file line numberDiff line numberDiff line change
@@ -228,12 +228,30 @@ pub enum DataType {
228228
///
229229
/// A single List array can store up to [`i32::MAX`] elements in total.
230230
List(FieldRef),
231+
232+
/// (NOT YET FULLY SUPPORTED) A list of some logical data type with variable length.
233+
///
234+
/// Note this data type is not yet fully supported. Using it with arrow APIs may result in `panic`s.
235+
///
236+
/// The ListView layout is defined by three buffers:
237+
/// a validity bitmap, an offsets buffer, and an additional sizes buffer.
238+
/// Sizes and offsets are both 32 bits for this type.
239+
ListView(FieldRef),
231240
/// A list of some logical data type with fixed length.
232241
FixedSizeList(FieldRef, i32),
233242
/// A list of some logical data type with variable length and 64-bit offsets.
234243
///
235244
/// A single LargeList array can store up to [`i64::MAX`] elements in total.
236245
LargeList(FieldRef),
246+
247+
/// (NOT YET FULLY SUPPORTED) A list of some logical data type with variable length and 64-bit offsets.
248+
///
249+
/// Note this data type is not yet fully supported. Using it with arrow APIs may result in `panic`s.
250+
///
251+
/// The LargeListView layout is defined by three buffers:
252+
/// a validity bitmap, an offsets buffer, and an additional sizes buffer.
253+
/// Sizes and offsets are both 64 bits for this type.
254+
LargeListView(FieldRef),
237255
/// A nested datatype that contains a number of sub-fields.
238256
Struct(Fields),
239257
/// A nested datatype that can represent slots of differing types. Components:
@@ -536,7 +554,11 @@ impl DataType {
536554
DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => None,
537555
DataType::Binary | DataType::LargeBinary | DataType::BinaryView => None,
538556
DataType::FixedSizeBinary(_) => None,
539-
DataType::List(_) | DataType::LargeList(_) | DataType::Map(_, _) => None,
557+
DataType::List(_)
558+
| DataType::ListView(_)
559+
| DataType::LargeList(_)
560+
| DataType::LargeListView(_)
561+
| DataType::Map(_, _) => None,
540562
DataType::FixedSizeList(_, _) => None,
541563
DataType::Struct(_) => None,
542564
DataType::Union(_, _) => None,
@@ -581,8 +603,10 @@ impl DataType {
581603
| DataType::Decimal256(_, _) => 0,
582604
DataType::Timestamp(_, s) => s.as_ref().map(|s| s.len()).unwrap_or_default(),
583605
DataType::List(field)
606+
| DataType::ListView(field)
584607
| DataType::FixedSizeList(field, _)
585608
| DataType::LargeList(field)
609+
| DataType::LargeListView(field)
586610
| DataType::Map(field, _) => field.size(),
587611
DataType::Struct(fields) => fields.size(),
588612
DataType::Union(fields, _) => fields.size(),

arrow-schema/src/field.rs

+2
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,9 @@ impl Field {
510510
| DataType::BinaryView
511511
| DataType::Interval(_)
512512
| DataType::LargeList(_)
513+
| DataType::LargeListView(_)
513514
| DataType::List(_)
515+
| DataType::ListView(_)
514516
| DataType::Map(_, _)
515517
| DataType::Dictionary(_, _)
516518
| DataType::RunEndEncoded(_, _)

parquet/src/arrow/schema/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,7 @@ fn arrow_to_parquet_type(field: &Field) -> Result<Type> {
528528
.with_id(id)
529529
.build()
530530
}
531+
DataType::ListView(_) | DataType::LargeListView(_) => unimplemented!("ListView/LargeListView not implemented"),
531532
DataType::Struct(fields) => {
532533
if fields.is_empty() {
533534
return Err(

0 commit comments

Comments
 (0)