16
16
// under the License.
17
17
18
18
use std:: any:: Any ;
19
- use std:: cmp:: { max, min } ;
19
+ use std:: cmp:: max;
20
20
use std:: marker:: PhantomData ;
21
- use std:: mem:: size_of;
22
21
use std:: result:: Result :: Ok ;
23
22
use std:: sync:: Arc ;
24
23
use std:: vec:: Vec ;
25
24
26
25
use arrow:: array:: {
27
26
Array , ArrayData , ArrayDataBuilder , ArrayRef , BooleanArray , BooleanBufferBuilder ,
28
- DecimalArray , Int16BufferBuilder , Int32Array , Int64Array , PrimitiveArray ,
29
- StructArray ,
27
+ DecimalArray , Int32Array , Int64Array , PrimitiveArray , StructArray ,
30
28
} ;
31
- use arrow:: buffer:: { Buffer , MutableBuffer } ;
29
+ use arrow:: buffer:: Buffer ;
32
30
use arrow:: datatypes:: {
33
31
ArrowPrimitiveType , BooleanType as ArrowBooleanType , DataType as ArrowType ,
34
32
Float32Type as ArrowFloat32Type , Float64Type as ArrowFloat64Type ,
@@ -655,8 +653,7 @@ pub struct StructArrayReader {
655
653
data_type : ArrowType ,
656
654
struct_def_level : i16 ,
657
655
struct_rep_level : i16 ,
658
- def_level_buffer : Option < Buffer > ,
659
- rep_level_buffer : Option < Buffer > ,
656
+ nullable : bool ,
660
657
}
661
658
662
659
impl StructArrayReader {
@@ -666,14 +663,14 @@ impl StructArrayReader {
666
663
children : Vec < Box < dyn ArrayReader > > ,
667
664
def_level : i16 ,
668
665
rep_level : i16 ,
666
+ nullable : bool ,
669
667
) -> Self {
670
668
Self {
671
669
data_type,
672
670
children,
673
671
struct_def_level : def_level,
674
672
struct_rep_level : rep_level,
675
- def_level_buffer : None ,
676
- rep_level_buffer : None ,
673
+ nullable,
677
674
}
678
675
}
679
676
}
@@ -708,8 +705,6 @@ impl ArrayReader for StructArrayReader {
708
705
/// ```
709
706
fn next_batch ( & mut self , batch_size : usize ) -> Result < ArrayRef > {
710
707
if self . children . is_empty ( ) {
711
- self . def_level_buffer = None ;
712
- self . rep_level_buffer = None ;
713
708
return Ok ( Arc :: new ( StructArray :: from ( Vec :: new ( ) ) ) ) ;
714
709
}
715
710
@@ -742,80 +737,59 @@ impl ArrayReader for StructArrayReader {
742
737
. collect :: < Vec < ArrayData > > ( ) ,
743
738
) ;
744
739
745
- if self . struct_def_level != 0 {
740
+ if self . nullable {
746
741
// calculate struct def level data
747
- let buffer_size = children_array_len * size_of :: < i16 > ( ) ;
748
- let mut def_level_data_buffer = MutableBuffer :: new ( buffer_size) ;
749
- def_level_data_buffer. resize ( buffer_size, 0 ) ;
750
742
751
- // Safety: the buffer is always treated as `u16` in the code below
752
- let def_level_data = unsafe { def_level_data_buffer. typed_data_mut ( ) } ;
743
+ // children should have consistent view of parent, only need to inspect first child
744
+ let def_levels = self . children [ 0 ]
745
+ . get_def_levels ( )
746
+ . expect ( "child with nullable parents must have definition level" ) ;
753
747
754
- def_level_data
755
- . iter_mut ( )
756
- . for_each ( |v| * v = self . struct_def_level ) ;
748
+ // calculate bitmap for current array
749
+ let mut bitmap_builder = BooleanBufferBuilder :: new ( children_array_len) ;
757
750
758
- for child in & self . children {
759
- if let Some ( current_child_def_levels) = child. get_def_levels ( ) {
760
- if current_child_def_levels. len ( ) != children_array_len {
761
- return Err ( general_err ! ( "Child array length are not equal!" ) ) ;
762
- } else {
763
- for i in 0 ..children_array_len {
764
- def_level_data[ i] =
765
- min ( def_level_data[ i] , current_child_def_levels[ i] ) ;
751
+ match self . children [ 0 ] . get_rep_levels ( ) {
752
+ Some ( rep_levels) => {
753
+ // Sanity check
754
+ assert_eq ! ( rep_levels. len( ) , def_levels. len( ) ) ;
755
+
756
+ for ( rep_level, def_level) in rep_levels. iter ( ) . zip ( def_levels) {
757
+ if rep_level > & self . struct_rep_level {
758
+ // Already handled by inner list - SKIP
759
+ continue ;
766
760
}
761
+ bitmap_builder. append ( * def_level >= self . struct_def_level )
762
+ }
763
+ }
764
+ None => {
765
+ for def_level in def_levels {
766
+ bitmap_builder. append ( * def_level >= self . struct_def_level )
767
767
}
768
768
}
769
769
}
770
770
771
- // calculate bitmap for current array
772
- let mut bitmap_builder = BooleanBufferBuilder :: new ( children_array_len) ;
773
- for def_level in def_level_data {
774
- let not_null = * def_level >= self . struct_def_level ;
775
- bitmap_builder. append ( not_null) ;
771
+ if bitmap_builder. len ( ) != children_array_len {
772
+ return Err ( general_err ! ( "Failed to decode level data for struct array" ) ) ;
776
773
}
777
774
778
775
array_data_builder =
779
776
array_data_builder. null_bit_buffer ( bitmap_builder. finish ( ) ) ;
780
-
781
- self . def_level_buffer = Some ( def_level_data_buffer. into ( ) ) ;
782
777
}
783
778
784
779
let array_data = unsafe { array_data_builder. build_unchecked ( ) } ;
785
-
786
- if self . struct_rep_level != 0 {
787
- // calculate struct rep level data, since struct doesn't add to repetition
788
- // levels, here we just need to keep repetition levels of first array
789
- // TODO: Verify that all children array reader has same repetition levels
790
- let rep_level_data = self
791
- . children
792
- . first ( )
793
- . ok_or_else ( || {
794
- general_err ! ( "Struct array reader should have at least one child!" )
795
- } ) ?
796
- . get_rep_levels ( )
797
- . map ( |data| -> Result < Buffer > {
798
- let mut buffer = Int16BufferBuilder :: new ( children_array_len) ;
799
- buffer. append_slice ( data) ;
800
- Ok ( buffer. finish ( ) )
801
- } )
802
- . transpose ( ) ?;
803
-
804
- self . rep_level_buffer = rep_level_data;
805
- }
806
780
Ok ( Arc :: new ( StructArray :: from ( array_data) ) )
807
781
}
808
782
809
783
fn get_def_levels ( & self ) -> Option < & [ i16 ] > {
810
- self . def_level_buffer
811
- . as_ref ( )
812
- . map ( |buf| unsafe { buf . typed_data ( ) } )
784
+ // Children definition levels should describe the same
785
+ // parent structure, so return first child's
786
+ self . children . first ( ) . and_then ( |l| l . get_def_levels ( ) )
813
787
}
814
788
815
789
fn get_rep_levels ( & self ) -> Option < & [ i16 ] > {
816
- self . rep_level_buffer
817
- . as_ref ( )
818
- . map ( |buf| unsafe { buf . typed_data ( ) } )
790
+ // Children repetition levels should describe the same
791
+ // parent structure, so return first child's
792
+ self . children . first ( ) . and_then ( |l| l . get_rep_levels ( ) )
819
793
}
820
794
}
821
795
@@ -828,7 +802,9 @@ mod tests {
828
802
use rand:: { thread_rng, Rng } ;
829
803
830
804
use crate :: arrow:: array_reader:: test_util:: InMemoryArrayReader ;
831
- use arrow:: array:: { Array , ArrayRef , PrimitiveArray , StringArray , StructArray } ;
805
+ use arrow:: array:: {
806
+ Array , ArrayRef , ListArray , PrimitiveArray , StringArray , StructArray ,
807
+ } ;
832
808
use arrow:: datatypes:: {
833
809
ArrowPrimitiveType , DataType as ArrowType , Date32Type as ArrowDate32 , Field ,
834
810
Int32Type as ArrowInt32 , Int64Type as ArrowInt64 ,
@@ -1553,6 +1529,7 @@ mod tests {
1553
1529
vec ! [ Box :: new( array_reader_1) , Box :: new( array_reader_2) ] ,
1554
1530
1 ,
1555
1531
1 ,
1532
+ true ,
1556
1533
) ;
1557
1534
1558
1535
let struct_array = struct_array_reader. next_batch ( 5 ) . unwrap ( ) ;
@@ -1566,12 +1543,74 @@ mod tests {
1566
1543
. collect:: <Vec <bool >>( )
1567
1544
) ;
1568
1545
assert_eq ! (
1569
- Some ( vec![ 0 , 1 , 1 , 1 , 1 ] . as_slice( ) ) ,
1546
+ Some ( vec![ 0 , 1 , 2 , 3 , 1 ] . as_slice( ) ) ,
1570
1547
struct_array_reader. get_def_levels( )
1571
1548
) ;
1572
1549
assert_eq ! (
1573
1550
Some ( vec![ 0 , 1 , 1 , 1 , 1 ] . as_slice( ) ) ,
1574
1551
struct_array_reader. get_rep_levels( )
1575
1552
) ;
1576
1553
}
1554
+
1555
+ #[ test]
1556
+ fn test_struct_array_reader_list ( ) {
1557
+ use arrow:: datatypes:: Int32Type ;
1558
+ // [
1559
+ // {foo: [1, 2, null]},
1560
+ // {foo: []},
1561
+ // {foo: null},
1562
+ // null,
1563
+ // ]
1564
+
1565
+ let expected_l =
1566
+ Arc :: new ( ListArray :: from_iter_primitive :: < Int32Type , _ , _ > ( vec ! [
1567
+ Some ( vec![ Some ( 1 ) , Some ( 2 ) , None ] ) ,
1568
+ Some ( vec![ ] ) ,
1569
+ None ,
1570
+ None ,
1571
+ ] ) ) ;
1572
+
1573
+ let nulls = Buffer :: from ( [ 0b00000111 ] ) ;
1574
+ let struct_fields = vec ! [ (
1575
+ Field :: new( "foo" , expected_l. data_type( ) . clone( ) , true ) ,
1576
+ expected_l. clone( ) as ArrayRef ,
1577
+ ) ] ;
1578
+ let expected = StructArray :: from ( ( struct_fields, nulls) ) ;
1579
+
1580
+ let array = Arc :: new ( Int32Array :: from_iter ( vec ! [
1581
+ Some ( 1 ) ,
1582
+ Some ( 2 ) ,
1583
+ None ,
1584
+ None ,
1585
+ None ,
1586
+ None ,
1587
+ ] ) ) ;
1588
+ let reader = InMemoryArrayReader :: new (
1589
+ ArrowType :: Int32 ,
1590
+ array,
1591
+ Some ( vec ! [ 4 , 4 , 3 , 2 , 1 , 0 ] ) ,
1592
+ Some ( vec ! [ 0 , 1 , 1 , 0 , 0 , 0 ] ) ,
1593
+ ) ;
1594
+
1595
+ let list_reader = ListArrayReader :: < i32 > :: new (
1596
+ Box :: new ( reader) ,
1597
+ expected_l. data_type ( ) . clone ( ) ,
1598
+ ArrowType :: Int32 ,
1599
+ 3 ,
1600
+ 1 ,
1601
+ true ,
1602
+ ) ;
1603
+
1604
+ let mut struct_reader = StructArrayReader :: new (
1605
+ expected. data_type ( ) . clone ( ) ,
1606
+ vec ! [ Box :: new( list_reader) ] ,
1607
+ 1 ,
1608
+ 0 ,
1609
+ true ,
1610
+ ) ;
1611
+
1612
+ let actual = struct_reader. next_batch ( 1024 ) . unwrap ( ) ;
1613
+ let actual = actual. as_any ( ) . downcast_ref :: < StructArray > ( ) . unwrap ( ) ;
1614
+ assert_eq ! ( actual, & expected)
1615
+ }
1577
1616
}
0 commit comments