19
19
20
20
// TODO: potentially move this to arrow-rs: https://github.com/apache/arrow-rs/issues/4328
21
21
22
+ use arrow:: array:: builder:: FixedSizeBinaryBuilder ;
22
23
use arrow:: datatypes:: i256;
23
24
use arrow:: { array:: ArrayRef , datatypes:: DataType } ;
24
25
use arrow_array:: {
@@ -600,6 +601,31 @@ make_data_page_stats_iterator!(
600
601
Index :: DOUBLE ,
601
602
f64
602
603
) ;
604
+ make_data_page_stats_iterator ! (
605
+ MinByteArrayDataPageStatsIterator ,
606
+ |x: & PageIndex <ByteArray >| { x. min. clone( ) } ,
607
+ Index :: BYTE_ARRAY ,
608
+ ByteArray
609
+ ) ;
610
+ make_data_page_stats_iterator ! (
611
+ MaxByteArrayDataPageStatsIterator ,
612
+ |x: & PageIndex <ByteArray >| { x. max. clone( ) } ,
613
+ Index :: BYTE_ARRAY ,
614
+ ByteArray
615
+ ) ;
616
+ make_data_page_stats_iterator ! (
617
+ MaxFixedLenByteArrayDataPageStatsIterator ,
618
+ |x: & PageIndex <FixedLenByteArray >| { x. max. clone( ) } ,
619
+ Index :: FIXED_LEN_BYTE_ARRAY ,
620
+ FixedLenByteArray
621
+ ) ;
622
+
623
+ make_data_page_stats_iterator ! (
624
+ MinFixedLenByteArrayDataPageStatsIterator ,
625
+ |x: & PageIndex <FixedLenByteArray >| { x. min. clone( ) } ,
626
+ Index :: FIXED_LEN_BYTE_ARRAY ,
627
+ FixedLenByteArray
628
+ ) ;
603
629
604
630
macro_rules! get_decimal_page_stats_iterator {
605
631
( $iterator_type: ident, $func: ident, $stat_value_type: ident, $convert_func: ident) => {
@@ -634,9 +660,7 @@ macro_rules! get_decimal_page_stats_iterator {
634
660
. indexes
635
661
. iter( )
636
662
. map( |x| {
637
- Some ( $stat_value_type:: from(
638
- x. $func. unwrap_or_default( ) ,
639
- ) )
663
+ x. $func. and_then( |x| Some ( $stat_value_type:: from( x) ) )
640
664
} )
641
665
. collect:: <Vec <_>>( ) ,
642
666
) ,
@@ -645,9 +669,7 @@ macro_rules! get_decimal_page_stats_iterator {
645
669
. indexes
646
670
. iter( )
647
671
. map( |x| {
648
- Some ( $stat_value_type:: from(
649
- x. $func. unwrap_or_default( ) ,
650
- ) )
672
+ x. $func. and_then( |x| Some ( $stat_value_type:: from( x) ) )
651
673
} )
652
674
. collect:: <Vec <_>>( ) ,
653
675
) ,
@@ -656,9 +678,9 @@ macro_rules! get_decimal_page_stats_iterator {
656
678
. indexes
657
679
. iter( )
658
680
. map( |x| {
659
- Some ( $convert_func (
660
- x . clone ( ) . $func. unwrap_or_default ( ) . data ( ) ,
661
- ) )
681
+ x . clone ( )
682
+ . $func
683
+ . and_then ( |x| Some ( $convert_func ( x . data ( ) ) ) )
662
684
} )
663
685
. collect:: <Vec <_>>( ) ,
664
686
) ,
@@ -667,9 +689,9 @@ macro_rules! get_decimal_page_stats_iterator {
667
689
. indexes
668
690
. iter( )
669
691
. map( |x| {
670
- Some ( $convert_func (
671
- x . clone ( ) . $func. unwrap_or_default ( ) . data ( ) ,
672
- ) )
692
+ x . clone ( )
693
+ . $func
694
+ . and_then ( |x| Some ( $convert_func ( x . data ( ) ) ) )
673
695
} )
674
696
. collect:: <Vec <_>>( ) ,
675
697
) ,
@@ -713,32 +735,6 @@ get_decimal_page_stats_iterator!(
713
735
i256,
714
736
from_bytes_to_i256
715
737
) ;
716
- make_data_page_stats_iterator ! (
717
- MinByteArrayDataPageStatsIterator ,
718
- |x: & PageIndex <ByteArray >| { x. min. clone( ) } ,
719
- Index :: BYTE_ARRAY ,
720
- ByteArray
721
- ) ;
722
- make_data_page_stats_iterator ! (
723
- MaxByteArrayDataPageStatsIterator ,
724
- |x: & PageIndex <ByteArray >| { x. max. clone( ) } ,
725
- Index :: BYTE_ARRAY ,
726
- ByteArray
727
- ) ;
728
-
729
- make_data_page_stats_iterator ! (
730
- MaxFixedLenByteArrayDataPageStatsIterator ,
731
- |x: & PageIndex <FixedLenByteArray >| { x. max. clone( ) } ,
732
- Index :: FIXED_LEN_BYTE_ARRAY ,
733
- FixedLenByteArray
734
- ) ;
735
-
736
- make_data_page_stats_iterator ! (
737
- MinFixedLenByteArrayDataPageStatsIterator ,
738
- |x: & PageIndex <FixedLenByteArray >| { x. min. clone( ) } ,
739
- Index :: FIXED_LEN_BYTE_ARRAY ,
740
- FixedLenByteArray
741
- ) ;
742
738
743
739
macro_rules! get_data_page_statistics {
744
740
( $stat_type_prefix: ident, $data_type: ident, $iterator: ident) => {
@@ -757,7 +753,7 @@ macro_rules! get_data_page_statistics {
757
753
UInt8Array :: from_iter(
758
754
[ <$stat_type_prefix Int32DataPageStatsIterator >] :: new( $iterator)
759
755
. map( |x| {
760
- x. into_iter( ) . filter_map ( |x| {
756
+ x. into_iter( ) . map ( |x| {
761
757
x. and_then( |x| u8 :: try_from( x) . ok( ) )
762
758
} )
763
759
} )
@@ -768,7 +764,7 @@ macro_rules! get_data_page_statistics {
768
764
UInt16Array :: from_iter(
769
765
[ <$stat_type_prefix Int32DataPageStatsIterator >] :: new( $iterator)
770
766
. map( |x| {
771
- x. into_iter( ) . filter_map ( |x| {
767
+ x. into_iter( ) . map ( |x| {
772
768
x. and_then( |x| u16 :: try_from( x) . ok( ) )
773
769
} )
774
770
} )
@@ -779,7 +775,7 @@ macro_rules! get_data_page_statistics {
779
775
UInt32Array :: from_iter(
780
776
[ <$stat_type_prefix Int32DataPageStatsIterator >] :: new( $iterator)
781
777
. map( |x| {
782
- x. into_iter( ) . filter_map ( |x| {
778
+ x. into_iter( ) . map ( |x| {
783
779
x. and_then( |x| Some ( x as u32 ) )
784
780
} )
785
781
} )
@@ -789,7 +785,7 @@ macro_rules! get_data_page_statistics {
789
785
UInt64Array :: from_iter(
790
786
[ <$stat_type_prefix Int64DataPageStatsIterator >] :: new( $iterator)
791
787
. map( |x| {
792
- x. into_iter( ) . filter_map ( |x| {
788
+ x. into_iter( ) . map ( |x| {
793
789
x. and_then( |x| Some ( x as u64 ) )
794
790
} )
795
791
} )
@@ -799,7 +795,7 @@ macro_rules! get_data_page_statistics {
799
795
Int8Array :: from_iter(
800
796
[ <$stat_type_prefix Int32DataPageStatsIterator >] :: new( $iterator)
801
797
. map( |x| {
802
- x. into_iter( ) . filter_map ( |x| {
798
+ x. into_iter( ) . map ( |x| {
803
799
x. and_then( |x| i8 :: try_from( x) . ok( ) )
804
800
} )
805
801
} )
@@ -810,7 +806,7 @@ macro_rules! get_data_page_statistics {
810
806
Int16Array :: from_iter(
811
807
[ <$stat_type_prefix Int32DataPageStatsIterator >] :: new( $iterator)
812
808
. map( |x| {
813
- x. into_iter( ) . filter_map ( |x| {
809
+ x. into_iter( ) . map ( |x| {
814
810
x. and_then( |x| i16 :: try_from( x) . ok( ) )
815
811
} )
816
812
} )
@@ -823,8 +819,8 @@ macro_rules! get_data_page_statistics {
823
819
Float16Array :: from_iter(
824
820
[ <$stat_type_prefix Float16DataPageStatsIterator >] :: new( $iterator)
825
821
. map( |x| {
826
- x. into_iter( ) . filter_map ( |x| {
827
- x. and_then( |x| Some ( from_bytes_to_f16( x. data( ) ) ) )
822
+ x. into_iter( ) . map ( |x| {
823
+ x. and_then( |x| from_bytes_to_f16( x. data( ) ) )
828
824
} )
829
825
} )
830
826
. flatten( )
@@ -836,7 +832,7 @@ macro_rules! get_data_page_statistics {
836
832
Some ( DataType :: LargeBinary ) => Ok ( Arc :: new( LargeBinaryArray :: from_iter( [ <$stat_type_prefix ByteArrayDataPageStatsIterator >] :: new( $iterator) . flatten( ) ) ) ) ,
837
833
Some ( DataType :: Utf8 ) => Ok ( Arc :: new( StringArray :: from(
838
834
[ <$stat_type_prefix ByteArrayDataPageStatsIterator >] :: new( $iterator) . map( |x| {
839
- x. into_iter( ) . filter_map ( |x| {
835
+ x. into_iter( ) . map ( |x| {
840
836
x. and_then( |x| {
841
837
let res = std:: str :: from_utf8( x. data( ) ) . map( |s| s. to_string( ) ) . ok( ) ;
842
838
if res. is_none( ) {
@@ -849,7 +845,7 @@ macro_rules! get_data_page_statistics {
849
845
) ) ) ,
850
846
Some ( DataType :: LargeUtf8 ) => Ok ( Arc :: new( LargeStringArray :: from(
851
847
[ <$stat_type_prefix ByteArrayDataPageStatsIterator >] :: new( $iterator) . map( |x| {
852
- x. into_iter( ) . filter_map ( |x| {
848
+ x. into_iter( ) . map ( |x| {
853
849
x. and_then( |x| {
854
850
let res = std:: str :: from_utf8( x. data( ) ) . map( |s| s. to_string( ) ) . ok( ) ;
855
851
if res. is_none( ) {
@@ -878,10 +874,10 @@ macro_rules! get_data_page_statistics {
878
874
Date64Array :: from( [ <$stat_type_prefix Int32DataPageStatsIterator >] :: new( $iterator)
879
875
. map( |x| {
880
876
x. into_iter( )
881
- . filter_map ( |x| {
877
+ . map ( |x| {
882
878
x. and_then( |x| i64 :: try_from( x) . ok( ) )
879
+ . map( |x| x * 24 * 60 * 60 * 1000 )
883
880
} )
884
- . map( |x| x * 24 * 60 * 60 * 1000 )
885
881
} ) . flatten( ) . collect:: <Vec <_>>( )
886
882
)
887
883
)
@@ -919,16 +915,28 @@ macro_rules! get_data_page_statistics {
919
915
} )
920
916
} ,
921
917
Some ( DataType :: FixedSizeBinary ( size) ) => {
922
- Ok ( Arc :: new(
923
- FixedSizeBinaryArray :: try_from_iter(
924
- [ <$stat_type_prefix FixedLenByteArrayDataPageStatsIterator >] :: new( $iterator)
925
- . flat_map( |x| x. into_iter( ) )
926
- . filter_map( |x| x)
927
- ) . unwrap_or_else( |e| {
928
- log:: debug!( "FixedSizeBinary statistics is invalid: {}" , e) ;
929
- FixedSizeBinaryArray :: new( * size, vec![ ] . into( ) , None )
930
- } )
931
- ) )
918
+ let mut builder = FixedSizeBinaryBuilder :: new( * size) ;
919
+ let iterator = [ <$stat_type_prefix FixedLenByteArrayDataPageStatsIterator >] :: new( $iterator) ;
920
+ for x in iterator {
921
+ for x in x. into_iter( ) {
922
+ let Some ( x) = x else {
923
+ builder. append_null( ) ; // no statistics value
924
+ continue ;
925
+ } ;
926
+
927
+ if x. len( ) == * size as usize {
928
+ let _ = builder. append_value( x. data( ) ) ;
929
+ } else {
930
+ log:: debug!(
931
+ "FixedSizeBinary({}) statistics is a binary of size {}, ignoring it." ,
932
+ size,
933
+ x. len( ) ,
934
+ ) ;
935
+ builder. append_null( ) ;
936
+ }
937
+ }
938
+ }
939
+ Ok ( Arc :: new( builder. finish( ) ) )
932
940
} ,
933
941
_ => unimplemented!( )
934
942
}
0 commit comments