@@ -23,7 +23,8 @@ use criterion::{criterion_group, criterion_main, BenchmarkGroup, Criterion};
23
23
use num:: FromPrimitive ;
24
24
use num_bigint:: BigInt ;
25
25
use parquet:: arrow:: array_reader:: {
26
- make_byte_array_reader, make_fixed_len_byte_array_reader, ListArrayReader ,
26
+ make_byte_array_reader, make_byte_view_array_reader, make_fixed_len_byte_array_reader,
27
+ ListArrayReader ,
27
28
} ;
28
29
use parquet:: basic:: Type ;
29
30
use parquet:: data_type:: { ByteArray , FixedLenByteArrayType } ;
@@ -502,6 +503,13 @@ fn create_string_byte_array_reader(
502
503
make_byte_array_reader ( Box :: new ( page_iterator) , column_desc, None ) . unwrap ( )
503
504
}
504
505
506
+ fn create_string_view_byte_array_reader (
507
+ page_iterator : impl PageIterator + ' static ,
508
+ column_desc : ColumnDescPtr ,
509
+ ) -> Box < dyn ArrayReader > {
510
+ make_byte_view_array_reader ( Box :: new ( page_iterator) , column_desc, None ) . unwrap ( )
511
+ }
512
+
505
513
fn create_string_byte_array_dictionary_reader (
506
514
page_iterator : impl PageIterator + ' static ,
507
515
column_desc : ColumnDescPtr ,
@@ -993,6 +1001,95 @@ fn add_benches(c: &mut Criterion) {
993
1001
994
1002
group. finish ( ) ;
995
1003
1004
+ // string view benchmarks
1005
+ //==============================
1006
+
1007
+ let mut group = c. benchmark_group ( "arrow_array_reader/StringViewArray" ) ;
1008
+
1009
+ // string, plain encoded, no NULLs
1010
+ let plain_string_no_null_data =
1011
+ build_plain_encoded_string_page_iterator ( mandatory_string_column_desc. clone ( ) , 0.0 ) ;
1012
+ group. bench_function ( "plain encoded, mandatory, no NULLs" , |b| {
1013
+ b. iter ( || {
1014
+ let array_reader = create_string_view_byte_array_reader (
1015
+ plain_string_no_null_data. clone ( ) ,
1016
+ mandatory_string_column_desc. clone ( ) ,
1017
+ ) ;
1018
+ count = bench_array_reader ( array_reader) ;
1019
+ } ) ;
1020
+ assert_eq ! ( count, EXPECTED_VALUE_COUNT ) ;
1021
+ } ) ;
1022
+
1023
+ let plain_string_no_null_data =
1024
+ build_plain_encoded_string_page_iterator ( optional_string_column_desc. clone ( ) , 0.0 ) ;
1025
+ group. bench_function ( "plain encoded, optional, no NULLs" , |b| {
1026
+ b. iter ( || {
1027
+ let array_reader = create_string_view_byte_array_reader (
1028
+ plain_string_no_null_data. clone ( ) ,
1029
+ optional_string_column_desc. clone ( ) ,
1030
+ ) ;
1031
+ count = bench_array_reader ( array_reader) ;
1032
+ } ) ;
1033
+ assert_eq ! ( count, EXPECTED_VALUE_COUNT ) ;
1034
+ } ) ;
1035
+
1036
+ // string, plain encoded, half NULLs
1037
+ let plain_string_half_null_data =
1038
+ build_plain_encoded_string_page_iterator ( optional_string_column_desc. clone ( ) , 0.5 ) ;
1039
+ group. bench_function ( "plain encoded, optional, half NULLs" , |b| {
1040
+ b. iter ( || {
1041
+ let array_reader = create_string_view_byte_array_reader (
1042
+ plain_string_half_null_data. clone ( ) ,
1043
+ optional_string_column_desc. clone ( ) ,
1044
+ ) ;
1045
+ count = bench_array_reader ( array_reader) ;
1046
+ } ) ;
1047
+ assert_eq ! ( count, EXPECTED_VALUE_COUNT ) ;
1048
+ } ) ;
1049
+
1050
+ // string, dictionary encoded, no NULLs
1051
+ let dictionary_string_no_null_data =
1052
+ build_dictionary_encoded_string_page_iterator ( mandatory_string_column_desc. clone ( ) , 0.0 ) ;
1053
+ group. bench_function ( "dictionary encoded, mandatory, no NULLs" , |b| {
1054
+ b. iter ( || {
1055
+ let array_reader = create_string_view_byte_array_reader (
1056
+ dictionary_string_no_null_data. clone ( ) ,
1057
+ mandatory_string_column_desc. clone ( ) ,
1058
+ ) ;
1059
+ count = bench_array_reader ( array_reader) ;
1060
+ } ) ;
1061
+ assert_eq ! ( count, EXPECTED_VALUE_COUNT ) ;
1062
+ } ) ;
1063
+
1064
+ let dictionary_string_no_null_data =
1065
+ build_dictionary_encoded_string_page_iterator ( optional_string_column_desc. clone ( ) , 0.0 ) ;
1066
+ group. bench_function ( "dictionary encoded, optional, no NULLs" , |b| {
1067
+ b. iter ( || {
1068
+ let array_reader = create_string_view_byte_array_reader (
1069
+ dictionary_string_no_null_data. clone ( ) ,
1070
+ optional_string_column_desc. clone ( ) ,
1071
+ ) ;
1072
+ count = bench_array_reader ( array_reader) ;
1073
+ } ) ;
1074
+ assert_eq ! ( count, EXPECTED_VALUE_COUNT ) ;
1075
+ } ) ;
1076
+
1077
+ // string, dictionary encoded, half NULLs
1078
+ let dictionary_string_half_null_data =
1079
+ build_dictionary_encoded_string_page_iterator ( optional_string_column_desc. clone ( ) , 0.5 ) ;
1080
+ group. bench_function ( "dictionary encoded, optional, half NULLs" , |b| {
1081
+ b. iter ( || {
1082
+ let array_reader = create_string_view_byte_array_reader (
1083
+ dictionary_string_half_null_data. clone ( ) ,
1084
+ optional_string_column_desc. clone ( ) ,
1085
+ ) ;
1086
+ count = bench_array_reader ( array_reader) ;
1087
+ } ) ;
1088
+ assert_eq ! ( count, EXPECTED_VALUE_COUNT ) ;
1089
+ } ) ;
1090
+
1091
+ group. finish ( ) ;
1092
+
996
1093
// list benchmarks
997
1094
//==============================
998
1095
0 commit comments