@@ -510,27 +510,24 @@ async fn test_int_32() {
510
510
. build ( )
511
511
. await ;
512
512
513
- Test {
514
- reader : & reader,
515
- // mins are [-5, -4, 0, 5]
516
- expected_min : Arc :: new ( Int32Array :: from ( vec ! [ -5 , -4 , 0 , 5 ] ) ) ,
517
- // maxes are [-1, 0, 4, 9]
518
- expected_max : Arc :: new ( Int32Array :: from ( vec ! [ -1 , 0 , 4 , 9 ] ) ) ,
519
- // nulls are [0, 0, 0, 0]
520
- expected_null_counts : UInt64Array :: from ( vec ! [ 0 , 0 , 0 , 0 ] ) ,
521
- // row counts are [5, 5, 5, 5]
522
- expected_row_counts : UInt64Array :: from ( vec ! [ 5 , 5 , 5 , 5 ] ) ,
523
- column_name : "i32" ,
524
- test_data_page_statistics : false ,
513
+ for test_data_page_statistics in [ true , false ] {
514
+ Test {
515
+ reader : & reader,
516
+ // mins are [-5, -4, 0, 5]
517
+ expected_min : Arc :: new ( Int32Array :: from ( vec ! [ -5 , -4 , 0 , 5 ] ) ) ,
518
+ // maxes are [-1, 0, 4, 9]
519
+ expected_max : Arc :: new ( Int32Array :: from ( vec ! [ -1 , 0 , 4 , 9 ] ) ) ,
520
+ // nulls are [0, 0, 0, 0]
521
+ expected_null_counts : UInt64Array :: from ( vec ! [ 0 , 0 , 0 , 0 ] ) ,
522
+ // row counts are [5, 5, 5, 5]
523
+ expected_row_counts : UInt64Array :: from ( vec ! [ 5 , 5 , 5 , 5 ] ) ,
524
+ column_name : "i32" ,
525
+ test_data_page_statistics,
526
+ }
527
+ . run ( ) ;
525
528
}
526
- . run ( ) ;
527
529
}
528
530
529
- // BUG: ignore this test for now
530
- // https://github.com/apache/datafusion/issues/10585
531
- // Note that the file has 4 columns named "i8", "i16", "i32", "i64".
532
- // - The tests on column i32 and i64 passed.
533
- // - The tests on column i8 and i16 failed.
534
531
#[ tokio:: test]
535
532
async fn test_int_16 ( ) {
536
533
// This creates a parquet file with 4 columns named "i8", "i16", "i32", "i64"
@@ -541,34 +538,24 @@ async fn test_int_16() {
541
538
. build ( )
542
539
. await ;
543
540
544
- Test {
545
- reader : & reader,
546
- // mins are [-5, -4, 0, 5]
547
- // BUG: not sure why this returns same data but in Int32Array type even though I debugged and the columns name is "i16" an its data is Int16
548
- // My debugging tells me the bug is either at:
549
- // 1. The new code to get "iter". See the code in this PR with
550
- // // Get an iterator over the column statistics
551
- // let iter = row_groups
552
- // .iter()
553
- // .map(|x| x.column(parquet_idx).statistics());
554
- // OR
555
- // 2. in the function (and/or its marco) `pub(crate) fn min_statistics<'a, I: Iterator<Item = Option<&'a ParquetStatistics>>>` here
556
- // https://github.com/apache/datafusion/blob/ea023e2d4878240eece870cf4b346c7a0667aeed/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs#L179
557
- expected_min : Arc :: new ( Int16Array :: from ( vec ! [ -5 , -4 , 0 , 5 ] ) ) , // panic here because the actual data is Int32Array
558
- // maxes are [-1, 0, 4, 9]
559
- expected_max : Arc :: new ( Int16Array :: from ( vec ! [ -1 , 0 , 4 , 9 ] ) ) ,
560
- // nulls are [0, 0, 0, 0]
561
- expected_null_counts : UInt64Array :: from ( vec ! [ 0 , 0 , 0 , 0 ] ) ,
562
- // row counts are [5, 5, 5, 5]
563
- expected_row_counts : UInt64Array :: from ( vec ! [ 5 , 5 , 5 , 5 ] ) ,
564
- column_name : "i16" ,
565
- test_data_page_statistics : false ,
541
+ for test_data_page_statistics in [ true , false ] {
542
+ Test {
543
+ reader : & reader,
544
+ // mins are [-5, -4, 0, 5]
545
+ expected_min : Arc :: new ( Int16Array :: from ( vec ! [ -5 , -4 , 0 , 5 ] ) ) ,
546
+ // maxes are [-1, 0, 4, 9]
547
+ expected_max : Arc :: new ( Int16Array :: from ( vec ! [ -1 , 0 , 4 , 9 ] ) ) ,
548
+ // nulls are [0, 0, 0, 0]
549
+ expected_null_counts : UInt64Array :: from ( vec ! [ 0 , 0 , 0 , 0 ] ) ,
550
+ // row counts are [5, 5, 5, 5]
551
+ expected_row_counts : UInt64Array :: from ( vec ! [ 5 , 5 , 5 , 5 ] ) ,
552
+ column_name : "i16" ,
553
+ test_data_page_statistics,
554
+ }
555
+ . run ( ) ;
566
556
}
567
- . run ( ) ;
568
557
}
569
558
570
- // BUG (same as above): ignore this test for now
571
- // https://github.com/apache/datafusion/issues/10585
572
559
#[ tokio:: test]
573
560
async fn test_int_8 ( ) {
574
561
// This creates a parquet file with 4 columns named "i8", "i16", "i32", "i64"
@@ -579,21 +566,22 @@ async fn test_int_8() {
579
566
. build ( )
580
567
. await ;
581
568
582
- Test {
583
- reader : & reader,
584
- // mins are [-5, -4, 0, 5]
585
- // BUG: not sure why this returns same data but in Int32Array even though I debugged and the columns name is "i8" an its data is Int8
586
- expected_min : Arc :: new ( Int8Array :: from ( vec ! [ -5 , -4 , 0 , 5 ] ) ) , // panic here because the actual data is Int32Array
587
- // maxes are [-1, 0, 4, 9]
588
- expected_max : Arc :: new ( Int8Array :: from ( vec ! [ -1 , 0 , 4 , 9 ] ) ) ,
589
- // nulls are [0, 0, 0, 0]
590
- expected_null_counts : UInt64Array :: from ( vec ! [ 0 , 0 , 0 , 0 ] ) ,
591
- // row counts are [5, 5, 5, 5]
592
- expected_row_counts : UInt64Array :: from ( vec ! [ 5 , 5 , 5 , 5 ] ) ,
593
- column_name : "i8" ,
594
- test_data_page_statistics : false ,
569
+ for test_data_page_statistics in [ true , false ] {
570
+ Test {
571
+ reader : & reader,
572
+ // mins are [-5, -4, 0, 5]
573
+ expected_min : Arc :: new ( Int8Array :: from ( vec ! [ -5 , -4 , 0 , 5 ] ) ) ,
574
+ // maxes are [-1, 0, 4, 9]
575
+ expected_max : Arc :: new ( Int8Array :: from ( vec ! [ -1 , 0 , 4 , 9 ] ) ) ,
576
+ // nulls are [0, 0, 0, 0]
577
+ expected_null_counts : UInt64Array :: from ( vec ! [ 0 , 0 , 0 , 0 ] ) ,
578
+ // row counts are [5, 5, 5, 5]
579
+ expected_row_counts : UInt64Array :: from ( vec ! [ 5 , 5 , 5 , 5 ] ) ,
580
+ column_name : "i8" ,
581
+ test_data_page_statistics,
582
+ }
583
+ . run ( ) ;
595
584
}
596
- . run ( ) ;
597
585
}
598
586
599
587
// timestamp
0 commit comments