@@ -450,19 +450,57 @@ mod tests {
450
450
true ,
451
451
) ] ) ) ;
452
452
453
- let a: ArrayRef = Arc :: new ( Int32Array :: from ( vec ! [ Some ( 1 ) , None , Some ( 2 ) ] ) ) ;
453
+ // build row groups / pages that exercise different combinations of nulls and values
454
+ // note that below we set the row group and page sizes to 4 and 2 respectively
455
+ // so that these "groupings" make sense
456
+ let a: ArrayRef = Arc :: new ( Int32Array :: from ( vec ! [
457
+ // a row group that has all values
458
+ Some ( i32 :: MIN ) ,
459
+ Some ( -1 ) ,
460
+ Some ( 1 ) ,
461
+ Some ( i32 :: MAX ) ,
462
+ // a row group with a page of all nulls and a page of all values
463
+ None ,
464
+ None ,
465
+ Some ( 2 ) ,
466
+ Some ( 3 ) ,
467
+ // a row group that has all null pages
468
+ None ,
469
+ None ,
470
+ None ,
471
+ None ,
472
+ // a row group having 1 page with all values and 1 page with some nulls
473
+ Some ( 4 ) ,
474
+ Some ( 5 ) ,
475
+ None ,
476
+ Some ( 6 ) ,
477
+ // a row group having 1 page with all nulls and 1 page with some nulls
478
+ None ,
479
+ None ,
480
+ Some ( 7 ) ,
481
+ None ,
482
+ // a row group having all pages with some nulls
483
+ None ,
484
+ Some ( 8 ) ,
485
+ Some ( 9 ) ,
486
+ None ,
487
+ ] ) ) ;
454
488
455
489
let batch = RecordBatch :: try_from_iter ( vec ! [ ( "a" , a) ] ) . unwrap ( ) ;
456
490
457
- let writer_props = match write_page_index {
458
- true => WriterProperties :: builder ( )
459
- . set_statistics_enabled ( EnabledStatistics :: Page )
460
- . build ( ) ,
461
- false => WriterProperties :: builder ( )
462
- . set_statistics_enabled ( EnabledStatistics :: Chunk )
463
- . build ( ) ,
491
+ let writer_props_builder = match write_page_index {
492
+ true => WriterProperties :: builder ( ) . set_statistics_enabled ( EnabledStatistics :: Page ) ,
493
+ false => WriterProperties :: builder ( ) . set_statistics_enabled ( EnabledStatistics :: Chunk ) ,
464
494
} ;
465
495
496
+ // tune the size or pages to the data above
497
+ // to make sure we exercise code paths where all items in a page are null, etc.
498
+ let writer_props = writer_props_builder
499
+ . set_max_row_group_size ( 4 )
500
+ . set_data_page_row_count_limit ( 2 )
501
+ . set_write_batch_size ( 2 )
502
+ . build ( ) ;
503
+
466
504
let mut writer = ArrowWriter :: try_new ( & mut buf, schema, Some ( writer_props) ) . unwrap ( ) ;
467
505
writer. write ( & batch) . unwrap ( ) ;
468
506
writer. close ( ) . unwrap ( ) ;
@@ -610,6 +648,29 @@ mod tests {
610
648
decoded_metadata. num_row_groups( )
611
649
) ;
612
650
651
+ // check that the mins and maxes are what we expect for each page
652
+ // also indirectly checking that the pages were written out as we expected them to be laid out
653
+ // (if they're not, or something gets refactored in the future that breaks that assumption,
654
+ // this test may have to drop down to a lower level and create metadata directly instead of relying on
655
+ // writing an entire file)
656
+ let column_indexes = metadata. metadata . column_index ( ) . unwrap ( ) ;
657
+ assert_eq ! ( column_indexes. len( ) , 6 ) ;
658
+ // make sure each row group has 2 pages by checking the first column
659
+ // page counts for each column for each row group, should all be the same and there should be
660
+ // 12 pages in total across 6 row groups / 1 column
661
+ let mut page_counts = vec ! [ ] ;
662
+ for row_group in column_indexes {
663
+ for column in row_group {
664
+ match column {
665
+ Index :: INT32 ( column_index) => {
666
+ page_counts. push ( column_index. indexes . len ( ) ) ;
667
+ }
668
+ _ => panic ! ( "unexpected column index type" ) ,
669
+ }
670
+ }
671
+ }
672
+ assert_eq ! ( page_counts, vec![ 2 ; 6 ] ) ;
673
+
613
674
metadata
614
675
. metadata
615
676
. row_groups ( )
0 commit comments