@@ -268,12 +268,13 @@ mod tests {
268
268
Int64Array , Int8Array , LargeBinaryArray , LargeStringArray , RecordBatchReader ,
269
269
StringArray ,
270
270
} ,
271
+ buffer:: NullBuffer ,
271
272
compute:: concat_batches,
272
273
datatypes:: { DataType as ArrowDataType , Field , Schema } ,
273
274
} ;
274
275
use bytes:: Bytes ;
275
276
276
- use crate :: ArrowReaderBuilder ;
277
+ use crate :: { stripe :: Stripe , ArrowReaderBuilder } ;
277
278
278
279
use super :: * ;
279
280
@@ -474,4 +475,62 @@ mod tests {
474
475
let rows = roundtrip ( & [ batch1, batch2] ) ;
475
476
assert_eq ! ( expected_batch, rows[ 0 ] ) ;
476
477
}
478
+
479
+ #[ test]
480
+ fn test_empty_null_buffers ( ) {
481
+ // Write an ORC file whose column has a present stream but no actual
482
+ // nulls. When this file is read back, the resulting Arrow arrays should
483
+ // have NO null buffer, even though there is a present stream.
484
+ let schema = Arc :: new ( Schema :: new ( vec ! [ Field :: new(
485
+ "int64" ,
486
+ ArrowDataType :: Int64 ,
487
+ true ,
488
+ ) ] ) ) ;
489
+
490
+ // Array that carries a null buffer (all bits valid) but contains no nulls
491
+ let array_empty_nulls = Arc :: new ( Int64Array :: from_iter_values_with_nulls (
492
+ vec ! [ 1 ] ,
493
+ Some ( NullBuffer :: from_iter ( vec ! [ true ] ) ) ,
494
+ ) ) ;
495
+ assert ! ( array_empty_nulls. nulls( ) . is_some( ) ) ;
496
+ assert ! ( array_empty_nulls. null_count( ) == 0 ) ;
497
+
498
+ let batch = RecordBatch :: try_new ( schema, vec ! [ array_empty_nulls] ) . unwrap ( ) ;
499
+
500
+ // Encode the batch into an in-memory ORC file
501
+ let mut f = vec ! [ ] ;
502
+ let mut writer = ArrowWriterBuilder :: new ( & mut f, batch. schema ( ) )
503
+ . try_build ( )
504
+ . unwrap ( ) ;
505
+ writer. write ( & batch) . unwrap ( ) ;
506
+ writer. close ( ) . unwrap ( ) ;
507
+ let mut f = Bytes :: from ( f) ;
508
+ let builder = ArrowReaderBuilder :: try_new ( f. clone ( ) ) . unwrap ( ) ;
509
+
510
+ // Ensure the first stripe of the ORC file we wrote indeed has a present stream
511
+ let stripe = Stripe :: new (
512
+ & mut f,
513
+ & builder. file_metadata ,
514
+ builder. file_metadata ( ) . root_data_type ( ) ,
515
+ & builder. file_metadata ( ) . stripe_metadatas ( ) [ 0 ] ,
516
+ )
517
+ . unwrap ( ) ;
518
+ assert_eq ! ( stripe. columns( ) . len( ) , 1 ) ;
519
+ // Make sure we're looking at the expected column
520
+ assert_eq ! ( stripe. columns( ) [ 0 ] . name( ) , "int64" ) ;
521
+ // Then check that a present stream exists for that column
522
+ let present_stream = stripe
523
+ . stream_map ( )
524
+ . get_opt ( & stripe. columns ( ) [ 0 ] , proto:: stream:: Kind :: Present ) ;
525
+ assert ! ( present_stream. is_some( ) ) ;
526
+
527
+ // Decode the record batches back from the ORC bytes
528
+ let reader = builder. build ( ) ;
529
+ let rows = reader. collect :: < Result < Vec < _ > , _ > > ( ) . unwrap ( ) ;
530
+
531
+ assert_eq ! ( rows. len( ) , 1 ) ;
532
+ assert_eq ! ( rows[ 0 ] . num_columns( ) , 1 ) ;
533
+ // Ensure the array read back has no null buffer
534
+ assert ! ( rows[ 0 ] . column( 0 ) . nulls( ) . is_none( ) ) ;
535
+ }
477
536
}
0 commit comments