@@ -9,6 +9,7 @@ use arrow_array::StructArray as ArrowStructArray;
9
9
use futures_util:: Stream ;
10
10
use indicatif:: ProgressBar ;
11
11
use parquet:: arrow:: ParquetRecordBatchStreamBuilder ;
12
+ use parquet:: arrow:: arrow_reader:: { ArrowReaderMetadata , ArrowReaderOptions } ;
12
13
use pin_project:: pin_project;
13
14
use tokio:: fs:: File ;
14
15
use vortex:: arrays:: ChunkedArray ;
@@ -36,14 +37,19 @@ pub async fn exec_convert(input_path: impl AsRef<Path>, flags: Flags) -> VortexR
36
37
let wall_start = Instant :: now ( ) ;
37
38
38
39
let output_path = input_path. as_ref ( ) . with_extension ( "vortex" ) ;
39
- let file = File :: open ( input_path) . await ?;
40
+ let mut file = File :: open ( input_path) . await ?;
41
+
42
+ let metadata =
43
+ ArrowReaderMetadata :: load_async ( & mut file, ArrowReaderOptions :: default ( ) ) . await ?;
44
+ let has_root_level_nulls = metadata. parquet_schema ( ) . root_schema ( ) . is_optional ( ) ;
45
+
40
46
let mut reader = ParquetRecordBatchStreamBuilder :: new ( file) . await ?. build ( ) ?;
41
47
let mut chunks = Vec :: new ( ) ;
42
48
43
49
while let Some ( mut reader) = reader. next_row_group ( ) . await ? {
44
50
for batch in reader. by_ref ( ) {
45
51
let batch = ArrowStructArray :: from ( batch?) ;
46
- let next_chunk = ArrayRef :: from_arrow ( & batch, true ) ;
52
+ let next_chunk = ArrayRef :: from_arrow ( & batch, has_root_level_nulls ) ;
47
53
chunks. push ( next_chunk) ;
48
54
}
49
55
}
0 commit comments