@@ -81,6 +81,7 @@ use std::{
 use arrow_csv::ReaderBuilder;
 use arrow_schema::{ArrowError, Schema};
 use clap::{Parser, ValueEnum};
+use parquet::arrow::arrow_writer::ArrowWriterOptions;
 use parquet::{
     arrow::{parquet_to_arrow_schema, ArrowWriter},
     basic::Compression,
@@ -333,21 +334,16 @@ fn configure_reader_builder(args: &Args, arrow_schema: Arc<Schema>) -> ReaderBui
     builder
 }
 
-fn arrow_schema_from_string(schema: &str) -> Result<Arc<Schema>, ParquetFromCsvError> {
-    let schema = Arc::new(parse_message_type(schema)?);
-    let desc = SchemaDescriptor::new(schema);
-    let arrow_schema = Arc::new(parquet_to_arrow_schema(&desc, None)?);
-    Ok(arrow_schema)
-}
-
 fn convert_csv_to_parquet(args: &Args) -> Result<(), ParquetFromCsvError> {
     let schema = read_to_string(args.schema_path()).map_err(|e| {
         ParquetFromCsvError::with_context(
             e,
             &format!("Failed to open schema file {:#?}", args.schema_path()),
         )
     })?;
-    let arrow_schema = arrow_schema_from_string(&schema)?;
+    let parquet_schema = Arc::new(parse_message_type(&schema)?);
+    let desc = SchemaDescriptor::new(parquet_schema);
+    let arrow_schema = Arc::new(parquet_to_arrow_schema(&desc, None)?);
 
     // create output parquet writer
     let parquet_file = File::create(&args.output_file).map_err(|e| {
@@ -357,9 +353,12 @@ fn convert_csv_to_parquet(args: &Args) -> Result<(), ParquetFromCsvError> {
         )
     })?;
 
-    let writer_properties = Some(configure_writer_properties(args));
+    let options = ArrowWriterOptions::new()
+        .with_properties(configure_writer_properties(args))
+        .with_schema_root(desc.name().to_string());
+
     let mut arrow_writer =
-        ArrowWriter::try_new(parquet_file, arrow_schema.clone(), writer_properties)
+        ArrowWriter::try_new_with_options(parquet_file, arrow_schema.clone(), options)
             .map_err(|e| ParquetFromCsvError::with_context(e, "Failed to create ArrowWriter"))?;
 
     // open input file
@@ -426,6 +425,7 @@ mod tests {
     use clap::{CommandFactory, Parser};
     use flate2::write::GzEncoder;
     use parquet::basic::{BrotliLevel, GzipLevel, ZstdLevel};
+    use parquet::file::reader::{FileReader, SerializedFileReader};
     use snap::write::FrameEncoder;
     use tempfile::NamedTempFile;
 
@@ -647,7 +647,7 @@ mod tests {
 
     fn test_convert_compressed_csv_to_parquet(csv_compression: Compression) {
         let schema = NamedTempFile::new().unwrap();
-        let schema_text = r"message schema {
+        let schema_text = r"message my_amazing_schema {
             optional int32 id;
             optional binary name (STRING);
         }";
@@ -728,6 +728,10 @@ mod tests {
             help: None,
         };
         convert_csv_to_parquet(&args).unwrap();
+
+        let file = SerializedFileReader::new(output_parquet.into_file()).unwrap();
+        let schema_name = file.metadata().file_metadata().schema().name();
+        assert_eq!(schema_name, "my_amazing_schema");
     }
 
     #[test]
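
Net effect of the change: the root name of the parsed Parquet message type (`desc.name()`) now flows into the written file via `ArrowWriterOptions::with_schema_root`, rather than the writer's default root name. Below is a minimal, self-contained sketch of that writer path outside the CSV tool; the output path, column, and data values are hypothetical and chosen only for illustration, while `ArrowWriterOptions` and `ArrowWriter::try_new_with_options` are the same calls the diff uses.

use std::{fs::File, sync::Arc};

use arrow_array::{Int32Array, RecordBatch};
use arrow_schema::{DataType, Field, Schema};
use parquet::arrow::arrow_writer::{ArrowWriter, ArrowWriterOptions};
use parquet::file::properties::WriterProperties;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // A one-column Arrow schema and a small batch of hypothetical data.
    let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
    let batch = RecordBatch::try_new(
        schema.clone(),
        vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
    )?;

    // Name the Parquet root group explicitly (by default the Arrow writer
    // uses "arrow_schema"), mirroring what the diff does with desc.name().
    let options = ArrowWriterOptions::new()
        .with_properties(WriterProperties::builder().build())
        .with_schema_root("my_amazing_schema".to_string());

    let file = File::create("example.parquet")?; // hypothetical output path
    let mut writer = ArrowWriter::try_new_with_options(file, schema, options)?;
    writer.write(&batch)?;
    writer.close()?;
    Ok(())
}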