@@ -78,6 +78,26 @@ pub struct ParquetMetaDataReader {
78
78
file_decryption_properties : Option < FileDecryptionProperties > ,
79
79
}
80
80
81
/// Describes how the footer metadata is stored
///
/// This is parsed from the last 8 bytes of the Parquet file
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FooterTail {
    /// Length of the footer metadata in bytes
    metadata_length: usize,
    /// `true` when the footer metadata is encrypted
    encrypted_footer: bool,
}

impl FooterTail {
    /// The length of the footer metadata in bytes
    pub fn metadata_length(&self) -> usize {
        self.metadata_length
    }

    /// Whether the footer metadata is encrypted
    pub fn encrypted_footer(&self) -> bool {
        self.encrypted_footer
    }
}
100
+
81
101
impl ParquetMetaDataReader {
82
102
/// Create a new [`ParquetMetaDataReader`]
83
103
pub fn new ( ) -> Self {
@@ -366,6 +386,7 @@ impl ParquetMetaDataReader {
366
386
& mut fetch,
367
387
file_size,
368
388
self . get_prefetch_size ( ) ,
389
+ #[ cfg( feature = "encryption" ) ]
369
390
self . file_decryption_properties . as_ref ( ) ,
370
391
)
371
392
. await ?;
@@ -520,7 +541,8 @@ impl ParquetMetaDataReader {
520
541
. get_read ( file_size - 8 ) ?
521
542
. read_exact ( & mut footer) ?;
522
543
523
- let metadata_len = Self :: decode_footer ( & footer) ?;
544
+ let footer = Self :: decode_footer_tail ( & footer) ?;
545
+ let metadata_len = footer. metadata_length ( ) ;
524
546
let footer_metadata_len = FOOTER_SIZE + metadata_len;
525
547
self . metadata_size = Some ( footer_metadata_len) ;
526
548
@@ -534,6 +556,7 @@ impl ParquetMetaDataReader {
534
556
let start = file_size - footer_metadata_len as u64 ;
535
557
Self :: decode_metadata (
536
558
chunk_reader. get_bytes ( start, metadata_len) ?. as_ref ( ) ,
559
+ footer. encrypted_footer ( ) ,
537
560
#[ cfg( feature = "encryption" ) ]
538
561
self . file_decryption_properties . as_ref ( ) ,
539
562
)
@@ -557,7 +580,9 @@ impl ParquetMetaDataReader {
557
580
fetch : & mut F ,
558
581
file_size : usize ,
559
582
prefetch : usize ,
560
- file_decryption_properties : Option < & FileDecryptionProperties > ,
583
+ #[ cfg( feature = "encryption" ) ] file_decryption_properties : Option <
584
+ & FileDecryptionProperties ,
585
+ > ,
561
586
) -> Result < ( ParquetMetaData , Option < ( usize , Bytes ) > ) > {
562
587
if file_size < FOOTER_SIZE {
563
588
return Err ( eof_err ! ( "file size of {} is less than footer" , file_size) ) ;
@@ -582,7 +607,8 @@ impl ParquetMetaDataReader {
582
607
let mut footer = [ 0 ; FOOTER_SIZE ] ;
583
608
footer. copy_from_slice ( & suffix[ suffix_len - FOOTER_SIZE ..suffix_len] ) ;
584
609
585
- let length = Self :: decode_footer ( & footer) ?;
610
+ let footer = Self :: decode_footer_tail ( & footer) ?;
611
+ let length = footer. metadata_length ( ) ;
586
612
587
613
if file_size < length + FOOTER_SIZE {
588
614
return Err ( eof_err ! (
@@ -597,22 +623,32 @@ impl ParquetMetaDataReader {
597
623
let metadata_start = file_size - length - FOOTER_SIZE ;
598
624
let meta = fetch. fetch ( metadata_start..file_size - FOOTER_SIZE ) . await ?;
599
625
Ok ( (
600
- Self :: decode_metadata ( & meta, file_decryption_properties) ?,
626
+ Self :: decode_metadata (
627
+ & meta,
628
+ footer. encrypted_footer ( ) ,
629
+ #[ cfg( feature = "encryption" ) ]
630
+ file_decryption_properties,
631
+ ) ?,
601
632
None ,
602
633
) )
603
634
} else {
604
635
let metadata_start = file_size - length - FOOTER_SIZE - footer_start;
605
636
let slice = & suffix[ metadata_start..suffix_len - FOOTER_SIZE ] ;
606
637
Ok ( (
607
- Self :: decode_metadata ( slice, file_decryption_properties) ?,
638
+ Self :: decode_metadata (
639
+ slice,
640
+ footer. encrypted_footer ( ) ,
641
+ #[ cfg( feature = "encryption" ) ]
642
+ file_decryption_properties,
643
+ ) ?,
608
644
Some ( ( footer_start, suffix. slice ( ..metadata_start) ) ) ,
609
645
) )
610
646
}
611
647
}
612
648
613
- /// Decodes the Parquet footer returning the metadata length in bytes
649
+ /// Decodes the end of the Parquet footer
614
650
///
615
- /// A parquet footer is 8 bytes long and has the following layout:
651
+ /// There are 8 bytes at the end of the Parquet footer with the following layout:
616
652
/// * 4 bytes for the metadata length
617
653
/// * 4 bytes for the magic bytes 'PAR1' or 'PARE' (encrypted footer)
618
654
///
@@ -621,16 +657,28 @@ impl ParquetMetaDataReader {
621
657
/// | len | 'PAR1' or 'PARE' |
622
658
/// +-----+------------------+
623
659
/// ```
624
- pub fn decode_footer ( slice : & [ u8 ; FOOTER_SIZE ] ) -> Result < usize > {
625
- // check this is indeed a parquet file
626
- if slice[ 4 ..] != PARQUET_MAGIC && slice[ 4 ..] != PARQUET_MAGIC_ENCR_FOOTER {
660
+ pub fn decode_footer_tail ( slice : & [ u8 ; FOOTER_SIZE ] ) -> Result < FooterTail > {
661
+ let magic = & slice[ 4 ..] ;
662
+ let encrypted_footer = if magic == PARQUET_MAGIC_ENCR_FOOTER {
663
+ true
664
+ } else if magic == PARQUET_MAGIC {
665
+ false
666
+ } else {
627
667
return Err ( general_err ! ( "Invalid Parquet file. Corrupt footer" ) ) ;
628
- }
629
-
668
+ } ;
630
669
// get the metadata length from the footer
631
670
let metadata_len = u32:: from_le_bytes ( slice[ ..4 ] . try_into ( ) . unwrap ( ) ) ;
632
- // u32 won't be larger than usize in most cases
633
- Ok ( metadata_len as usize )
671
+ Ok ( FooterTail {
672
+ // u32 won't be larger than usize in most cases
673
+ metadata_length : metadata_len as usize ,
674
+ encrypted_footer,
675
+ } )
676
+ }
677
+
678
+ /// Decodes the Parquet footer, returning the metadata length in bytes
679
+ #[ deprecated( note = "use decode_footer_tail instead" ) ]
680
+ pub fn decode_footer ( slice : & [ u8 ; FOOTER_SIZE ] ) -> Result < usize > {
681
+ Self :: decode_footer_tail ( slice) . map ( |f| f. metadata_length )
634
682
}
635
683
636
684
/// Decodes [`ParquetMetaData`] from the provided bytes.
@@ -642,21 +690,32 @@ impl ParquetMetaDataReader {
642
690
/// [Parquet Spec]: https://github.com/apache/parquet-format#metadata
643
691
pub fn decode_metadata (
644
692
buf : & [ u8 ] ,
693
+ encrypted_footer : bool ,
645
694
#[ cfg( feature = "encryption" ) ] file_decryption_properties : Option <
646
695
& FileDecryptionProperties ,
647
696
> ,
648
697
) -> Result < ParquetMetaData > {
649
698
let mut prot = TCompactSliceInputProtocol :: new ( buf) ;
650
699
700
+ #[ cfg( not( feature = "encryption" ) ) ]
701
+ if encrypted_footer {
702
+ return Err ( general_err ! (
703
+ "Parquet file has an encrypted footer but the encryption feature is disabled"
704
+ ) ) ;
705
+ }
706
+
651
707
#[ cfg( feature = "encryption" ) ]
652
708
let mut file_decryptor = None ;
653
709
#[ cfg( feature = "encryption" ) ]
654
710
let decrypted_fmd_buf;
655
711
656
712
#[ cfg( feature = "encryption" ) ]
657
- if file_decryption_properties. is_some ( )
658
- && file_decryption_properties. unwrap ( ) . has_footer_key ( )
659
- {
713
+ if encrypted_footer {
714
+ if file_decryption_properties. is_none ( ) {
715
+ return Err ( general_err ! ( "Parquet file has an encrypted footer but no decryption properties were provided" ) ) ;
716
+ } ;
717
+ let file_decryption_properties = file_decryption_properties. unwrap ( ) ;
718
+
660
719
let t_file_crypto_metadata: TFileCryptoMetaData =
661
720
TFileCryptoMetaData :: read_from_in_protocol ( & mut prot)
662
721
. map_err ( |e| general_err ! ( "Could not parse crypto metadata: {}" , e) ) ?;
@@ -678,7 +737,7 @@ impl ParquetMetaDataReader {
678
737
let aad_prefix: Vec < u8 > = aes_gcm_algo. aad_prefix . unwrap_or_default ( ) ;
679
738
680
739
file_decryptor = Some ( FileDecryptor :: new (
681
- file_decryption_properties. unwrap ( ) ,
740
+ file_decryption_properties,
682
741
aad_file_unique. clone ( ) ,
683
742
aad_prefix. clone ( ) ,
684
743
) ) ;
0 commit comments