@@ -78,6 +78,26 @@ pub struct ParquetMetaDataReader {
78
78
file_decryption_properties : Option < FileDecryptionProperties > ,
79
79
}
80
80
81
/// Describes how the footer metadata is stored
///
/// This is parsed from the last 8 bytes of the Parquet file
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FooterTail {
    /// Length of the footer metadata in bytes
    metadata_length: usize,
    /// Whether the footer metadata is encrypted
    encrypted_footer: bool,
}

impl FooterTail {
    /// The length of the footer metadata in bytes
    pub fn metadata_length(&self) -> usize {
        self.metadata_length
    }

    /// Whether the footer metadata is encrypted
    pub fn encrypted_footer(&self) -> bool {
        self.encrypted_footer
    }
}
100
+
81
101
impl ParquetMetaDataReader {
82
102
/// Create a new [`ParquetMetaDataReader`]
83
103
pub fn new ( ) -> Self {
@@ -366,6 +386,7 @@ impl ParquetMetaDataReader {
366
386
& mut fetch,
367
387
file_size,
368
388
self . get_prefetch_size ( ) ,
389
+ #[ cfg( feature = "encryption" ) ]
369
390
self . file_decryption_properties . as_ref ( ) ,
370
391
)
371
392
. await ?;
@@ -520,7 +541,8 @@ impl ParquetMetaDataReader {
520
541
. get_read ( file_size - 8 ) ?
521
542
. read_exact ( & mut footer) ?;
522
543
523
- let metadata_len = Self :: decode_footer ( & footer) ?;
544
+ let footer = Self :: decode_footer_tail ( & footer) ?;
545
+ let metadata_len = footer. metadata_length ( ) ;
524
546
let footer_metadata_len = FOOTER_SIZE + metadata_len;
525
547
self . metadata_size = Some ( footer_metadata_len) ;
526
548
@@ -536,6 +558,8 @@ impl ParquetMetaDataReader {
536
558
chunk_reader. get_bytes ( start, metadata_len) ?. as_ref ( ) ,
537
559
#[ cfg( feature = "encryption" ) ]
538
560
self . file_decryption_properties . as_ref ( ) ,
561
+ #[ cfg( feature = "encryption" ) ]
562
+ footer. encrypted_footer ( ) ,
539
563
)
540
564
}
541
565
@@ -557,7 +581,9 @@ impl ParquetMetaDataReader {
557
581
fetch : & mut F ,
558
582
file_size : usize ,
559
583
prefetch : usize ,
560
- file_decryption_properties : Option < & FileDecryptionProperties > ,
584
+ #[ cfg( feature = "encryption" ) ] file_decryption_properties : Option <
585
+ & FileDecryptionProperties ,
586
+ > ,
561
587
) -> Result < ( ParquetMetaData , Option < ( usize , Bytes ) > ) > {
562
588
if file_size < FOOTER_SIZE {
563
589
return Err ( eof_err ! ( "file size of {} is less than footer" , file_size) ) ;
@@ -582,7 +608,8 @@ impl ParquetMetaDataReader {
582
608
let mut footer = [ 0 ; FOOTER_SIZE ] ;
583
609
footer. copy_from_slice ( & suffix[ suffix_len - FOOTER_SIZE ..suffix_len] ) ;
584
610
585
- let length = Self :: decode_footer ( & footer) ?;
611
+ let footer = Self :: decode_footer_tail ( & footer) ?;
612
+ let length = footer. metadata_length ( ) ;
586
613
587
614
if file_size < length + FOOTER_SIZE {
588
615
return Err ( eof_err ! (
@@ -597,22 +624,34 @@ impl ParquetMetaDataReader {
597
624
let metadata_start = file_size - length - FOOTER_SIZE ;
598
625
let meta = fetch. fetch ( metadata_start..file_size - FOOTER_SIZE ) . await ?;
599
626
Ok ( (
600
- Self :: decode_metadata ( & meta, file_decryption_properties) ?,
627
+ Self :: decode_metadata (
628
+ & meta,
629
+ #[ cfg( feature = "encryption" ) ]
630
+ file_decryption_properties,
631
+ #[ cfg( feature = "encryption" ) ]
632
+ footer. encrypted_footer ( ) ,
633
+ ) ?,
601
634
None ,
602
635
) )
603
636
} else {
604
637
let metadata_start = file_size - length - FOOTER_SIZE - footer_start;
605
638
let slice = & suffix[ metadata_start..suffix_len - FOOTER_SIZE ] ;
606
639
Ok ( (
607
- Self :: decode_metadata ( slice, file_decryption_properties) ?,
640
+ Self :: decode_metadata (
641
+ slice,
642
+ #[ cfg( feature = "encryption" ) ]
643
+ file_decryption_properties,
644
+ #[ cfg( feature = "encryption" ) ]
645
+ footer. encrypted_footer ( ) ,
646
+ ) ?,
608
647
Some ( ( footer_start, suffix. slice ( ..metadata_start) ) ) ,
609
648
) )
610
649
}
611
650
}
612
651
613
- /// Decodes the Parquet footer returning the metadata length in bytes
652
+ /// Decodes the end of the Parquet footer
614
653
///
615
- /// A parquet footer is 8 bytes long and has the following layout:
654
+ /// There are 8 bytes at the end of the Parquet footer with the following layout:
616
655
/// * 4 bytes for the metadata length
617
656
/// * 4 bytes for the magic bytes 'PAR1' or 'PARE' (encrypted footer)
618
657
///
@@ -621,16 +660,28 @@ impl ParquetMetaDataReader {
621
660
/// | len | 'PAR1' or 'PARE' |
622
661
/// +-----+------------------+
623
662
/// ```
624
- pub fn decode_footer ( slice : & [ u8 ; FOOTER_SIZE ] ) -> Result < usize > {
625
- // check this is indeed a parquet file
626
- if slice[ 4 ..] != PARQUET_MAGIC && slice[ 4 ..] != PARQUET_MAGIC_ENCR_FOOTER {
663
+ pub fn decode_footer_tail ( slice : & [ u8 ; FOOTER_SIZE ] ) -> Result < FooterTail > {
664
+ let magic = & slice[ 4 ..] ;
665
+ let encrypted_footer = if magic == PARQUET_MAGIC_ENCR_FOOTER {
666
+ true
667
+ } else if magic == PARQUET_MAGIC {
668
+ false
669
+ } else {
627
670
return Err ( general_err ! ( "Invalid Parquet file. Corrupt footer" ) ) ;
628
- }
629
-
671
+ } ;
630
672
// get the metadata length from the footer
631
673
let metadata_len = u32:: from_le_bytes ( slice[ ..4 ] . try_into ( ) . unwrap ( ) ) ;
632
- // u32 won't be larger than usize in most cases
633
- Ok ( metadata_len as usize )
674
+ Ok ( FooterTail {
675
+ // u32 won't be larger than usize in most cases
676
+ metadata_length : metadata_len as usize ,
677
+ encrypted_footer,
678
+ } )
679
+ }
680
+
681
+ /// Decodes the Parquet footer, returning the metadata length in bytes
682
+ #[ deprecated( note = "use decode_footer_tail instead" ) ]
683
+ pub fn decode_footer ( slice : & [ u8 ; FOOTER_SIZE ] ) -> Result < usize > {
684
+ Self :: decode_footer_tail ( slice) . map ( |f| f. metadata_length )
634
685
}
635
686
636
687
/// Decodes [`ParquetMetaData`] from the provided bytes.
@@ -645,18 +696,29 @@ impl ParquetMetaDataReader {
645
696
#[ cfg( feature = "encryption" ) ] file_decryption_properties : Option <
646
697
& FileDecryptionProperties ,
647
698
> ,
699
+ #[ cfg( feature = "encryption" ) ] encrypted_footer : bool ,
648
700
) -> Result < ParquetMetaData > {
649
701
let mut prot = TCompactSliceInputProtocol :: new ( buf) ;
650
702
703
+ #[ cfg( not( feature = "encryption" ) ) ]
704
+ if encrypted_footer ( ) {
705
+ return Err ( general_err ! (
706
+ "Parquet file has an encrypted footer but the encryption feature is disabled"
707
+ ) ) ;
708
+ }
709
+
651
710
#[ cfg( feature = "encryption" ) ]
652
711
let mut file_decryptor = None ;
653
712
#[ cfg( feature = "encryption" ) ]
654
713
let decrypted_fmd_buf;
655
714
656
715
#[ cfg( feature = "encryption" ) ]
657
- if file_decryption_properties. is_some ( )
658
- && file_decryption_properties. unwrap ( ) . has_footer_key ( )
659
- {
716
+ if encrypted_footer {
717
+ if file_decryption_properties. is_none ( ) {
718
+ return Err ( general_err ! ( "Parquet file has an encrypted footer but no decryption properties were provided" ) ) ;
719
+ } ;
720
+ let file_decryption_properties = file_decryption_properties. unwrap ( ) ;
721
+
660
722
let t_file_crypto_metadata: TFileCryptoMetaData =
661
723
TFileCryptoMetaData :: read_from_in_protocol ( & mut prot)
662
724
. map_err ( |e| general_err ! ( "Could not parse crypto metadata: {}" , e) ) ?;
@@ -678,7 +740,7 @@ impl ParquetMetaDataReader {
678
740
let aad_prefix: Vec < u8 > = aes_gcm_algo. aad_prefix . unwrap_or_default ( ) ;
679
741
680
742
file_decryptor = Some ( FileDecryptor :: new (
681
- file_decryption_properties. unwrap ( ) ,
743
+ file_decryption_properties,
682
744
aad_file_unique. clone ( ) ,
683
745
aad_prefix. clone ( ) ,
684
746
) ) ;
0 commit comments