Skip to content

Commit 27e77ad

Browse files
committed
Handle when a file is encrypted but encryption is disabled or no decryption properties are provided
1 parent 9030f3a commit 27e77ad

File tree

4 files changed

+115
-30
lines changed

4 files changed

+115
-30
lines changed

parquet/src/arrow/async_reader/metadata.rs

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,8 @@ impl<F: MetadataFetch> MetadataLoader<F> {
113113
let mut footer = [0; FOOTER_SIZE];
114114
footer.copy_from_slice(&suffix[suffix_len - FOOTER_SIZE..suffix_len]);
115115

116-
let length = ParquetMetaDataReader::decode_footer(&footer)?;
116+
let footer = ParquetMetaDataReader::decode_footer_tail(&footer)?;
117+
let length = footer.metadata_length();
117118

118119
if file_size < length + FOOTER_SIZE {
119120
return Err(ParquetError::EOF(format!(
@@ -127,13 +128,28 @@ impl<F: MetadataFetch> MetadataLoader<F> {
127128
let (metadata, remainder) = if length > suffix_len - FOOTER_SIZE {
128129
let metadata_start = file_size - length - FOOTER_SIZE;
129130
let meta = fetch.fetch(metadata_start..file_size - FOOTER_SIZE).await?;
130-
(ParquetMetaDataReader::decode_metadata(&meta, None)?, None)
131+
(
132+
ParquetMetaDataReader::decode_metadata(
133+
&meta,
134+
#[cfg(feature = "encryption")]
135+
None,
136+
#[cfg(feature = "encryption")]
137+
footer.encrypted_footer(),
138+
)?,
139+
None,
140+
)
131141
} else {
132142
let metadata_start = file_size - length - FOOTER_SIZE - footer_start;
133143

134144
let slice = &suffix[metadata_start..suffix_len - FOOTER_SIZE];
135145
(
136-
ParquetMetaDataReader::decode_metadata(slice, None)?,
146+
ParquetMetaDataReader::decode_metadata(
147+
slice,
148+
#[cfg(feature = "encryption")]
149+
None,
150+
#[cfg(feature = "encryption")]
151+
footer.encrypted_footer(),
152+
)?,
137153
Some((footer_start, suffix.slice(..metadata_start))),
138154
)
139155
};

parquet/src/arrow/async_reader/mod.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,8 @@ impl<T: AsyncRead + AsyncSeek + Unpin + Send> AsyncFileReader for T {
198198
let mut buf = [0_u8; FOOTER_SIZE];
199199
self.read_exact(&mut buf).await?;
200200

201-
let metadata_len = ParquetMetaDataReader::decode_footer(&buf)?;
201+
let footer = ParquetMetaDataReader::decode_footer_tail(&buf)?;
202+
let metadata_len = footer.metadata_length();
202203
self.seek(SeekFrom::End(-FOOTER_SIZE_I64 - metadata_len as i64))
203204
.await?;
204205

@@ -207,7 +208,11 @@ impl<T: AsyncRead + AsyncSeek + Unpin + Send> AsyncFileReader for T {
207208

208209
// todo: use file_decryption_properties
209210
Ok(Arc::new(ParquetMetaDataReader::decode_metadata(
210-
&buf, None,
211+
&buf,
212+
#[cfg(feature = "encryption")]
213+
None,
214+
#[cfg(feature = "encryption")]
215+
footer.encrypted_footer(),
211216
)?))
212217
}
213218
.boxed()

parquet/src/file/footer.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -59,14 +59,13 @@ pub fn parse_metadata<R: ChunkReader>(chunk_reader: &R) -> Result<ParquetMetaDat
5959
///
6060
/// [Parquet Spec]: https://github.com/apache/parquet-format#metadata
6161
#[deprecated(since = "53.1.0", note = "Use ParquetMetaDataReader::decode_metadata")]
62-
pub fn decode_metadata(
63-
buf: &[u8],
64-
#[cfg(feature = "encryption")] file_decryption_properties: Option<&FileDecryptionProperties>,
65-
) -> Result<ParquetMetaData> {
62+
pub fn decode_metadata(buf: &[u8]) -> Result<ParquetMetaData> {
6663
ParquetMetaDataReader::decode_metadata(
6764
buf,
6865
#[cfg(feature = "encryption")]
69-
file_decryption_properties,
66+
None,
67+
#[cfg(feature = "encryption")]
68+
false,
7069
)
7170
}
7271

@@ -81,7 +80,10 @@ pub fn decode_metadata(
8180
/// | len | 'PAR1' |
8281
/// +-----+--------+
8382
/// ```
84-
#[deprecated(since = "53.1.0", note = "Use ParquetMetaDataReader::decode_footer")]
83+
#[deprecated(
84+
since = "53.1.0",
85+
note = "Use ParquetMetaDataReader::decode_footer_tail"
86+
)]
8587
pub fn decode_footer(slice: &[u8; FOOTER_SIZE]) -> Result<usize> {
86-
ParquetMetaDataReader::decode_footer(slice)
88+
ParquetMetaDataReader::decode_footer_tail(slice).map(|f| f.metadata_length())
8789
}

parquet/src/file/metadata/reader.rs

Lines changed: 80 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,26 @@ pub struct ParquetMetaDataReader {
7878
file_decryption_properties: Option<FileDecryptionProperties>,
7979
}
8080

/// Describes how the footer metadata is stored
///
/// This is parsed from the last 8 bytes of the Parquet file
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FooterTail {
    /// Length of the footer metadata in bytes, as recorded in the file tail
    metadata_length: usize,
    /// `true` when the tail carries the encrypted-footer magic bytes
    encrypted_footer: bool,
}

impl FooterTail {
    /// The length of the footer metadata in bytes
    pub fn metadata_length(&self) -> usize {
        self.metadata_length
    }

    /// Whether the footer metadata is encrypted
    pub fn encrypted_footer(&self) -> bool {
        self.encrypted_footer
    }
}
100+
81101
impl ParquetMetaDataReader {
82102
/// Create a new [`ParquetMetaDataReader`]
83103
pub fn new() -> Self {
@@ -366,6 +386,7 @@ impl ParquetMetaDataReader {
366386
&mut fetch,
367387
file_size,
368388
self.get_prefetch_size(),
389+
#[cfg(feature = "encryption")]
369390
self.file_decryption_properties.as_ref(),
370391
)
371392
.await?;
@@ -520,7 +541,8 @@ impl ParquetMetaDataReader {
520541
.get_read(file_size - 8)?
521542
.read_exact(&mut footer)?;
522543

523-
let metadata_len = Self::decode_footer(&footer)?;
544+
let footer = Self::decode_footer_tail(&footer)?;
545+
let metadata_len = footer.metadata_length();
524546
let footer_metadata_len = FOOTER_SIZE + metadata_len;
525547
self.metadata_size = Some(footer_metadata_len);
526548

@@ -536,6 +558,8 @@ impl ParquetMetaDataReader {
536558
chunk_reader.get_bytes(start, metadata_len)?.as_ref(),
537559
#[cfg(feature = "encryption")]
538560
self.file_decryption_properties.as_ref(),
561+
#[cfg(feature = "encryption")]
562+
footer.encrypted_footer(),
539563
)
540564
}
541565

@@ -557,7 +581,9 @@ impl ParquetMetaDataReader {
557581
fetch: &mut F,
558582
file_size: usize,
559583
prefetch: usize,
560-
file_decryption_properties: Option<&FileDecryptionProperties>,
584+
#[cfg(feature = "encryption")] file_decryption_properties: Option<
585+
&FileDecryptionProperties,
586+
>,
561587
) -> Result<(ParquetMetaData, Option<(usize, Bytes)>)> {
562588
if file_size < FOOTER_SIZE {
563589
return Err(eof_err!("file size of {} is less than footer", file_size));
@@ -582,7 +608,8 @@ impl ParquetMetaDataReader {
582608
let mut footer = [0; FOOTER_SIZE];
583609
footer.copy_from_slice(&suffix[suffix_len - FOOTER_SIZE..suffix_len]);
584610

585-
let length = Self::decode_footer(&footer)?;
611+
let footer = Self::decode_footer_tail(&footer)?;
612+
let length = footer.metadata_length();
586613

587614
if file_size < length + FOOTER_SIZE {
588615
return Err(eof_err!(
@@ -597,22 +624,34 @@ impl ParquetMetaDataReader {
597624
let metadata_start = file_size - length - FOOTER_SIZE;
598625
let meta = fetch.fetch(metadata_start..file_size - FOOTER_SIZE).await?;
599626
Ok((
600-
Self::decode_metadata(&meta, file_decryption_properties)?,
627+
Self::decode_metadata(
628+
&meta,
629+
#[cfg(feature = "encryption")]
630+
file_decryption_properties,
631+
#[cfg(feature = "encryption")]
632+
footer.encrypted_footer(),
633+
)?,
601634
None,
602635
))
603636
} else {
604637
let metadata_start = file_size - length - FOOTER_SIZE - footer_start;
605638
let slice = &suffix[metadata_start..suffix_len - FOOTER_SIZE];
606639
Ok((
607-
Self::decode_metadata(slice, file_decryption_properties)?,
640+
Self::decode_metadata(
641+
slice,
642+
#[cfg(feature = "encryption")]
643+
file_decryption_properties,
644+
#[cfg(feature = "encryption")]
645+
footer.encrypted_footer(),
646+
)?,
608647
Some((footer_start, suffix.slice(..metadata_start))),
609648
))
610649
}
611650
}
612651

613-
/// Decodes the Parquet footer returning the metadata length in bytes
652+
/// Decodes the end of the Parquet footer
614653
///
615-
/// A parquet footer is 8 bytes long and has the following layout:
654+
/// There are 8 bytes at the end of the Parquet footer with the following layout:
616655
/// * 4 bytes for the metadata length
617656
/// * 4 bytes for the magic bytes 'PAR1' or 'PARE' (encrypted footer)
618657
///
@@ -621,16 +660,28 @@ impl ParquetMetaDataReader {
621660
/// | len | 'PAR1' or 'PARE' |
622661
/// +-----+------------------+
623662
/// ```
624-
pub fn decode_footer(slice: &[u8; FOOTER_SIZE]) -> Result<usize> {
625-
// check this is indeed a parquet file
626-
if slice[4..] != PARQUET_MAGIC && slice[4..] != PARQUET_MAGIC_ENCR_FOOTER {
663+
pub fn decode_footer_tail(slice: &[u8; FOOTER_SIZE]) -> Result<FooterTail> {
664+
let magic = &slice[4..];
665+
let encrypted_footer = if magic == PARQUET_MAGIC_ENCR_FOOTER {
666+
true
667+
} else if magic == PARQUET_MAGIC {
668+
false
669+
} else {
627670
return Err(general_err!("Invalid Parquet file. Corrupt footer"));
628-
}
629-
671+
};
630672
// get the metadata length from the footer
631673
let metadata_len = u32::from_le_bytes(slice[..4].try_into().unwrap());
632-
// u32 won't be larger than usize in most cases
633-
Ok(metadata_len as usize)
674+
Ok(FooterTail {
675+
// u32 won't be larger than usize in most cases
676+
metadata_length: metadata_len as usize,
677+
encrypted_footer,
678+
})
679+
}
680+
681+
/// Decodes the Parquet footer, returning the metadata length in bytes
682+
#[deprecated(note = "use decode_footer_tail instead")]
683+
pub fn decode_footer(slice: &[u8; FOOTER_SIZE]) -> Result<usize> {
684+
Self::decode_footer_tail(slice).map(|f| f.metadata_length)
634685
}
635686

636687
/// Decodes [`ParquetMetaData`] from the provided bytes.
@@ -645,18 +696,29 @@ impl ParquetMetaDataReader {
645696
#[cfg(feature = "encryption")] file_decryption_properties: Option<
646697
&FileDecryptionProperties,
647698
>,
699+
#[cfg(feature = "encryption")] encrypted_footer: bool,
648700
) -> Result<ParquetMetaData> {
649701
let mut prot = TCompactSliceInputProtocol::new(buf);
650702

703+
#[cfg(not(feature = "encryption"))]
704+
if encrypted_footer() {
705+
return Err(general_err!(
706+
"Parquet file has an encrypted footer but the encryption feature is disabled"
707+
));
708+
}
709+
651710
#[cfg(feature = "encryption")]
652711
let mut file_decryptor = None;
653712
#[cfg(feature = "encryption")]
654713
let decrypted_fmd_buf;
655714

656715
#[cfg(feature = "encryption")]
657-
if file_decryption_properties.is_some()
658-
&& file_decryption_properties.unwrap().has_footer_key()
659-
{
716+
if encrypted_footer {
717+
if file_decryption_properties.is_none() {
718+
return Err(general_err!("Parquet file has an encrypted footer but no decryption properties were provided"));
719+
};
720+
let file_decryption_properties = file_decryption_properties.unwrap();
721+
660722
let t_file_crypto_metadata: TFileCryptoMetaData =
661723
TFileCryptoMetaData::read_from_in_protocol(&mut prot)
662724
.map_err(|e| general_err!("Could not parse crypto metadata: {}", e))?;
@@ -678,7 +740,7 @@ impl ParquetMetaDataReader {
678740
let aad_prefix: Vec<u8> = aes_gcm_algo.aad_prefix.unwrap_or_default();
679741

680742
file_decryptor = Some(FileDecryptor::new(
681-
file_decryption_properties.unwrap(),
743+
file_decryption_properties,
682744
aad_file_unique.clone(),
683745
aad_prefix.clone(),
684746
));

0 commit comments

Comments
 (0)