Skip to content

Commit 7db06cc

Browse files
committed
Handle when a file is encrypted but encryption is disabled or no decryption properties are provided
1 parent 9030f3a commit 7db06cc

File tree

4 files changed

+109
-30
lines changed

4 files changed

+109
-30
lines changed

parquet/src/arrow/async_reader/metadata.rs

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,8 @@ impl<F: MetadataFetch> MetadataLoader<F> {
113113
let mut footer = [0; FOOTER_SIZE];
114114
footer.copy_from_slice(&suffix[suffix_len - FOOTER_SIZE..suffix_len]);
115115

116-
let length = ParquetMetaDataReader::decode_footer(&footer)?;
116+
let footer = ParquetMetaDataReader::decode_footer_tail(&footer)?;
117+
let length = footer.metadata_length();
117118

118119
if file_size < length + FOOTER_SIZE {
119120
return Err(ParquetError::EOF(format!(
@@ -127,13 +128,26 @@ impl<F: MetadataFetch> MetadataLoader<F> {
127128
let (metadata, remainder) = if length > suffix_len - FOOTER_SIZE {
128129
let metadata_start = file_size - length - FOOTER_SIZE;
129130
let meta = fetch.fetch(metadata_start..file_size - FOOTER_SIZE).await?;
130-
(ParquetMetaDataReader::decode_metadata(&meta, None)?, None)
131+
(
132+
ParquetMetaDataReader::decode_metadata(
133+
&meta,
134+
footer.encrypted_footer(),
135+
#[cfg(feature = "encryption")]
136+
None,
137+
)?,
138+
None,
139+
)
131140
} else {
132141
let metadata_start = file_size - length - FOOTER_SIZE - footer_start;
133142

134143
let slice = &suffix[metadata_start..suffix_len - FOOTER_SIZE];
135144
(
136-
ParquetMetaDataReader::decode_metadata(slice, None)?,
145+
ParquetMetaDataReader::decode_metadata(
146+
slice,
147+
footer.encrypted_footer(),
148+
#[cfg(feature = "encryption")]
149+
None,
150+
)?,
137151
Some((footer_start, suffix.slice(..metadata_start))),
138152
)
139153
};

parquet/src/arrow/async_reader/mod.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,8 @@ impl<T: AsyncRead + AsyncSeek + Unpin + Send> AsyncFileReader for T {
198198
let mut buf = [0_u8; FOOTER_SIZE];
199199
self.read_exact(&mut buf).await?;
200200

201-
let metadata_len = ParquetMetaDataReader::decode_footer(&buf)?;
201+
let footer = ParquetMetaDataReader::decode_footer_tail(&buf)?;
202+
let metadata_len = footer.metadata_length();
202203
self.seek(SeekFrom::End(-FOOTER_SIZE_I64 - metadata_len as i64))
203204
.await?;
204205

@@ -207,7 +208,10 @@ impl<T: AsyncRead + AsyncSeek + Unpin + Send> AsyncFileReader for T {
207208

208209
// todo: use file_decryption_properties
209210
Ok(Arc::new(ParquetMetaDataReader::decode_metadata(
210-
&buf, None,
211+
&buf,
212+
footer.encrypted_footer(),
213+
#[cfg(feature = "encryption")]
214+
None,
211215
)?))
212216
}
213217
.boxed()
@@ -842,6 +846,7 @@ impl RowGroups for InMemoryRowGroup<'_> {
842846
self.metadata.column(i),
843847
self.row_count,
844848
page_locations,
849+
#[cfg(feature = "encryption")]
845850
None,
846851
)?);
847852

parquet/src/file/footer.rs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -59,14 +59,12 @@ pub fn parse_metadata<R: ChunkReader>(chunk_reader: &R) -> Result<ParquetMetaDat
5959
///
6060
/// [Parquet Spec]: https://github.com/apache/parquet-format#metadata
6161
#[deprecated(since = "53.1.0", note = "Use ParquetMetaDataReader::decode_metadata")]
62-
pub fn decode_metadata(
63-
buf: &[u8],
64-
#[cfg(feature = "encryption")] file_decryption_properties: Option<&FileDecryptionProperties>,
65-
) -> Result<ParquetMetaData> {
62+
pub fn decode_metadata(buf: &[u8]) -> Result<ParquetMetaData> {
6663
ParquetMetaDataReader::decode_metadata(
6764
buf,
65+
false,
6866
#[cfg(feature = "encryption")]
69-
file_decryption_properties,
67+
None,
7068
)
7169
}
7270

@@ -81,7 +79,10 @@ pub fn decode_metadata(
8179
/// | len | 'PAR1' |
8280
/// +-----+--------+
8381
/// ```
84-
#[deprecated(since = "53.1.0", note = "Use ParquetMetaDataReader::decode_footer")]
82+
#[deprecated(
83+
since = "53.1.0",
84+
note = "Use ParquetMetaDataReader::decode_footer_tail"
85+
)]
8586
pub fn decode_footer(slice: &[u8; FOOTER_SIZE]) -> Result<usize> {
86-
ParquetMetaDataReader::decode_footer(slice)
87+
ParquetMetaDataReader::decode_footer_tail(slice).map(|f| f.metadata_length())
8788
}

parquet/src/file/metadata/reader.rs

Lines changed: 77 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,26 @@ pub struct ParquetMetaDataReader {
7878
file_decryption_properties: Option<FileDecryptionProperties>,
7979
}
8080

81+
/// Describes how the footer metadata is stored
///
/// This is parsed from the last 8 bytes of the Parquet file
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FooterTail {
    /// Length of the footer metadata in bytes
    metadata_length: usize,
    /// `true` when the footer metadata is encrypted
    encrypted_footer: bool,
}

impl FooterTail {
    /// The length of the footer metadata in bytes
    pub fn metadata_length(&self) -> usize {
        self.metadata_length
    }

    /// Whether the footer metadata is encrypted
    pub fn encrypted_footer(&self) -> bool {
        self.encrypted_footer
    }
}
100+
81101
impl ParquetMetaDataReader {
82102
/// Create a new [`ParquetMetaDataReader`]
83103
pub fn new() -> Self {
@@ -366,6 +386,7 @@ impl ParquetMetaDataReader {
366386
&mut fetch,
367387
file_size,
368388
self.get_prefetch_size(),
389+
#[cfg(feature = "encryption")]
369390
self.file_decryption_properties.as_ref(),
370391
)
371392
.await?;
@@ -520,7 +541,8 @@ impl ParquetMetaDataReader {
520541
.get_read(file_size - 8)?
521542
.read_exact(&mut footer)?;
522543

523-
let metadata_len = Self::decode_footer(&footer)?;
544+
let footer = Self::decode_footer_tail(&footer)?;
545+
let metadata_len = footer.metadata_length();
524546
let footer_metadata_len = FOOTER_SIZE + metadata_len;
525547
self.metadata_size = Some(footer_metadata_len);
526548

@@ -534,6 +556,7 @@ impl ParquetMetaDataReader {
534556
let start = file_size - footer_metadata_len as u64;
535557
Self::decode_metadata(
536558
chunk_reader.get_bytes(start, metadata_len)?.as_ref(),
559+
footer.encrypted_footer(),
537560
#[cfg(feature = "encryption")]
538561
self.file_decryption_properties.as_ref(),
539562
)
@@ -557,7 +580,9 @@ impl ParquetMetaDataReader {
557580
fetch: &mut F,
558581
file_size: usize,
559582
prefetch: usize,
560-
file_decryption_properties: Option<&FileDecryptionProperties>,
583+
#[cfg(feature = "encryption")] file_decryption_properties: Option<
584+
&FileDecryptionProperties,
585+
>,
561586
) -> Result<(ParquetMetaData, Option<(usize, Bytes)>)> {
562587
if file_size < FOOTER_SIZE {
563588
return Err(eof_err!("file size of {} is less than footer", file_size));
@@ -582,7 +607,8 @@ impl ParquetMetaDataReader {
582607
let mut footer = [0; FOOTER_SIZE];
583608
footer.copy_from_slice(&suffix[suffix_len - FOOTER_SIZE..suffix_len]);
584609

585-
let length = Self::decode_footer(&footer)?;
610+
let footer = Self::decode_footer_tail(&footer)?;
611+
let length = footer.metadata_length();
586612

587613
if file_size < length + FOOTER_SIZE {
588614
return Err(eof_err!(
@@ -597,22 +623,32 @@ impl ParquetMetaDataReader {
597623
let metadata_start = file_size - length - FOOTER_SIZE;
598624
let meta = fetch.fetch(metadata_start..file_size - FOOTER_SIZE).await?;
599625
Ok((
600-
Self::decode_metadata(&meta, file_decryption_properties)?,
626+
Self::decode_metadata(
627+
&meta,
628+
footer.encrypted_footer(),
629+
#[cfg(feature = "encryption")]
630+
file_decryption_properties,
631+
)?,
601632
None,
602633
))
603634
} else {
604635
let metadata_start = file_size - length - FOOTER_SIZE - footer_start;
605636
let slice = &suffix[metadata_start..suffix_len - FOOTER_SIZE];
606637
Ok((
607-
Self::decode_metadata(slice, file_decryption_properties)?,
638+
Self::decode_metadata(
639+
slice,
640+
footer.encrypted_footer(),
641+
#[cfg(feature = "encryption")]
642+
file_decryption_properties,
643+
)?,
608644
Some((footer_start, suffix.slice(..metadata_start))),
609645
))
610646
}
611647
}
612648

613-
/// Decodes the Parquet footer returning the metadata length in bytes
649+
/// Decodes the end of the Parquet footer
614650
///
615-
/// A parquet footer is 8 bytes long and has the following layout:
651+
/// There are 8 bytes at the end of the Parquet footer with the following layout:
616652
/// * 4 bytes for the metadata length
617653
/// * 4 bytes for the magic bytes 'PAR1' or 'PARE' (encrypted footer)
618654
///
@@ -621,16 +657,28 @@ impl ParquetMetaDataReader {
621657
/// | len | 'PAR1' or 'PARE' |
622658
/// +-----+------------------+
623659
/// ```
624-
pub fn decode_footer(slice: &[u8; FOOTER_SIZE]) -> Result<usize> {
625-
// check this is indeed a parquet file
626-
if slice[4..] != PARQUET_MAGIC && slice[4..] != PARQUET_MAGIC_ENCR_FOOTER {
660+
pub fn decode_footer_tail(slice: &[u8; FOOTER_SIZE]) -> Result<FooterTail> {
661+
let magic = &slice[4..];
662+
let encrypted_footer = if magic == PARQUET_MAGIC_ENCR_FOOTER {
663+
true
664+
} else if magic == PARQUET_MAGIC {
665+
false
666+
} else {
627667
return Err(general_err!("Invalid Parquet file. Corrupt footer"));
628-
}
629-
668+
};
630669
// get the metadata length from the footer
631670
let metadata_len = u32::from_le_bytes(slice[..4].try_into().unwrap());
632-
// u32 won't be larger than usize in most cases
633-
Ok(metadata_len as usize)
671+
Ok(FooterTail {
672+
// u32 won't be larger than usize in most cases
673+
metadata_length: metadata_len as usize,
674+
encrypted_footer,
675+
})
676+
}
677+
678+
/// Decodes the Parquet footer, returning the metadata length in bytes
679+
#[deprecated(note = "use decode_footer_tail instead")]
680+
pub fn decode_footer(slice: &[u8; FOOTER_SIZE]) -> Result<usize> {
681+
Self::decode_footer_tail(slice).map(|f| f.metadata_length)
634682
}
635683

636684
/// Decodes [`ParquetMetaData`] from the provided bytes.
@@ -642,21 +690,32 @@ impl ParquetMetaDataReader {
642690
/// [Parquet Spec]: https://github.com/apache/parquet-format#metadata
643691
pub fn decode_metadata(
644692
buf: &[u8],
693+
encrypted_footer: bool,
645694
#[cfg(feature = "encryption")] file_decryption_properties: Option<
646695
&FileDecryptionProperties,
647696
>,
648697
) -> Result<ParquetMetaData> {
649698
let mut prot = TCompactSliceInputProtocol::new(buf);
650699

700+
#[cfg(not(feature = "encryption"))]
701+
if encrypted_footer {
702+
return Err(general_err!(
703+
"Parquet file has an encrypted footer but the encryption feature is disabled"
704+
));
705+
}
706+
651707
#[cfg(feature = "encryption")]
652708
let mut file_decryptor = None;
653709
#[cfg(feature = "encryption")]
654710
let decrypted_fmd_buf;
655711

656712
#[cfg(feature = "encryption")]
657-
if file_decryption_properties.is_some()
658-
&& file_decryption_properties.unwrap().has_footer_key()
659-
{
713+
if encrypted_footer {
714+
if file_decryption_properties.is_none() {
715+
return Err(general_err!("Parquet file has an encrypted footer but no decryption properties were provided"));
716+
};
717+
let file_decryption_properties = file_decryption_properties.unwrap();
718+
660719
let t_file_crypto_metadata: TFileCryptoMetaData =
661720
TFileCryptoMetaData::read_from_in_protocol(&mut prot)
662721
.map_err(|e| general_err!("Could not parse crypto metadata: {}", e))?;
@@ -678,7 +737,7 @@ impl ParquetMetaDataReader {
678737
let aad_prefix: Vec<u8> = aes_gcm_algo.aad_prefix.unwrap_or_default();
679738

680739
file_decryptor = Some(FileDecryptor::new(
681-
file_decryption_properties.unwrap(),
740+
file_decryption_properties,
682741
aad_file_unique.clone(),
683742
aad_prefix.clone(),
684743
));

0 commit comments

Comments
 (0)