From b059926f9cac6abb303d44eb28d4ce6ba4502a63 Mon Sep 17 00:00:00 2001 From: Sam Hughes Date: Tue, 10 Sep 2024 22:54:30 -0700 Subject: [PATCH] feat: add ParquetEncryptionMode, to permit unencrypted files in initial key rotation --- parquet/src/file/encryption.rs | 23 +++++++++++++---- parquet/src/file/footer.rs | 47 ++++++++++++++++++++++++---------- parquet/src/file/writer.rs | 8 +++--- 3 files changed, 56 insertions(+), 22 deletions(-) diff --git a/parquet/src/file/encryption.rs b/parquet/src/file/encryption.rs index 3680db28ba3f..b1f41f0fdf48 100644 --- a/parquet/src/file/encryption.rs +++ b/parquet/src/file/encryption.rs @@ -38,16 +38,29 @@ pub struct ParquetEncryptionKeyInfo { pub key: ParquetEncryptionKey, } +/// Tells what mode (and also the key value(s)) a file is to be encrypted in (when writing) or is +/// permitted to be encrypted in (when reading). +#[derive(Serialize, Deserialize, Debug, Clone)] +pub enum ParquetEncryptionMode { + /// Means the file is unencrypted + Unencrypted, + /// Means the file is footer-encrypted -- well, fully-encrypted. The same key is used for all + /// the columns too, in this implementation. + FooterEncrypted(ParquetEncryptionKeyInfo), +} + /// Describes general parquet encryption configuration -- new files are encrypted with the /// write_key(), but old files can be decrypted with any of the valid read keys. #[derive(Serialize, Deserialize, Debug, Clone)] pub struct ParquetEncryptionConfig { - // The last key is the write key, and all the keys are valid read keys. - keys: Vec, + // The last mode is the write mode (i.e. it has the write key), and all the prior modes are + // valid read modes (i.e. valid read keys, or Unencrypted mode, if a user turned on encryption + // but hasn't key-rotated unencrypted files away yet). + keys: Vec, } impl ParquetEncryptionConfig { - pub fn new(keys: Vec) -> Option { + pub fn new(keys: Vec) -> Option { if keys.is_empty() { None } else { @@ -55,11 +68,11 @@ impl ParquetEncryptionConfig { } } - pub fn write_key(&self) -> &ParquetEncryptionKeyInfo { + pub fn write_key(&self) -> &ParquetEncryptionMode { self.keys.last().unwrap() } - pub fn read_keys(&self) -> &[ParquetEncryptionKeyInfo] { + pub fn read_keys(&self) -> &[ParquetEncryptionMode] { self.keys.as_slice() } } diff --git a/parquet/src/file/footer.rs b/parquet/src/file/footer.rs index 90a167615608..a8a861479a16 100644 --- a/parquet/src/file/footer.rs +++ b/parquet/src/file/footer.rs @@ -35,7 +35,10 @@ use crate::file::{ use crate::schema::types::{self, SchemaDescriptor}; -use crate::file::{encryption::{decrypt_module, parquet_magic, ParquetEncryptionConfig, PARQUET_KEY_HASH_LENGTH, ParquetEncryptionKey, ParquetEncryptionKeyInfo, RandomFileIdentifier, AAD_FILE_UNIQUE_SIZE}, PARQUET_MAGIC_ENCRYPTED_FOOTER_CUBE, PARQUET_MAGIC_UNSUPPORTED_PARE}; +use crate::file::{encryption::{decrypt_module, ParquetEncryptionConfig, ParquetEncryptionMode, + ParquetEncryptionKey, + PARQUET_KEY_HASH_LENGTH, RandomFileIdentifier, AAD_FILE_UNIQUE_SIZE}, + PARQUET_MAGIC_ENCRYPTED_FOOTER_CUBE, PARQUET_MAGIC_UNSUPPORTED_PARE}; fn select_key(encryption_config: &ParquetEncryptionConfig, key_metadata: &Option>) -> Result { if let Some(key_id) = key_metadata { @@ -44,10 +47,15 @@ fn select_key(encryption_config: &ParquetEncryptionConfig, key_metadata: &Option } let mut key_id_arr = [0u8; PARQUET_KEY_HASH_LENGTH]; key_id_arr.copy_from_slice(&key_id); - let read_keys: &[ParquetEncryptionKeyInfo] = encryption_config.read_keys(); - for key_info in read_keys { - if key_info.key.compute_key_hash() == key_id_arr { - return Ok(key_info.key) + let read_keys: &[ParquetEncryptionMode] = encryption_config.read_keys(); + for mode in read_keys { + match mode { + ParquetEncryptionMode::Unencrypted => { }, + ParquetEncryptionMode::FooterEncrypted(key_info) => { + if key_info.key.compute_key_hash() == key_id_arr { + return Ok(key_info.key) + } + } } } return Err(general_err!("Parquet file is encrypted with an unknown or out-of-rotation key")); @@ -81,18 +89,28 @@ pub fn parse_metadata(chunk_reader: &R, encryption_config: &Opti default_end_reader.read_exact(&mut default_len_end_buf)?; // check this is indeed a parquet file + let encrypted_footer: bool; { + // and check that its encryption setting conceivably matches our encryption_config (but without yet checking keys) let trailing_magic: &[u8] = &default_len_end_buf[default_end_len - 4..]; - if trailing_magic != parquet_magic(encryption_config.is_some()) { - if trailing_magic == PARQUET_MAGIC { - return Err(general_err!("Invalid Parquet file in encrypted mode. File (or at least the Parquet footer) is not encrypted")); - } else if trailing_magic == PARQUET_MAGIC_ENCRYPTED_FOOTER_CUBE { + if trailing_magic == PARQUET_MAGIC { + if let Some(config) = encryption_config { + if !config.read_keys().iter().any(|m| matches!(m, ParquetEncryptionMode::Unencrypted)) { + return Err(general_err!("Invalid Parquet file in encrypted mode. File (or at least the Parquet footer) is not encrypted")); + } + } + encrypted_footer = false; + } else if trailing_magic == PARQUET_MAGIC_ENCRYPTED_FOOTER_CUBE { + let has_keys = encryption_config.as_ref().map_or(false, + |config| config.read_keys().iter().any(|m| matches!(m, ParquetEncryptionMode::FooterEncrypted(_)))); + if !has_keys { return Err(general_err!("Invalid Parquet file in unencrypted mode. File is encrypted")); - } else if trailing_magic == PARQUET_MAGIC_UNSUPPORTED_PARE { - return Err(general_err!("Unsupported Parquet file. File is encrypted with the standard PARE encryption format")); - } else { - return Err(general_err!("Invalid Parquet file. Corrupt footer")); } + encrypted_footer = true; + } else if trailing_magic == PARQUET_MAGIC_UNSUPPORTED_PARE { + return Err(general_err!("Unsupported Parquet file. File is encrypted with the standard PARE encryption format")); + } else { + return Err(general_err!("Invalid Parquet file. Corrupt footer")); } } @@ -135,7 +153,8 @@ pub fn parse_metadata(chunk_reader: &R, encryption_config: &Opti let returned_encryption_key: Option; let random_file_identifier: Option; - if let Some(encryption_config) = encryption_config { + if encrypted_footer { + let encryption_config: &ParquetEncryptionConfig = encryption_config.as_ref().unwrap(); let file_crypto_metadata = { let mut prot = TCompactInputProtocol::new(&mut metadata_read); TFileCryptoMetaData::read_from_in_protocol(&mut prot) diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs index d1303e7926f4..38f1ceb764a0 100644 --- a/parquet/src/file/writer.rs +++ b/parquet/src/file/writer.rs @@ -643,7 +643,7 @@ mod tests { use crate::basic::{Compression, Encoding, IntType, LogicalType, Repetition, Type}; use crate::column::page::PageReader; use crate::compression::{create_codec, Codec}; - use crate::file::encryption::{generate_random_file_identifier, ParquetEncryptionConfig, ParquetEncryptionKeyInfo}; + use crate::file::encryption::{generate_random_file_identifier, ParquetEncryptionConfig, ParquetEncryptionKeyInfo, ParquetEncryptionMode}; use crate::file::reader::Length; use crate::file::{PARQUET_MAGIC, PARQUET_MAGIC_ENCRYPTED_FOOTER_CUBE}; use crate::file::{ @@ -1209,7 +1209,8 @@ mod tests { file_writer.close().unwrap(); - let encryption_config = encryption_info.map(|(key_info, _)| ParquetEncryptionConfig::new(vec![key_info]).unwrap()); + let encryption_config = encryption_info.map(|(key_info, _)| + ParquetEncryptionConfig::new(vec![ParquetEncryptionMode::FooterEncrypted(key_info)]).unwrap()); let reader = assert_send(SerializedFileReader::new_maybe_encrypted(file, &encryption_config).unwrap()); assert_eq!(reader.num_row_groups(), data.len()); assert_eq!( @@ -1303,7 +1304,8 @@ mod tests { let buffer = cursor.into_inner().unwrap(); let reading_cursor = crate::file::serialized_reader::SliceableCursor::new(buffer); - let encryption_config = encryption_info.map(|(key_info, _)| ParquetEncryptionConfig::new(vec![key_info]).unwrap()); + let encryption_config = encryption_info + .map(|(key_info, _)| ParquetEncryptionConfig::new(vec![ParquetEncryptionMode::FooterEncrypted(key_info)]).unwrap()); let reader = SerializedFileReader::new_maybe_encrypted(reading_cursor, &encryption_config).unwrap(); assert_eq!(reader.num_row_groups(), data.len());