Skip to content

Commit b059926

Browse files
committed
feat: add ParquetEncryptionMode, to permit unencrypted files in initial key rotation
1 parent 9d6173c commit b059926

File tree

3 files changed

+56
-22
lines changed

3 files changed

+56
-22
lines changed

parquet/src/file/encryption.rs

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,28 +38,41 @@ pub struct ParquetEncryptionKeyInfo {
3838
pub key: ParquetEncryptionKey,
3939
}
4040

41+
/// Tells what mode (and also the key value(s)) a file is to be encrypted in (when writing) or is
42+
/// permitted to be encrypted in (when reading).
43+
#[derive(Serialize, Deserialize, Debug, Clone)]
44+
pub enum ParquetEncryptionMode {
45+
/// Means the file is unencrypted
46+
Unencrypted,
47+
/// Means the file is footer-encrypted (in effect, fully encrypted). The same key is used for all
48+
/// the columns too, in this implementation.
49+
FooterEncrypted(ParquetEncryptionKeyInfo),
50+
}
51+
4152
/// Describes general parquet encryption configuration -- new files are encrypted with the
4253
/// write_key(), but old files can be decrypted with any of the valid read keys.
4354
#[derive(Serialize, Deserialize, Debug, Clone)]
4455
pub struct ParquetEncryptionConfig {
45-
// The last key is the write key, and all the keys are valid read keys.
46-
keys: Vec<ParquetEncryptionKeyInfo>,
56+
// The last mode is the write mode (i.e. it has the write key), and all the modes (the last included) are
57+
// valid read modes (i.e. valid read keys, or Unencrypted mode, if a user turned on encryption
58+
// but hasn't key-rotated unencrypted files away yet).
59+
keys: Vec<ParquetEncryptionMode>,
4760
}
4861

4962
impl ParquetEncryptionConfig {
50-
pub fn new(keys: Vec<ParquetEncryptionKeyInfo>) -> Option<ParquetEncryptionConfig> {
63+
pub fn new(keys: Vec<ParquetEncryptionMode>) -> Option<ParquetEncryptionConfig> {
5164
if keys.is_empty() {
5265
None
5366
} else {
5467
Some(ParquetEncryptionConfig{ keys })
5568
}
5669
}
5770

58-
pub fn write_key(&self) -> &ParquetEncryptionKeyInfo {
71+
pub fn write_key(&self) -> &ParquetEncryptionMode {
5972
self.keys.last().unwrap()
6073
}
6174

62-
pub fn read_keys(&self) -> &[ParquetEncryptionKeyInfo] {
75+
pub fn read_keys(&self) -> &[ParquetEncryptionMode] {
6376
self.keys.as_slice()
6477
}
6578
}

parquet/src/file/footer.rs

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,10 @@ use crate::file::{
3535

3636
use crate::schema::types::{self, SchemaDescriptor};
3737

38-
use crate::file::{encryption::{decrypt_module, parquet_magic, ParquetEncryptionConfig, PARQUET_KEY_HASH_LENGTH, ParquetEncryptionKey, ParquetEncryptionKeyInfo, RandomFileIdentifier, AAD_FILE_UNIQUE_SIZE}, PARQUET_MAGIC_ENCRYPTED_FOOTER_CUBE, PARQUET_MAGIC_UNSUPPORTED_PARE};
38+
use crate::file::{encryption::{decrypt_module, ParquetEncryptionConfig, ParquetEncryptionMode,
39+
ParquetEncryptionKey,
40+
PARQUET_KEY_HASH_LENGTH, RandomFileIdentifier, AAD_FILE_UNIQUE_SIZE},
41+
PARQUET_MAGIC_ENCRYPTED_FOOTER_CUBE, PARQUET_MAGIC_UNSUPPORTED_PARE};
3942

4043
fn select_key(encryption_config: &ParquetEncryptionConfig, key_metadata: &Option<Vec<u8>>) -> Result<ParquetEncryptionKey> {
4144
if let Some(key_id) = key_metadata {
@@ -44,10 +47,15 @@ fn select_key(encryption_config: &ParquetEncryptionConfig, key_metadata: &Option
4447
}
4548
let mut key_id_arr = [0u8; PARQUET_KEY_HASH_LENGTH];
4649
key_id_arr.copy_from_slice(&key_id);
47-
let read_keys: &[ParquetEncryptionKeyInfo] = encryption_config.read_keys();
48-
for key_info in read_keys {
49-
if key_info.key.compute_key_hash() == key_id_arr {
50-
return Ok(key_info.key)
50+
let read_keys: &[ParquetEncryptionMode] = encryption_config.read_keys();
51+
for mode in read_keys {
52+
match mode {
53+
ParquetEncryptionMode::Unencrypted => { },
54+
ParquetEncryptionMode::FooterEncrypted(key_info) => {
55+
if key_info.key.compute_key_hash() == key_id_arr {
56+
return Ok(key_info.key)
57+
}
58+
}
5159
}
5260
}
5361
return Err(general_err!("Parquet file is encrypted with an unknown or out-of-rotation key"));
@@ -81,18 +89,28 @@ pub fn parse_metadata<R: ChunkReader>(chunk_reader: &R, encryption_config: &Opti
8189
default_end_reader.read_exact(&mut default_len_end_buf)?;
8290

8391
// check this is indeed a parquet file
92+
let encrypted_footer: bool;
8493
{
94+
// and check that its encryption setting conceivably matches our encryption_config (but without yet checking keys)
8595
let trailing_magic: &[u8] = &default_len_end_buf[default_end_len - 4..];
86-
if trailing_magic != parquet_magic(encryption_config.is_some()) {
87-
if trailing_magic == PARQUET_MAGIC {
88-
return Err(general_err!("Invalid Parquet file in encrypted mode. File (or at least the Parquet footer) is not encrypted"));
89-
} else if trailing_magic == PARQUET_MAGIC_ENCRYPTED_FOOTER_CUBE {
96+
if trailing_magic == PARQUET_MAGIC {
97+
if let Some(config) = encryption_config {
98+
if !config.read_keys().iter().any(|m| matches!(m, ParquetEncryptionMode::Unencrypted)) {
99+
return Err(general_err!("Invalid Parquet file in encrypted mode. File (or at least the Parquet footer) is not encrypted"));
100+
}
101+
}
102+
encrypted_footer = false;
103+
} else if trailing_magic == PARQUET_MAGIC_ENCRYPTED_FOOTER_CUBE {
104+
let has_keys = encryption_config.as_ref().map_or(false,
105+
|config| config.read_keys().iter().any(|m| matches!(m, ParquetEncryptionMode::FooterEncrypted(_))));
106+
if !has_keys {
90107
return Err(general_err!("Invalid Parquet file in unencrypted mode. File is encrypted"));
91-
} else if trailing_magic == PARQUET_MAGIC_UNSUPPORTED_PARE {
92-
return Err(general_err!("Unsupported Parquet file. File is encrypted with the standard PARE encryption format"));
93-
} else {
94-
return Err(general_err!("Invalid Parquet file. Corrupt footer"));
95108
}
109+
encrypted_footer = true;
110+
} else if trailing_magic == PARQUET_MAGIC_UNSUPPORTED_PARE {
111+
return Err(general_err!("Unsupported Parquet file. File is encrypted with the standard PARE encryption format"));
112+
} else {
113+
return Err(general_err!("Invalid Parquet file. Corrupt footer"));
96114
}
97115
}
98116

@@ -135,7 +153,8 @@ pub fn parse_metadata<R: ChunkReader>(chunk_reader: &R, encryption_config: &Opti
135153
let returned_encryption_key: Option<ParquetEncryptionKey>;
136154

137155
let random_file_identifier: Option<RandomFileIdentifier>;
138-
if let Some(encryption_config) = encryption_config {
156+
if encrypted_footer {
157+
let encryption_config: &ParquetEncryptionConfig = encryption_config.as_ref().unwrap();
139158
let file_crypto_metadata = {
140159
let mut prot = TCompactInputProtocol::new(&mut metadata_read);
141160
TFileCryptoMetaData::read_from_in_protocol(&mut prot)

parquet/src/file/writer.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -643,7 +643,7 @@ mod tests {
643643
use crate::basic::{Compression, Encoding, IntType, LogicalType, Repetition, Type};
644644
use crate::column::page::PageReader;
645645
use crate::compression::{create_codec, Codec};
646-
use crate::file::encryption::{generate_random_file_identifier, ParquetEncryptionConfig, ParquetEncryptionKeyInfo};
646+
use crate::file::encryption::{generate_random_file_identifier, ParquetEncryptionConfig, ParquetEncryptionKeyInfo, ParquetEncryptionMode};
647647
use crate::file::reader::Length;
648648
use crate::file::{PARQUET_MAGIC, PARQUET_MAGIC_ENCRYPTED_FOOTER_CUBE};
649649
use crate::file::{
@@ -1209,7 +1209,8 @@ mod tests {
12091209

12101210
file_writer.close().unwrap();
12111211

1212-
let encryption_config = encryption_info.map(|(key_info, _)| ParquetEncryptionConfig::new(vec![key_info]).unwrap());
1212+
let encryption_config = encryption_info.map(|(key_info, _)|
1213+
ParquetEncryptionConfig::new(vec![ParquetEncryptionMode::FooterEncrypted(key_info)]).unwrap());
12131214
let reader = assert_send(SerializedFileReader::new_maybe_encrypted(file, &encryption_config).unwrap());
12141215
assert_eq!(reader.num_row_groups(), data.len());
12151216
assert_eq!(
@@ -1303,7 +1304,8 @@ mod tests {
13031304
let buffer = cursor.into_inner().unwrap();
13041305

13051306
let reading_cursor = crate::file::serialized_reader::SliceableCursor::new(buffer);
1306-
let encryption_config = encryption_info.map(|(key_info, _)| ParquetEncryptionConfig::new(vec![key_info]).unwrap());
1307+
let encryption_config = encryption_info
1308+
.map(|(key_info, _)| ParquetEncryptionConfig::new(vec![ParquetEncryptionMode::FooterEncrypted(key_info)]).unwrap());
13071309
let reader = SerializedFileReader::new_maybe_encrypted(reading_cursor, &encryption_config).unwrap();
13081310

13091311
assert_eq!(reader.num_row_groups(), data.len());

0 commit comments

Comments
 (0)