Skip to content

Commit 7f94e39

Browse files
committed
Handle reading plaintext footer files without decryption properties
1 parent 06cfe65 commit 7f94e39

File tree

4 files changed

+116
-62
lines changed

4 files changed

+116
-62
lines changed

parquet/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,6 @@ sysinfo = ["dep:sysinfo"]
126126
crc = ["dep:crc32fast"]
127127
# Enable SIMD UTF-8 validation
128128
simdutf8 = ["dep:simdutf8"]
129-
#encryption = ["aes-gcm", "base64"]
130129
# Enable Parquet modular encryption support
131130
encryption = ["dep:ring"]
132131

parquet/src/arrow/arrow_reader/mod.rs

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1026,7 +1026,6 @@ mod tests {
10261026
};
10271027
use arrow_select::concat::concat_batches;
10281028

1029-
#[cfg(feature = "encryption")]
10301029
use crate::arrow::arrow_reader::ArrowReaderMetadata;
10311030
use crate::arrow::arrow_reader::{
10321031
ArrowPredicateFn, ArrowReaderBuilder, ArrowReaderOptions, ParquetRecordBatchReader,
@@ -1897,6 +1896,63 @@ mod tests {
18971896
verify_encryption_test_file_read(file, decryption_properties);
18981897
}
18991898

1899+
/// Opens a file that has a plaintext footer and encrypted columns without supplying
/// any decryption properties, and checks that:
///
/// * the footer metadata loads and reports the expected row/column counts,
/// * a column that is not encrypted can be read in full, and
/// * reading an encrypted column surfaces a `ParquetError` instead of data.
#[test]
fn test_non_uniform_encryption_plaintext_footer_without_decryption() {
    let test_data_dir = arrow::util::test_util::parquet_test_data();
    let encrypted_path =
        format!("{test_data_dir}/encrypt_columns_plaintext_footer.parquet.encrypted");

    // NOTE(review): the trailing `None` is presumably the (absent) decryption
    // properties argument -- confirm against `ArrowReaderMetadata::load`.
    let plain_file = File::open(&encrypted_path).unwrap();
    let reader_metadata =
        ArrowReaderMetadata::load(&plain_file, Default::default(), None).unwrap();
    let file_metadata = reader_metadata.metadata.file_metadata();

    // File-level metadata is readable straight from the plaintext footer.
    assert_eq!(file_metadata.num_rows(), 50);
    assert_eq!(file_metadata.schema_descr().num_columns(), 8);
    assert_eq!(
        file_metadata.created_by().unwrap(),
        "parquet-cpp-arrow version 19.0.0-SNAPSHOT"
    );

    // Every row group reports the same shape as the file.
    for row_group in reader_metadata.metadata.row_groups() {
        assert_eq!(row_group.num_columns(), 8);
        assert_eq!(row_group.num_rows(), 50);
    }

    // Unencrypted columns must still be readable; project a single leaf (index 1)
    // and decode it as a Time32 (millisecond) array.
    let plain_builder = ParquetRecordBatchReaderBuilder::try_new(plain_file).unwrap();
    let plain_projection = ProjectionMask::leaves(plain_builder.parquet_schema(), [1]);
    let plain_reader = plain_builder
        .with_projection(plain_projection)
        .build()
        .unwrap();

    let total_rows: usize = plain_reader
        .map(|batch| {
            let batch = batch.unwrap();
            let times = batch
                .column(0)
                .as_primitive::<types::Time32MillisecondType>();
            // Each value is expected to equal its position within the batch.
            times
                .iter()
                .enumerate()
                .for_each(|(idx, value)| assert_eq!(value.unwrap(), idx as i32));
            batch.num_rows()
        })
        .sum();

    assert_eq!(total_rows, file_metadata.num_rows() as usize);

    // Projecting an encrypted leaf (index 4) must fail on the first batch.
    let encrypted_file = File::open(&encrypted_path).unwrap();
    let encrypted_builder = ParquetRecordBatchReaderBuilder::try_new(encrypted_file).unwrap();
    let encrypted_projection = ProjectionMask::leaves(encrypted_builder.parquet_schema(), [4]);
    let mut encrypted_reader = encrypted_builder
        .with_projection(encrypted_projection)
        .build()
        .unwrap();

    let Some(Err(ArrowError::ParquetError(s))) = encrypted_reader.next() else {
        panic!("Expected ArrowError::ParquetError");
    };
    assert!(s.contains("protocol error"));
}
1955+
19001956
#[test]
19011957
#[cfg(feature = "encryption")]
19021958
fn test_non_uniform_encryption() {

parquet/src/file/metadata/mod.rs

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -655,18 +655,23 @@ impl RowGroupMetaData {
655655
.zip(schema_descr.columns())
656656
.enumerate()
657657
{
658-
if c.encrypted_column_metadata.is_some() {
659-
// TODO: Allow ignoring encrypted column metadata in plaintext mode when no
660-
// decryptor is set
661-
let decryptor = decryptor.unwrap();
662-
let Some(ColumnCryptoMetaData::ENCRYPTIONWITHCOLUMNKEY(crypto_metadata)) =
663-
c.crypto_metadata.clone()
664-
else {
665-
todo!()
658+
// Read encrypted metadata if it's present and we have a decryptor.
659+
if let (true, Some(decryptor)) = (c.encrypted_column_metadata.is_some(), decryptor) {
660+
let column_decryptor = match c.crypto_metadata.as_ref() {
661+
None => {
662+
return Err(general_err!(
663+
"No crypto_metadata is set for column {}, which has encrypted metadata",
664+
i
665+
));
666+
}
667+
Some(ColumnCryptoMetaData::ENCRYPTIONWITHCOLUMNKEY(crypto_metadata)) => {
668+
let column_name = crypto_metadata.path_in_schema.join(".");
669+
decryptor.get_column_metadata_decryptor(column_name.as_bytes())
670+
}
671+
Some(ColumnCryptoMetaData::ENCRYPTIONWITHFOOTERKEY(_)) => {
672+
decryptor.get_footer_decryptor()
673+
}
666674
};
667-
let column_name = crypto_metadata.path_in_schema.join(".");
668-
let column_decryptor =
669-
decryptor.get_column_metadata_decryptor(column_name.as_bytes());
670675

671676
let column_aad = create_page_aad(
672677
decryptor.file_aad(),

parquet/src/file/metadata/reader.rs

Lines changed: 43 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -717,42 +717,31 @@ impl ParquetMetaDataReader {
717717
}
718718

719719
#[cfg(feature = "encryption")]
720-
let mut decryptor = None;
720+
let mut file_decryptor = None;
721721
#[cfg(feature = "encryption")]
722722
let decrypted_fmd_buf;
723723

724724
#[cfg(feature = "encryption")]
725725
if encrypted_footer {
726-
if file_decryption_properties.is_none() {
727-
return Err(general_err!("Parquet file has an encrypted footer but no decryption properties were provided"));
728-
};
729-
730-
let t_file_crypto_metadata: TFileCryptoMetaData =
731-
TFileCryptoMetaData::read_from_in_protocol(&mut prot)
732-
.map_err(|e| general_err!("Could not parse crypto metadata: {}", e))?;
733-
let algo = t_file_crypto_metadata.encryption_algorithm;
734-
let aes_gcm_algo = if let EncryptionAlgorithm::AESGCMV1(a) = algo {
735-
a
736-
} else {
737-
unreachable!()
738-
}; // todo decr: add support for GCMCTRV1
739-
740-
// todo decr: get key_metadata
741-
let aad_file_unique = aes_gcm_algo.aad_file_unique.unwrap();
742-
let aad_prefix: Vec<u8> = aes_gcm_algo.aad_prefix.unwrap_or_default();
743-
744-
decryptor = Some(FileDecryptor::new(
745-
file_decryption_properties.unwrap(),
746-
aad_file_unique,
747-
aad_prefix,
748-
));
749-
let footer_decryptor = decryptor.clone().unwrap().get_footer_decryptor();
726+
if let Some(file_decryption_properties) = file_decryption_properties {
727+
let t_file_crypto_metadata: TFileCryptoMetaData =
728+
TFileCryptoMetaData::read_from_in_protocol(&mut prot)
729+
.map_err(|e| general_err!("Could not parse crypto metadata: {}", e))?;
730+
let decryptor = get_file_decryptor(
731+
t_file_crypto_metadata.encryption_algorithm,
732+
file_decryption_properties,
733+
);
734+
let footer_decryptor = decryptor.get_footer_decryptor();
735+
let aad_footer = create_footer_aad(decryptor.file_aad())?;
750736

751-
let aad_footer = create_footer_aad(decryptor.as_ref().unwrap().file_aad())?;
737+
decrypted_fmd_buf =
738+
footer_decryptor.decrypt(prot.as_slice().as_ref(), aad_footer.as_ref())?;
739+
prot = TCompactSliceInputProtocol::new(decrypted_fmd_buf.as_ref());
752740

753-
decrypted_fmd_buf =
754-
footer_decryptor.decrypt(prot.as_slice().as_ref(), aad_footer.as_ref())?;
755-
prot = TCompactSliceInputProtocol::new(decrypted_fmd_buf.as_ref());
741+
file_decryptor = Some(decryptor);
742+
} else {
743+
return Err(general_err!("Parquet file has an encrypted footer but no decryption properties were provided"));
744+
}
756745
}
757746

758747
let t_file_metadata: TFileMetaData = TFileMetaData::read_from_in_protocol(&mut prot)
@@ -761,33 +750,21 @@ impl ParquetMetaDataReader {
761750
let schema_descr = Arc::new(SchemaDescriptor::new(schema));
762751

763752
#[cfg(feature = "encryption")]
764-
if t_file_metadata.encryption_algorithm.is_some() {
765-
let algo = t_file_metadata.encryption_algorithm;
766-
let aes_gcm_algo = if let Some(EncryptionAlgorithm::AESGCMV1(a)) = algo {
767-
a
768-
} else {
769-
unreachable!()
770-
}; // todo decr: add support for GCMCTRV1
771-
let aad_file_unique = aes_gcm_algo.aad_file_unique.unwrap();
772-
let aad_prefix: Vec<u8> = aes_gcm_algo.aad_prefix.unwrap_or_default();
773-
774-
decryptor = Some(FileDecryptor::new(
775-
file_decryption_properties.unwrap(),
776-
aad_file_unique,
777-
aad_prefix,
778-
));
779-
// todo get key_metadata etc. Set file decryptor in return value
780-
// todo check signature
753+
if let (Some(algo), Some(file_decryption_properties)) = (
754+
t_file_metadata.encryption_algorithm,
755+
file_decryption_properties,
756+
) {
757+
// File has a plaintext footer but encryption algorithm is set
758+
file_decryptor = Some(get_file_decryptor(algo, file_decryption_properties));
781759
}
782760

783761
let mut row_groups = Vec::new();
784-
// TODO: row group filtering
785762
for rg in t_file_metadata.row_groups {
786763
let r = RowGroupMetaData::from_thrift(
787764
schema_descr.clone(),
788765
rg,
789766
#[cfg(feature = "encryption")]
790-
decryptor.as_ref(),
767+
file_decryptor.as_ref(),
791768
)?;
792769
row_groups.push(r);
793770
}
@@ -806,7 +783,7 @@ impl ParquetMetaDataReader {
806783
file_metadata,
807784
row_groups,
808785
#[cfg(feature = "encryption")]
809-
decryptor,
786+
file_decryptor,
810787
))
811788
}
812789

@@ -842,6 +819,23 @@ impl ParquetMetaDataReader {
842819
}
843820
}
844821

822+
/// Builds a [`FileDecryptor`] from the encryption algorithm recorded in the file and
/// the caller-supplied decryption properties.
///
/// The AAD file-unique value and the AAD prefix are taken from the AES-GCM-V1
/// algorithm parameters; a missing prefix is treated as an empty byte vector.
///
/// # Panics
///
/// * If `encryption_algorithm` is any variant other than `AESGCMV1` (not implemented).
/// * If the algorithm parameters do not carry an `aad_file_unique` value.
#[cfg(feature = "encryption")]
fn get_file_decryptor(
    encryption_algorithm: EncryptionAlgorithm,
    file_decryption_properties: &FileDecryptionProperties,
) -> FileDecryptor {
    // Only AES-GCM-V1 is handled; everything else hits the `todo!`.
    let EncryptionAlgorithm::AESGCMV1(gcm_params) = encryption_algorithm else {
        todo!("GCMCTRV1 encryption algorithm")
    };

    FileDecryptor::new(
        file_decryption_properties,
        gcm_params.aad_file_unique.unwrap(),
        gcm_params.aad_prefix.unwrap_or_default(),
    )
}
838+
845839
#[cfg(test)]
846840
mod tests {
847841
use super::*;

0 commit comments

Comments
 (0)