Skip to content

Commit af5a576

Browse files
committed
Add uniform encryption test
1 parent 6876d52 commit af5a576

File tree

3 files changed

+56
-48
lines changed

3 files changed

+56
-48
lines changed

parquet/src/arrow/arrow_writer/mod.rs

Lines changed: 21 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1290,16 +1290,14 @@ mod tests {
12901290

12911291
use std::fs::File;
12921292

1293-
#[cfg(feature = "encryption")]
1294-
use crate::arrow::arrow_reader::ArrowReaderMetadata;
12951293
#[cfg(feature = "encryption")]
12961294
use crate::arrow::arrow_reader::ArrowReaderOptions;
12971295
use crate::arrow::arrow_reader::{ParquetRecordBatchReader, ParquetRecordBatchReaderBuilder};
12981296
use crate::arrow::ARROW_SCHEMA_META_KEY;
12991297
#[cfg(feature = "encryption")]
13001298
use crate::encryption::encrypt::EncryptionKey;
13011299
#[cfg(feature = "encryption")]
1302-
use crate::util::test_common::encryption_util::verify_encryption_test_file_read;
1300+
use crate::util::test_common::encryption_util::read_and_roundtrip_to_encrypted_file;
13031301
use arrow::datatypes::ToByteSlice;
13041302
use arrow::datatypes::{DataType, Schema};
13051303
use arrow::error::Result as ArrowResult;
@@ -3808,7 +3806,6 @@ mod tests {
38083806
fn test_non_uniform_encryption() {
38093807
let testdata = arrow::util::test_util::parquet_test_data();
38103808
let path = format!("{testdata}/encrypt_columns_and_footer.parquet.encrypted");
3811-
let temp_file = tempfile::tempfile().unwrap();
38123809

38133810
let footer_key = "0123456789012345".as_bytes(); // 128bit/16
38143811
let column_1_key = "1234567890123450".as_bytes();
@@ -3820,62 +3817,39 @@ mod tests {
38203817
.build()
38213818
.unwrap();
38223819

3823-
// read example data
3824-
let (batches, schema) = read_encrypted_file(&path, decryption_properties.clone());
3825-
3826-
// write example data
38273820
let column_1_key = EncryptionKey::new(column_1_key.as_bytes().to_vec());
38283821
let column_2_key = EncryptionKey::new(column_2_key.as_bytes().to_vec());
38293822
let file_encryption_properties = FileEncryptionProperties::builder(footer_key.to_vec())
38303823
.with_column_key("double_field".into(), column_1_key)
38313824
.with_column_key("float_field".into(), column_2_key)
38323825
.build();
38333826

3834-
let temp_file =
3835-
write_batches_to_encrypted_file(temp_file, file_encryption_properties, batches, schema);
3836-
3837-
// check re-written example data
3838-
verify_encryption_test_file_read(temp_file, decryption_properties);
3827+
read_and_roundtrip_to_encrypted_file(
3828+
&path,
3829+
decryption_properties,
3830+
file_encryption_properties,
3831+
);
38393832
}
38403833

3834+
#[test]
38413835
#[cfg(feature = "encryption")]
3842-
fn write_batches_to_encrypted_file(
3843-
temp_file: File,
3844-
file_encryption_properties: FileEncryptionProperties,
3845-
batches: Vec<RecordBatch>,
3846-
schema: SchemaRef,
3847-
) -> File {
3848-
let props = WriterProperties::builder()
3849-
.with_file_encryption_properties(file_encryption_properties)
3850-
.build();
3836+
fn test_uniform_encryption() {
3837+
let testdata = arrow::util::test_util::parquet_test_data();
3838+
let path = format!("{testdata}/uniform_encryption.parquet.encrypted");
38513839

3852-
let mut writer =
3853-
ArrowWriter::try_new(temp_file.try_clone().unwrap(), schema, Some(props)).unwrap();
3854-
for batch in batches {
3855-
writer.write(&batch).unwrap();
3856-
}
3840+
let footer_key = "0123456789012345".as_bytes(); // 128bit/16
38573841

3858-
writer.close().unwrap();
3859-
temp_file
3860-
}
3842+
let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec())
3843+
.build()
3844+
.unwrap();
38613845

3862-
#[cfg(feature = "encryption")]
3863-
fn read_encrypted_file(
3864-
path: &str,
3865-
decryption_properties: FileDecryptionProperties,
3866-
) -> (Vec<RecordBatch>, SchemaRef) {
3867-
let file = File::open(path).unwrap();
3868-
let options =
3869-
ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties);
3870-
let metadata = ArrowReaderMetadata::load(&file, options.clone()).unwrap();
3846+
let file_encryption_properties =
3847+
FileEncryptionProperties::builder(footer_key.to_vec()).build();
38713848

3872-
let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap();
3873-
let batch_reader = builder.build().unwrap();
3874-
(
3875-
batch_reader
3876-
.collect::<Result<Vec<RecordBatch>, _>>()
3877-
.unwrap(),
3878-
metadata.schema,
3879-
)
3849+
read_and_roundtrip_to_encrypted_file(
3850+
&path,
3851+
decryption_properties,
3852+
file_encryption_properties,
3853+
);
38803854
}
38813855
}

parquet/src/file/metadata/writer.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,7 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> {
317317
column_index: usize,
318318
) -> Result<ColumnChunk> {
319319
// Column crypto metadata should have already been set when the column was created.
320-
// Here we apply the encryption by encrypted the column metadata if required.
320+
// Here we apply the encryption by encrypting the column metadata if required.
321321
match column_chunk.crypto_metadata.as_ref() {
322322
None => {}
323323
Some(ColumnCryptoMetaData::ENCRYPTIONWITHFOOTERKEY(_)) => {

parquet/src/util/test_common/encryption_util.rs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ use arrow_array::cast::AsArray;
2626
use arrow_array::{types, RecordBatch};
2727
use futures::TryStreamExt;
2828
use std::fs::File;
29+
use crate::arrow::ArrowWriter;
30+
use crate::encryption::encrypt::FileEncryptionProperties;
31+
use crate::file::properties::WriterProperties;
2932

3033
/// Tests reading an encrypted file from the parquet-testing repository
3134
pub(crate) fn verify_encryption_test_file_read(
@@ -134,3 +137,34 @@ fn verify_encryption_test_data(
134137

135138
assert_eq!(row_count, file_metadata.num_rows() as usize);
136139
}
140+
141+
/// Round-trips an encrypted Parquet file through the Arrow reader and writer.
///
/// Opens the file at `path`, reads all of its record batches using
/// `decryption_properties`, writes those batches (with the schema taken from the
/// loaded reader metadata) to a temporary file created with `tempfile::tempfile`
/// using `encryption_properties`, and finally passes the temporary file to
/// `verify_encryption_test_file_read` together with the same
/// `decryption_properties`.
///
/// # Panics
///
/// Every fallible step is unwrapped, so this panics if opening, reading,
/// writing or closing fails.
#[cfg(feature = "encryption")]
142+
pub fn read_and_roundtrip_to_encrypted_file(path: &str, decryption_properties: FileDecryptionProperties, encryption_properties: FileEncryptionProperties) {
143+
let temp_file = tempfile::tempfile().unwrap();
144+
145+
// read example data (the decryption properties are cloned because they are reused for verification below)
146+
let file = File::open(path).unwrap();
147+
let options =
148+
ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties.clone());
149+
let metadata = ArrowReaderMetadata::load(&file, options.clone()).unwrap();
150+
151+
let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap();
152+
let batch_reader = builder.build().unwrap();
153+
let batches = batch_reader.collect::<crate::errors::Result<Vec<RecordBatch>, _>>().unwrap();
154+
155+
// write example data through a cloned handle so `temp_file` stays available for the verification step
156+
let props = WriterProperties::builder()
157+
.with_file_encryption_properties(encryption_properties)
158+
.build();
159+
160+
let mut writer =
161+
ArrowWriter::try_new(temp_file.try_clone().unwrap(), metadata.schema, Some(props)).unwrap();
162+
for batch in batches {
163+
writer.write(&batch).unwrap();
164+
}
165+
166+
writer.close().unwrap();
167+
168+
// check re-written example data by reading the temp file back with the original decryption properties
169+
verify_encryption_test_file_read(temp_file, decryption_properties);
170+
}

0 commit comments

Comments
 (0)