Skip to content

Commit a8f0957

Browse files
authored
Move Parquet encryption tests into the arrow_reader integration tests (#7279)
* Move encryption tests to arrow_reader tests
* Move object store tests and simplify verification function
* Test reading plaintext footer files with encryption both disabled and enabled
* Rename encryption_common module to encryption_agnostic
1 parent c26e427 commit a8f0957

File tree

10 files changed

+715
-620
lines changed

10 files changed

+715
-620
lines changed

parquet/src/arrow/arrow_reader/mod.rs

-240
Original file line numberDiff line numberDiff line change
@@ -1005,7 +1005,6 @@ mod tests {
10051005
};
10061006
use arrow_select::concat::concat_batches;
10071007

1008-
use crate::arrow::arrow_reader::ArrowReaderMetadata;
10091008
use crate::arrow::arrow_reader::{
10101009
ArrowPredicateFn, ArrowReaderBuilder, ArrowReaderOptions, ParquetRecordBatchReader,
10111010
ParquetRecordBatchReaderBuilder, RowFilter, RowSelection, RowSelector,
@@ -1018,15 +1017,11 @@ mod tests {
10181017
BoolType, ByteArray, ByteArrayType, DataType, FixedLenByteArray, FixedLenByteArrayType,
10191018
FloatType, Int32Type, Int64Type, Int96Type,
10201019
};
1021-
#[cfg(feature = "encryption")]
1022-
use crate::encryption::decrypt::FileDecryptionProperties;
10231020
use crate::errors::Result;
10241021
use crate::file::properties::{EnabledStatistics, WriterProperties, WriterVersion};
10251022
use crate::file::writer::SerializedFileWriter;
10261023
use crate::schema::parser::parse_message_type;
10271024
use crate::schema::types::{Type, TypePtr};
1028-
#[cfg(feature = "encryption")]
1029-
use crate::util::test_common::encryption_util::verify_encryption_test_file_read;
10301025
use crate::util::test_common::rand_gen::RandGen;
10311026

10321027
#[test]
@@ -1855,241 +1850,6 @@ mod tests {
18551850
assert!(col.value(2).is_nan());
18561851
}
18571852

1858-
#[test]
1859-
#[cfg(feature = "encryption")]
1860-
fn test_non_uniform_encryption_plaintext_footer() {
1861-
let testdata = arrow::util::test_util::parquet_test_data();
1862-
let path = format!("{testdata}/encrypt_columns_plaintext_footer.parquet.encrypted");
1863-
let file = File::open(path).unwrap();
1864-
1865-
// There is always a footer key even with a plaintext footer,
1866-
// but this is used for signing the footer.
1867-
let footer_key = "0123456789012345".as_bytes(); // 128bit/16
1868-
let column_1_key = "1234567890123450".as_bytes();
1869-
let column_2_key = "1234567890123451".as_bytes();
1870-
1871-
let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec())
1872-
.with_column_key("double_field", column_1_key.to_vec())
1873-
.with_column_key("float_field", column_2_key.to_vec())
1874-
.build()
1875-
.unwrap();
1876-
1877-
verify_encryption_test_file_read(file, decryption_properties);
1878-
}
1879-
1880-
#[test]
1881-
#[cfg(feature = "encryption")]
1882-
fn test_non_uniform_encryption_disabled_aad_storage() {
1883-
let testdata = arrow::util::test_util::parquet_test_data();
1884-
let path =
1885-
format!("{testdata}/encrypt_columns_and_footer_disable_aad_storage.parquet.encrypted");
1886-
let file = File::open(path.clone()).unwrap();
1887-
1888-
let footer_key = "0123456789012345".as_bytes(); // 128bit/16
1889-
let column_1_key = "1234567890123450".as_bytes();
1890-
let column_2_key = "1234567890123451".as_bytes();
1891-
1892-
// Can read successfully when providing the correct AAD prefix
1893-
let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec())
1894-
.with_column_key("double_field", column_1_key.to_vec())
1895-
.with_column_key("float_field", column_2_key.to_vec())
1896-
.with_aad_prefix("tester".as_bytes().to_vec())
1897-
.build()
1898-
.unwrap();
1899-
1900-
verify_encryption_test_file_read(file, decryption_properties);
1901-
1902-
// Using wrong AAD prefix should fail
1903-
let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec())
1904-
.with_column_key("double_field", column_1_key.to_vec())
1905-
.with_column_key("float_field", column_2_key.to_vec())
1906-
.with_aad_prefix("wrong_aad_prefix".as_bytes().to_vec())
1907-
.build()
1908-
.unwrap();
1909-
1910-
let file = File::open(path.clone()).unwrap();
1911-
let options = ArrowReaderOptions::default()
1912-
.with_file_decryption_properties(decryption_properties.clone());
1913-
let result = ArrowReaderMetadata::load(&file, options.clone());
1914-
assert!(result.is_err());
1915-
assert_eq!(
1916-
result.unwrap_err().to_string(),
1917-
"Parquet error: Provided footer key and AAD were unable to decrypt parquet footer"
1918-
);
1919-
1920-
// Not providing any AAD prefix should fail as it isn't stored in the file
1921-
let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec())
1922-
.with_column_key("double_field", column_1_key.to_vec())
1923-
.with_column_key("float_field", column_2_key.to_vec())
1924-
.build()
1925-
.unwrap();
1926-
1927-
let file = File::open(path).unwrap();
1928-
let options = ArrowReaderOptions::default()
1929-
.with_file_decryption_properties(decryption_properties.clone());
1930-
let result = ArrowReaderMetadata::load(&file, options.clone());
1931-
assert!(result.is_err());
1932-
assert_eq!(
1933-
result.unwrap_err().to_string(),
1934-
"Parquet error: Provided footer key and AAD were unable to decrypt parquet footer"
1935-
);
1936-
}
1937-
1938-
#[test]
1939-
fn test_non_uniform_encryption_plaintext_footer_without_decryption() {
1940-
let testdata = arrow::util::test_util::parquet_test_data();
1941-
let path = format!("{testdata}/encrypt_columns_plaintext_footer.parquet.encrypted");
1942-
let file = File::open(&path).unwrap();
1943-
1944-
let metadata = ArrowReaderMetadata::load(&file, Default::default()).unwrap();
1945-
let file_metadata = metadata.metadata.file_metadata();
1946-
1947-
assert_eq!(file_metadata.num_rows(), 50);
1948-
assert_eq!(file_metadata.schema_descr().num_columns(), 8);
1949-
assert_eq!(
1950-
file_metadata.created_by().unwrap(),
1951-
"parquet-cpp-arrow version 19.0.0-SNAPSHOT"
1952-
);
1953-
1954-
metadata.metadata.row_groups().iter().for_each(|rg| {
1955-
assert_eq!(rg.num_columns(), 8);
1956-
assert_eq!(rg.num_rows(), 50);
1957-
});
1958-
1959-
// Should be able to read unencrypted columns. Test reading one column.
1960-
let builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap();
1961-
let mask = ProjectionMask::leaves(builder.parquet_schema(), [1]);
1962-
let record_reader = builder.with_projection(mask).build().unwrap();
1963-
1964-
let mut row_count = 0;
1965-
for batch in record_reader {
1966-
let batch = batch.unwrap();
1967-
row_count += batch.num_rows();
1968-
1969-
let time_col = batch
1970-
.column(0)
1971-
.as_primitive::<types::Time32MillisecondType>();
1972-
for (i, x) in time_col.iter().enumerate() {
1973-
assert_eq!(x.unwrap(), i as i32);
1974-
}
1975-
}
1976-
1977-
assert_eq!(row_count, file_metadata.num_rows() as usize);
1978-
1979-
// Reading an encrypted column should fail
1980-
let file = File::open(&path).unwrap();
1981-
let builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap();
1982-
let mask = ProjectionMask::leaves(builder.parquet_schema(), [4]);
1983-
let mut record_reader = builder.with_projection(mask).build().unwrap();
1984-
1985-
match record_reader.next() {
1986-
Some(Err(ArrowError::ParquetError(s))) => {
1987-
assert!(s.contains("protocol error"));
1988-
}
1989-
_ => {
1990-
panic!("Expected ArrowError::ParquetError");
1991-
}
1992-
};
1993-
}
1994-
1995-
#[test]
1996-
#[cfg(feature = "encryption")]
1997-
fn test_non_uniform_encryption() {
1998-
let testdata = arrow::util::test_util::parquet_test_data();
1999-
let path = format!("{testdata}/encrypt_columns_and_footer.parquet.encrypted");
2000-
let file = File::open(path).unwrap();
2001-
2002-
let footer_key = "0123456789012345".as_bytes(); // 128bit/16
2003-
let column_1_key = "1234567890123450".as_bytes();
2004-
let column_2_key = "1234567890123451".as_bytes();
2005-
2006-
let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec())
2007-
.with_column_key("double_field", column_1_key.to_vec())
2008-
.with_column_key("float_field", column_2_key.to_vec())
2009-
.build()
2010-
.unwrap();
2011-
2012-
verify_encryption_test_file_read(file, decryption_properties);
2013-
}
2014-
2015-
#[test]
2016-
#[cfg(feature = "encryption")]
2017-
fn test_uniform_encryption() {
2018-
let testdata = arrow::util::test_util::parquet_test_data();
2019-
let path = format!("{testdata}/uniform_encryption.parquet.encrypted");
2020-
let file = File::open(path).unwrap();
2021-
2022-
let key_code: &[u8] = "0123456789012345".as_bytes();
2023-
let decryption_properties = FileDecryptionProperties::builder(key_code.to_vec())
2024-
.build()
2025-
.unwrap();
2026-
2027-
verify_encryption_test_file_read(file, decryption_properties);
2028-
}
2029-
2030-
#[test]
2031-
#[cfg(not(feature = "encryption"))]
2032-
fn test_decrypting_without_encryption_flag_fails() {
2033-
let testdata = arrow::util::test_util::parquet_test_data();
2034-
let path = format!("{testdata}/uniform_encryption.parquet.encrypted");
2035-
let file = File::open(path).unwrap();
2036-
2037-
let options = ArrowReaderOptions::default();
2038-
let result = ArrowReaderMetadata::load(&file, options.clone());
2039-
assert!(result.is_err());
2040-
assert_eq!(
2041-
result.unwrap_err().to_string(),
2042-
"Parquet error: Parquet file has an encrypted footer but the encryption feature is disabled"
2043-
);
2044-
}
2045-
2046-
#[test]
2047-
#[cfg(feature = "encryption")]
2048-
fn test_decrypting_without_decryption_properties_fails() {
2049-
let testdata = arrow::util::test_util::parquet_test_data();
2050-
let path = format!("{testdata}/uniform_encryption.parquet.encrypted");
2051-
let file = File::open(path).unwrap();
2052-
2053-
let options = ArrowReaderOptions::default();
2054-
let result = ArrowReaderMetadata::load(&file, options.clone());
2055-
assert!(result.is_err());
2056-
assert_eq!(
2057-
result.unwrap_err().to_string(),
2058-
"Parquet error: Parquet file has an encrypted footer but no decryption properties were provided"
2059-
);
2060-
}
2061-
2062-
#[test]
2063-
#[cfg(feature = "encryption")]
2064-
fn test_aes_ctr_encryption() {
2065-
let testdata = arrow::util::test_util::parquet_test_data();
2066-
let path = format!("{testdata}/encrypt_columns_and_footer_ctr.parquet.encrypted");
2067-
let file = File::open(path).unwrap();
2068-
2069-
let footer_key = "0123456789012345".as_bytes();
2070-
let column_1_key = "1234567890123450".as_bytes();
2071-
let column_2_key = "1234567890123451".as_bytes();
2072-
2073-
let decryption_properties = FileDecryptionProperties::builder(footer_key.to_vec())
2074-
.with_column_key("double_field", column_1_key.to_vec())
2075-
.with_column_key("float_field", column_2_key.to_vec())
2076-
.build()
2077-
.unwrap();
2078-
2079-
let options =
2080-
ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties);
2081-
let metadata = ArrowReaderMetadata::load(&file, options);
2082-
2083-
match metadata {
2084-
Err(crate::errors::ParquetError::NYI(s)) => {
2085-
assert!(s.contains("AES_GCM_CTR_V1"));
2086-
}
2087-
_ => {
2088-
panic!("Expected ParquetError::NYI");
2089-
}
2090-
};
2091-
}
2092-
20931853
#[test]
20941854
fn test_read_float32_float64_byte_stream_split() {
20951855
let path = format!(

0 commit comments

Comments
 (0)