Skip to content

Commit 9030f3a

Browse files
committed
Make encryption an optional feature
1 parent 31405dd commit 9030f3a

File tree

10 files changed

+114
-43
lines changed

10 files changed

+114
-43
lines changed

parquet/Cargo.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ paste = { version = "1.0" }
6969
half = { version = "2.1", default-features = false, features = ["num-traits"] }
7070
sysinfo = { version = "0.32.0", optional = true, default-features = false, features = ["system"] }
7171
crc32fast = { version = "1.4.2", optional = true, default-features = false }
72-
ring = { version = "0.17", default-features = false, features = ["std"]}
72+
ring = { version = "0.17", default-features = false, features = ["std"], optional = true }
7373

7474
[dev-dependencies]
7575
base64 = { version = "0.22", default-features = false, features = ["std"] }
@@ -121,7 +121,8 @@ zstd = ["dep:zstd", "zstd-sys"]
121121
sysinfo = ["dep:sysinfo"]
122122
# Verify 32-bit CRC checksum when decoding parquet pages
123123
crc = ["dep:crc32fast"]
124-
#encryption = ["aes-gcm", "base64"]
124+
# Enable Parquet modular encryption support
125+
encryption = ["dep:ring"]
125126

126127

127128
[[example]]

parquet/src/arrow/arrow_reader/mod.rs

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ mod filter;
4141
mod selection;
4242
pub mod statistics;
4343

44+
#[cfg(feature = "encryption")]
4445
use crate::encryption::ciphers::{CryptoContext, FileDecryptionProperties};
4546

4647
/// Builder for constructing parquet readers into arrow.
@@ -383,12 +384,14 @@ impl ArrowReaderMetadata {
383384
pub fn load<T: ChunkReader>(
384385
reader: &T,
385386
options: ArrowReaderOptions,
386-
file_decryption_properties: Option<&FileDecryptionProperties>,
387+
#[cfg(feature = "encryption")] file_decryption_properties: Option<
388+
&FileDecryptionProperties,
389+
>,
387390
) -> Result<Self> {
388-
let metadata = ParquetMetaDataReader::new()
389-
.with_page_indexes(options.page_index)
390-
.with_encryption_properties(file_decryption_properties)
391-
.parse_and_finish(reader)?;
391+
let metadata = ParquetMetaDataReader::new().with_page_indexes(options.page_index);
392+
#[cfg(feature = "encryption")]
393+
let metadata = metadata.with_encryption_properties(file_decryption_properties);
394+
let metadata = metadata.parse_and_finish(reader)?;
392395
Self::try_new(Arc::new(metadata), options)
393396
}
394397

@@ -534,11 +537,17 @@ impl<T: ChunkReader + 'static> ParquetRecordBatchReaderBuilder<T> {
534537

535538
/// Create a new [`ParquetRecordBatchReaderBuilder`] with [`ArrowReaderOptions`]
536539
pub fn try_new_with_options(reader: T, options: ArrowReaderOptions) -> Result<Self> {
537-
let metadata = ArrowReaderMetadata::load(&reader, options, None)?;
540+
let metadata = ArrowReaderMetadata::load(
541+
&reader,
542+
options,
543+
#[cfg(feature = "encryption")]
544+
None,
545+
)?;
538546
Ok(Self::new_with_metadata(reader, metadata))
539547
}
540548

541549
/// Create a new [`ParquetRecordBatchReaderBuilder`] with [`ArrowReaderOptions`] and [`FileDecryptionProperties`]
550+
#[cfg(feature = "encryption")]
542551
pub fn try_new_with_decryption(
543552
reader: T,
544553
options: ArrowReaderOptions,
@@ -694,6 +703,7 @@ impl<T: ChunkReader + 'static> Iterator for ReaderPageIterator<T> {
694703
let total_rows = rg.num_rows() as usize;
695704
let reader = self.reader.clone();
696705

706+
#[cfg(feature = "encryption")]
697707
let crypto_context = if self.metadata.file_decryptor().is_some() {
698708
let file_decryptor = Arc::new(self.metadata.file_decryptor().clone().unwrap());
699709

@@ -708,8 +718,14 @@ impl<T: ChunkReader + 'static> Iterator for ReaderPageIterator<T> {
708718
None
709719
};
710720

711-
let ret =
712-
SerializedPageReader::new(reader, meta, total_rows, page_locations, crypto_context);
721+
let ret = SerializedPageReader::new(
722+
reader,
723+
meta,
724+
total_rows,
725+
page_locations,
726+
#[cfg(feature = "encryption")]
727+
crypto_context,
728+
);
713729
Some(ret.map(|x| Box::new(x) as _))
714730
}
715731
}
@@ -824,6 +840,7 @@ impl ParquetRecordBatchReader {
824840
///
825841
/// Note: this is needed when the parquet file is encrypted
826842
// todo: add options or put file_decryption_properties into options
843+
#[cfg(feature = "encryption")]
827844
pub fn try_new_with_decryption<T: ChunkReader + 'static>(
828845
reader: T,
829846
batch_size: usize,
@@ -993,10 +1010,11 @@ mod tests {
9931010
};
9941011
use arrow_select::concat::concat_batches;
9951012

1013+
#[cfg(feature = "encryption")]
1014+
use crate::arrow::arrow_reader::ArrowReaderMetadata;
9961015
use crate::arrow::arrow_reader::{
997-
ArrowPredicateFn, ArrowReaderBuilder, ArrowReaderMetadata, ArrowReaderOptions,
998-
ParquetRecordBatchReader, ParquetRecordBatchReaderBuilder, RowFilter, RowSelection,
999-
RowSelector,
1016+
ArrowPredicateFn, ArrowReaderBuilder, ArrowReaderOptions, ParquetRecordBatchReader,
1017+
ParquetRecordBatchReaderBuilder, RowFilter, RowSelection, RowSelector,
10001018
};
10011019
use crate::arrow::schema::add_encoded_arrow_schema_to_metadata;
10021020
use crate::arrow::{ArrowWriter, ProjectionMask};
@@ -1006,6 +1024,7 @@ mod tests {
10061024
BoolType, ByteArray, ByteArrayType, DataType, FixedLenByteArray, FixedLenByteArrayType,
10071025
FloatType, Int32Type, Int64Type, Int96Type,
10081026
};
1027+
#[cfg(feature = "encryption")]
10091028
use crate::encryption::ciphers;
10101029
use crate::errors::Result;
10111030
use crate::file::properties::{EnabledStatistics, WriterProperties, WriterVersion};
@@ -1716,6 +1735,7 @@ mod tests {
17161735
}
17171736

17181737
#[test]
1738+
#[cfg(feature = "encryption")]
17191739
fn test_non_uniform_encryption_plaintext_footer() {
17201740
let testdata = arrow::util::test_util::parquet_test_data();
17211741
let path = format!("{testdata}/encrypt_columns_plaintext_footer.parquet.encrypted");
@@ -1766,6 +1786,7 @@ mod tests {
17661786
}
17671787

17681788
#[test]
1789+
#[cfg(feature = "encryption")]
17691790
fn test_non_uniform_encryption() {
17701791
let testdata = arrow::util::test_util::parquet_test_data();
17711792
let path = format!("{testdata}/encrypt_columns_plaintext_footer.parquet.encrypted");
@@ -1797,6 +1818,7 @@ mod tests {
17971818
}
17981819

17991820
#[test]
1821+
#[cfg(feature = "encryption")]
18001822
fn test_uniform_encryption() {
18011823
let testdata = arrow::util::test_util::parquet_test_data();
18021824
let path = format!("{testdata}/uniform_encryption.parquet.encrypted");

parquet/src/column/writer/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2027,6 +2027,7 @@ mod tests {
20272027
r.rows_written as usize,
20282028
None,
20292029
Arc::new(props),
2030+
#[cfg(feature = "encryption")]
20302031
None,
20312032
)
20322033
.unwrap();
@@ -2080,6 +2081,7 @@ mod tests {
20802081
r.rows_written as usize,
20812082
None,
20822083
Arc::new(props),
2084+
#[cfg(feature = "encryption")]
20832085
None,
20842086
)
20852087
.unwrap();
@@ -2216,6 +2218,7 @@ mod tests {
22162218
r.rows_written as usize,
22172219
None,
22182220
Arc::new(props),
2221+
#[cfg(feature = "encryption")]
22192222
None,
22202223
)
22212224
.unwrap(),
@@ -3487,6 +3490,7 @@ mod tests {
34873490
result.rows_written as usize,
34883491
None,
34893492
Arc::new(props),
3493+
#[cfg(feature = "encryption")]
34903494
None,
34913495
)
34923496
.unwrap(),

parquet/src/errors.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ impl From<object_store::Error> for ParquetError {
120120
}
121121
}
122122

123-
//#[cfg(feature = "encryption")]
123+
#[cfg(feature = "encryption")]
124124
impl From<ring::error::Unspecified> for ParquetError {
125125
fn from(e: ring::error::Unspecified) -> ParquetError {
126126
ParquetError::External(Box::new(e))

parquet/src/file/footer.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
//! Module for working with Parquet file footers.
1919
20+
#[cfg(feature = "encryption")]
2021
use crate::encryption::ciphers::FileDecryptionProperties;
2122
use crate::errors::Result;
2223
use crate::file::{metadata::*, reader::ChunkReader, FOOTER_SIZE};
@@ -60,9 +61,13 @@ pub fn parse_metadata<R: ChunkReader>(chunk_reader: &R) -> Result<ParquetMetaDat
6061
#[deprecated(since = "53.1.0", note = "Use ParquetMetaDataReader::decode_metadata")]
6162
pub fn decode_metadata(
6263
buf: &[u8],
63-
file_decryption_properties: Option<&FileDecryptionProperties>,
64+
#[cfg(feature = "encryption")] file_decryption_properties: Option<&FileDecryptionProperties>,
6465
) -> Result<ParquetMetaData> {
65-
ParquetMetaDataReader::decode_metadata(buf, file_decryption_properties)
66+
ParquetMetaDataReader::decode_metadata(
67+
buf,
68+
#[cfg(feature = "encryption")]
69+
file_decryption_properties,
70+
)
6671
}
6772

6873
/// Decodes the Parquet footer returning the metadata length in bytes

parquet/src/file/metadata/mod.rs

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ use crate::format::{
104104
};
105105

106106
use crate::basic::{ColumnOrder, Compression, Encoding, Type};
107+
#[cfg(feature = "encryption")]
107108
use crate::encryption::ciphers::FileDecryptor;
108109
use crate::errors::{ParquetError, Result};
109110
pub(crate) use crate::file::metadata::memory::HeapSize;
@@ -176,6 +177,7 @@ pub struct ParquetMetaData {
176177
/// Offset index for each page in each column chunk
177178
offset_index: Option<ParquetOffsetIndex>,
178179
/// Optional file decryptor
180+
#[cfg(feature = "encryption")]
179181
file_decryptor: Option<FileDecryptor>,
180182
}
181183

@@ -185,11 +187,12 @@ impl ParquetMetaData {
185187
pub fn new(
186188
file_metadata: FileMetaData,
187189
row_groups: Vec<RowGroupMetaData>,
188-
file_decryptor: Option<FileDecryptor>,
190+
#[cfg(feature = "encryption")] file_decryptor: Option<FileDecryptor>,
189191
) -> Self {
190192
ParquetMetaData {
191193
file_metadata,
192194
row_groups,
195+
#[cfg(feature = "encryption")]
193196
file_decryptor,
194197
column_index: None,
195198
offset_index: None,
@@ -223,6 +226,7 @@ impl ParquetMetaData {
223226
}
224227

225228
/// Returns file decryptor as reference.
229+
#[cfg(feature = "encryption")]
226230
pub fn file_decryptor(&self) -> &Option<FileDecryptor> {
227231
&self.file_decryptor
228232
}
@@ -350,7 +354,12 @@ pub struct ParquetMetaDataBuilder(ParquetMetaData);
350354
impl ParquetMetaDataBuilder {
351355
/// Create a new builder from a file metadata, with no row groups
352356
pub fn new(file_meta_data: FileMetaData) -> Self {
353-
Self(ParquetMetaData::new(file_meta_data, vec![], None))
357+
Self(ParquetMetaData::new(
358+
file_meta_data,
359+
vec![],
360+
#[cfg(feature = "encryption")]
361+
None,
362+
))
354363
}
355364

356365
/// Create a new builder from an existing ParquetMetaData

parquet/src/file/metadata/reader.rs

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ use std::{io::Read, ops::Range, sync::Arc};
2020
use bytes::Bytes;
2121

2222
use crate::basic::ColumnOrder;
23+
#[cfg(feature = "encryption")]
2324
use crate::encryption::ciphers::{
2425
create_footer_aad, BlockDecryptor, FileDecryptionProperties, FileDecryptor,
2526
};
@@ -29,10 +30,9 @@ use crate::file::page_index::index::Index;
2930
use crate::file::page_index::index_reader::{acc_range, decode_column_index, decode_offset_index};
3031
use crate::file::reader::ChunkReader;
3132
use crate::file::{FOOTER_SIZE, PARQUET_MAGIC, PARQUET_MAGIC_ENCR_FOOTER};
32-
use crate::format::{
33-
ColumnOrder as TColumnOrder, EncryptionAlgorithm, FileCryptoMetaData as TFileCryptoMetaData,
34-
FileMetaData as TFileMetaData,
35-
};
33+
use crate::format::{ColumnOrder as TColumnOrder, FileMetaData as TFileMetaData};
34+
#[cfg(feature = "encryption")]
35+
use crate::format::{EncryptionAlgorithm, FileCryptoMetaData as TFileCryptoMetaData};
3636
use crate::schema::types;
3737
use crate::schema::types::SchemaDescriptor;
3838
use crate::thrift::{TCompactSliceInputProtocol, TSerializable};
@@ -74,6 +74,7 @@ pub struct ParquetMetaDataReader {
7474
// Size of the serialized thrift metadata plus the 8 byte footer. Only set if
7575
// `self.parse_metadata` is called.
7676
metadata_size: Option<usize>,
77+
#[cfg(feature = "encryption")]
7778
file_decryption_properties: Option<FileDecryptionProperties>,
7879
}
7980

@@ -136,6 +137,7 @@ impl ParquetMetaDataReader {
136137
/// Provide the [`FileDecryptionProperties`] to use when decrypting the file.
137138
///
138139
/// This is only necessary when the file is encrypted.
140+
#[cfg(feature = "encryption")]
139141
pub fn with_encryption_properties(
140142
mut self,
141143
properties: Option<&FileDecryptionProperties>,
@@ -532,6 +534,7 @@ impl ParquetMetaDataReader {
532534
let start = file_size - footer_metadata_len as u64;
533535
Self::decode_metadata(
534536
chunk_reader.get_bytes(start, metadata_len)?.as_ref(),
537+
#[cfg(feature = "encryption")]
535538
self.file_decryption_properties.as_ref(),
536539
)
537540
}
@@ -639,12 +642,18 @@ impl ParquetMetaDataReader {
639642
/// [Parquet Spec]: https://github.com/apache/parquet-format#metadata
640643
pub fn decode_metadata(
641644
buf: &[u8],
642-
file_decryption_properties: Option<&FileDecryptionProperties>,
645+
#[cfg(feature = "encryption")] file_decryption_properties: Option<
646+
&FileDecryptionProperties,
647+
>,
643648
) -> Result<ParquetMetaData> {
644649
let mut prot = TCompactSliceInputProtocol::new(buf);
650+
651+
#[cfg(feature = "encryption")]
645652
let mut file_decryptor = None;
653+
#[cfg(feature = "encryption")]
646654
let decrypted_fmd_buf;
647655

656+
#[cfg(feature = "encryption")]
648657
if file_decryption_properties.is_some()
649658
&& file_decryption_properties.unwrap().has_footer_key()
650659
{
@@ -708,6 +717,7 @@ impl ParquetMetaDataReader {
708717
Ok(ParquetMetaData::new(
709718
file_metadata,
710719
row_groups,
720+
#[cfg(feature = "encryption")]
711721
file_decryptor,
712722
))
713723
}

0 commit comments

Comments
 (0)