Skip to content

Add more examples of using Parquet encryption #7374

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 110 additions & 1 deletion parquet/src/encryption/decrypt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,77 @@ use std::io::Read;
use std::sync::Arc;

/// Trait for retrieving an encryption key using the key's metadata
///
/// # Example
///
/// This shows how you might use a `KeyRetriever` to decrypt a Parquet file
/// if you have a set of known encryption keys with identifiers, but at read time
/// you may not know which columns were encrypted and which keys were used.
///
/// In practice, the key metadata might instead store an encrypted key that must
/// be decrypted with a Key Management Server.
///
/// ```
/// # use std::collections::HashMap;
/// # use std::sync::{Arc, Mutex};
/// # use parquet::encryption::decrypt::{FileDecryptionProperties, KeyRetriever};
/// # use parquet::encryption::encrypt::FileEncryptionProperties;
/// # use parquet::errors::ParquetError;
/// // Define known encryption keys
/// let mut keys = HashMap::new();
/// keys.insert("kf".to_owned(), b"0123456789012345".to_vec());
/// keys.insert("kc1".to_owned(), b"1234567890123450".to_vec());
/// keys.insert("kc2".to_owned(), b"1234567890123451".to_vec());
///
/// // Create encryption properties for writing a file,
/// // and specify the key identifiers as the key metadata.
/// let encryption_properties = FileEncryptionProperties::builder(keys.get("kf").unwrap().clone())
/// .with_footer_key_metadata("kf".into())
/// .with_column_key_and_metadata("x", keys.get("kc1").unwrap().clone(), "kc1".as_bytes().into())
/// .with_column_key_and_metadata("y", keys.get("kc2").unwrap().clone(), "kc2".as_bytes().into())
/// .build()?;
///
/// // Write an encrypted file with the properties
/// // ...
///
/// // Define a KeyRetriever that can get encryption keys using their identifiers
/// struct CustomKeyRetriever {
/// keys: Mutex<HashMap<String, Vec<u8>>>,
/// }
///
/// impl KeyRetriever for CustomKeyRetriever {
/// fn retrieve_key(&self, key_metadata: &[u8]) -> parquet::errors::Result<Vec<u8>> {
/// // Metadata is bytes, so convert it to a string identifier
/// let key_metadata = std::str::from_utf8(key_metadata).map_err(|e| {
/// ParquetError::General(format!("Could not convert key metadata to string: {e}"))
/// })?;
/// // Look up the key
/// let keys = self.keys.lock().unwrap();
/// match keys.get(key_metadata) {
/// Some(key) => Ok(key.clone()),
/// None => Err(ParquetError::General(format!(
/// "Could not retrieve key for metadata {key_metadata:?}"
/// ))),
/// }
/// }
/// }
///
/// let key_retriever = Arc::new(CustomKeyRetriever {
/// keys: Mutex::new(keys),
/// });
///
/// // Create decryption properties for reading an encrypted file.
/// // Note that we don't need to specify which columns are encrypted;
/// // this is determined by the file metadata, and the required keys will be
/// // retrieved dynamically using our key retriever.
/// let decryption_properties = FileDecryptionProperties::with_key_retriever(key_retriever)
/// .build()?;
///
/// // Read an encrypted file with the decryption properties
/// // ...
///
/// # Ok::<(), parquet::errors::ParquetError>(())
/// ```
pub trait KeyRetriever: Send + Sync {
/// Retrieve a decryption key given the key metadata
fn retrieve_key(&self, key_metadata: &[u8]) -> Result<Vec<u8>>;
Expand Down Expand Up @@ -195,7 +266,43 @@ impl PartialEq for DecryptionKeys {
}
}

/// FileDecryptionProperties hold keys and AAD data required to decrypt a Parquet file.
/// `FileDecryptionProperties` hold keys and AAD data required to decrypt a Parquet file.
///
/// When reading Arrow data, the `FileDecryptionProperties` should be included in the
/// [`ArrowReaderOptions`](crate::arrow::arrow_reader::ArrowReaderOptions) using
/// [`with_file_decryption_properties`](crate::arrow::arrow_reader::ArrowReaderOptions::with_file_decryption_properties).
///
/// # Examples
///
/// Create `FileDecryptionProperties` for a file encrypted with uniform encryption,
/// where all metadata and data are encrypted with the footer key:
/// ```
/// # use parquet::encryption::decrypt::FileDecryptionProperties;
/// let file_decryption_properties = FileDecryptionProperties::builder(b"0123456789012345".into())
/// .build()?;
/// # Ok::<(), parquet::errors::ParquetError>(())
/// ```
///
/// Create properties for a file where columns are encrypted with different keys:
/// ```
/// # use parquet::encryption::decrypt::FileDecryptionProperties;
/// let file_decryption_properties = FileDecryptionProperties::builder(b"0123456789012345".into())
/// .with_column_key("x", b"1234567890123450".into())
/// .with_column_key("y", b"1234567890123451".into())
/// .build()?;
/// # Ok::<(), parquet::errors::ParquetError>(())
/// ```
///
/// Specify additional authenticated data, used to protect against data replacement.
/// This must match the AAD prefix provided when the file was written, otherwise
/// data decryption will fail.
/// ```
/// # use parquet::encryption::decrypt::FileDecryptionProperties;
/// let file_decryption_properties = FileDecryptionProperties::builder(b"0123456789012345".into())
/// .with_aad_prefix("example_file".into())
/// .build()?;
/// # Ok::<(), parquet::errors::ParquetError>(())
/// ```
#[derive(Clone, PartialEq)]
pub struct FileDecryptionProperties {
keys: DecryptionKeys,
Expand Down Expand Up @@ -277,6 +384,8 @@ impl std::fmt::Debug for FileDecryptionProperties {
}

/// Builder for [`FileDecryptionProperties`]
///
/// See [`FileDecryptionProperties`] for example usage.
pub struct DecryptionPropertiesBuilder {
footer_key: Option<Vec<u8>>,
key_retriever: Option<Arc<dyn KeyRetriever>>,
Expand Down
37 changes: 37 additions & 0 deletions parquet/src/encryption/encrypt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,41 @@ impl EncryptionKey {

#[derive(Debug, Clone, PartialEq)]
/// Defines how data in a Parquet file should be encrypted
///
/// The `FileEncryptionProperties` should be included in the [`WriterProperties`](crate::file::properties::WriterProperties)
/// used to write a file by using [`WriterPropertiesBuilder::with_file_encryption_properties`](crate::file::properties::WriterPropertiesBuilder::with_file_encryption_properties).
///
/// # Examples
///
/// Create `FileEncryptionProperties` for a file encrypted with uniform encryption,
/// where all metadata and data are encrypted with the footer key:
/// ```
/// # use parquet::encryption::encrypt::FileEncryptionProperties;
/// let file_encryption_properties = FileEncryptionProperties::builder(b"0123456789012345".into())
/// .build()?;
/// # Ok::<(), parquet::errors::ParquetError>(())
/// ```
///
/// Create properties for a file where columns are encrypted with different keys.
/// Any columns without a key specified will be unencrypted:
/// ```
/// # use parquet::encryption::encrypt::FileEncryptionProperties;
/// let file_encryption_properties = FileEncryptionProperties::builder(b"0123456789012345".into())
/// .with_column_key("x", b"1234567890123450".into())
/// .with_column_key("y", b"1234567890123451".into())
/// .build()?;
/// # Ok::<(), parquet::errors::ParquetError>(())
/// ```
///
/// Specify additional authenticated data, used to protect against data replacement.
/// This should represent the file identity:
/// ```
/// # use parquet::encryption::encrypt::FileEncryptionProperties;
/// let file_encryption_properties = FileEncryptionProperties::builder(b"0123456789012345".into())
/// .with_aad_prefix("example_file".into())
/// .build()?;
/// # Ok::<(), parquet::errors::ParquetError>(())
/// ```
pub struct FileEncryptionProperties {
encrypt_footer: bool,
footer_key: EncryptionKey,
Expand Down Expand Up @@ -141,6 +176,8 @@ impl FileEncryptionProperties {
}

/// Builder for [`FileEncryptionProperties`]
///
/// See [`FileEncryptionProperties`] for example usage.
pub struct EncryptionPropertiesBuilder {
encrypt_footer: bool,
footer_key: EncryptionKey,
Expand Down
92 changes: 90 additions & 2 deletions parquet/src/encryption/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,96 @@
// specific language governing permissions and limitations
// under the License.

//! Encryption implementation specific to Parquet, as described
//! in the [spec](https://github.com/apache/parquet-format/blob/master/Encryption.md).
//! This module implements Parquet Modular Encryption, as described in the
//! [specification](https://github.com/apache/parquet-format/blob/master/Encryption.md).
//!
//! # Example of writing and reading an encrypted Parquet file
//!
//! ```
//! use arrow::array::{ArrayRef, Float32Array, Int32Array, RecordBatch};
//! use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
//! use parquet::arrow::ArrowWriter;
//! use parquet::encryption::decrypt::FileDecryptionProperties;
//! use parquet::encryption::encrypt::FileEncryptionProperties;
//! use parquet::errors::Result;
//! use parquet::file::properties::WriterProperties;
//! use std::fs::File;
//! use std::sync::Arc;
//! use tempfile::TempDir;
//!
//! // Define 16 byte AES encryption keys to use.
//! static FOOTER_KEY: &[u8; 16] = b"0123456789012345";
//! static COLUMN_KEY_1: &[u8; 16] = b"1234567890123450";
//! static COLUMN_KEY_2: &[u8; 16] = b"1234567890123451";
//!
//! let temp_dir = TempDir::new()?;
//! let file_path = temp_dir.path().join("encrypted_example.parquet");
//!
//! // Create file encryption properties, which define how the file is encrypted.
//! // We will specify a key to encrypt the footer metadata,
//! // then separate keys for different columns.
//! // This allows fine-grained control of access to different columns within a Parquet file.
//! // Note that any columns without an encryption key specified will be left unencrypted.
//! // If only a footer key is specified, then all columns are encrypted with the footer key.
//! let encryption_properties = FileEncryptionProperties::builder(FOOTER_KEY.into())
//! .with_column_key("x", COLUMN_KEY_1.into())
//! .with_column_key("y", COLUMN_KEY_2.into())
//! // We also set an AAD prefix, which is optional.
//! // This contributes to the "additional authenticated data" that is used to verify file
//! // integrity and prevents data being swapped with data encrypted with the same key.
//! .with_aad_prefix(b"example_aad".into())
//! // Specify that the AAD prefix is stored in the file, so readers don't need
//! // to provide it to read the data, but can optionally provide it if they want to
//! // verify file integrity.
//! .with_aad_prefix_storage(true)
//! .build()?;
//!
//! let writer_properties = WriterProperties::builder()
//! .with_file_encryption_properties(encryption_properties)
//! .build();
//!
//! // Write the encrypted Parquet file
//! {
//! let file = File::create(&file_path)?;
//!
//! let ids = Int32Array::from(vec![0, 1, 2, 3, 4, 5]);
//! let x_vals = Float32Array::from(vec![0.0, 0.1, 0.2, 0.3, 0.4, 0.5]);
//! let y_vals = Float32Array::from(vec![1.0, 1.1, 1.2, 1.3, 1.4, 1.5]);
//! let batch = RecordBatch::try_from_iter(vec![
//! ("id", Arc::new(ids) as ArrayRef),
//! ("x", Arc::new(x_vals) as ArrayRef),
//! ("y", Arc::new(y_vals) as ArrayRef),
//! ])?;
//!
//! let mut writer = ArrowWriter::try_new(file, batch.schema(), Some(writer_properties))?;
//!
//! writer.write(&batch)?;
//! writer.close()?;
//! }
//!
//! // In order to read the encrypted Parquet file, we need to know the encryption
//! // keys used to encrypt it.
//! // We don't need to provide the AAD prefix as it was stored in the file metadata,
//! // but we could specify it here if we wanted to verify the file hasn't been tampered with:
//! let decryption_properties = FileDecryptionProperties::builder(FOOTER_KEY.into())
//! .with_column_key("x", COLUMN_KEY_1.into())
//! .with_column_key("y", COLUMN_KEY_2.into())
//! .build()?;
//!
//! let reader_options =
//! ArrowReaderOptions::new().with_file_decryption_properties(decryption_properties);
//!
//! // Read the file using the configured decryption properties
//! let file = File::open(&file_path)?;
//!
//! let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, reader_options)?;
//! let record_reader = builder.build()?;
//! for batch in record_reader {
//! let batch = batch?;
//! println!("Read batch: {batch:?}");
//! }
//! # Ok::<(), parquet::errors::ParquetError>(())
//! ```

pub(crate) mod ciphers;
pub mod decrypt;
Expand Down
Loading