Skip to content

Commit 9f663ba

Browse files
ggershinsky authored and rok committed
first commit
1 parent 853626e commit 9f663ba

File tree

7 files changed

+636
-3
lines changed

7 files changed

+636
-3
lines changed

parquet/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,7 @@ paste = { version = "1.0" }
6969
half = { version = "2.1", default-features = false, features = ["num-traits"] }
7070
sysinfo = { version = "0.32.0", optional = true, default-features = false, features = ["system"] }
7171
crc32fast = { version = "1.4.2", optional = true, default-features = false }
72+
ring = { version = "0.17", default-features = false, features = ["std"]}
7273

7374
[dev-dependencies]
7475
base64 = { version = "0.22", default-features = false, features = ["std"] }
@@ -120,6 +121,7 @@ zstd = ["dep:zstd", "zstd-sys"]
120121
sysinfo = ["dep:sysinfo"]
121122
# Verify 32-bit CRC checksum when decoding parquet pages
122123
crc = ["dep:crc32fast"]
124+
#encryption = ["aes-gcm", "base64"]
123125

124126

125127
[[example]]

parquet/src/arrow/arrow_reader/mod.rs

Lines changed: 64 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,10 @@ mod filter;
4242
mod selection;
4343
pub mod statistics;
4444

45+
use crate::file::footer;
46+
use crate::file::page_index::index_reader;
47+
use crate::encryption::ciphers::FileDecryptionProperties;
48+
4549
/// Builder for constructing parquet readers into arrow.
4650
///
4751
/// Most users should use one of the following specializations:
@@ -317,7 +321,7 @@ impl ArrowReaderOptions {
317321
///
318322
/// // Create the reader and read the data using the supplied schema.
319323
/// let mut reader = builder.build().unwrap();
320-
/// let _batch = reader.next().unwrap().unwrap();
324+
/// let _batch = reader.next().unwrap().unwrap();
321325
/// ```
322326
pub fn with_schema(self, schema: SchemaRef) -> Self {
323327
Self {
@@ -369,6 +373,35 @@ pub struct ArrowReaderMetadata {
369373
}
370374

371375
impl ArrowReaderMetadata {
376+
/// Loads [`ArrowReaderMetadata`] from the provided [`ChunkReader`]
377+
///
378+
/// See [`ParquetRecordBatchReaderBuilder::new_with_metadata`] for how this can be used
379+
pub fn load2<T: ChunkReader>(reader: &T, options: ArrowReaderOptions) -> Result<Self> {
380+
Self::load_with_decryption(reader, options, FileDecryptionProperties::builder().build())
381+
}
382+
383+
pub fn load_with_decryption<T: ChunkReader>(reader: &T, options: ArrowReaderOptions,
384+
file_decryption_properties: FileDecryptionProperties) -> Result<Self> {
385+
let mut metadata = footer::parse_metadata_with_decryption(reader, file_decryption_properties)?;
386+
if options.page_index {
387+
let column_index = metadata
388+
.row_groups()
389+
.iter()
390+
.map(|rg| index_reader::read_columns_indexes(reader, rg.columns()))
391+
.collect::<Result<Vec<_>>>()?;
392+
metadata.set_column_index(Some(column_index));
393+
394+
let offset_index = metadata
395+
.row_groups()
396+
.iter()
397+
.map(|rg| index_reader::read_offset_indexes(reader, rg.columns()))
398+
.collect::<Result<Vec<_>>>()?;
399+
400+
metadata.set_offset_index(Some(offset_index))
401+
}
402+
Self::try_new(Arc::new(metadata), options)
403+
}
404+
372405
/// Loads [`ArrowReaderMetadata`] from the provided [`ChunkReader`], if necessary
373406
///
374407
/// See [`ParquetRecordBatchReaderBuilder::new_with_metadata`] for an
@@ -532,6 +565,11 @@ impl<T: ChunkReader + 'static> ParquetRecordBatchReaderBuilder<T> {
532565
Ok(Self::new_with_metadata(reader, metadata))
533566
}
534567

568+
pub fn try_new_with_decryption(reader: T, options: ArrowReaderOptions, file_decryption_properties: FileDecryptionProperties) -> Result<Self> {
569+
let metadata = ArrowReaderMetadata::load_with_decryption(&reader, options, file_decryption_properties)?;
570+
Ok(Self::new_with_metadata(reader, metadata))
571+
}
572+
535573
/// Create a [`ParquetRecordBatchReaderBuilder`] from the provided [`ArrowReaderMetadata`]
536574
///
537575
/// This interface allows:
@@ -788,6 +826,13 @@ impl ParquetRecordBatchReader {
788826
.build()
789827
}
790828

829+
pub fn try_new_with_decryption<T: ChunkReader + 'static>(reader: T, batch_size: usize,
830+
file_decryption_properties: FileDecryptionProperties) -> Result<Self> {
831+
ParquetRecordBatchReaderBuilder::try_new_with_decryption(reader, Default::default(), file_decryption_properties)?
832+
.with_batch_size(batch_size)
833+
.build()
834+
}
835+
791836
/// Create a new [`ParquetRecordBatchReader`] from the provided [`RowGroups`]
792837
///
793838
/// Note: this is a low-level interface see [`ParquetRecordBatchReader::try_new`] for a
@@ -955,6 +1000,7 @@ mod tests {
9551000
BoolType, ByteArray, ByteArrayType, DataType, FixedLenByteArray, FixedLenByteArrayType,
9561001
FloatType, Int32Type, Int64Type, Int96Type,
9571002
};
1003+
use crate::encryption::ciphers;
9581004
use crate::errors::Result;
9591005
use crate::file::properties::{EnabledStatistics, WriterProperties, WriterVersion};
9601006
use crate::file::writer::SerializedFileWriter;
@@ -1663,6 +1709,23 @@ mod tests {
16631709
assert!(col.value(2).is_nan());
16641710
}
16651711

1712+
#[test]
1713+
fn test_uniform_encryption() {
1714+
let path = format!(
1715+
"{}/uniform_encryption.parquet.encrypted",
1716+
arrow::util::test_util::parquet_test_data(),
1717+
);
1718+
let file = File::open(path).unwrap();
1719+
// todo
1720+
let key_code: &[u8] = "0123456789012345".as_bytes();
1721+
// todo
1722+
let decryption_properties = ciphers::FileDecryptionProperties::builder()
1723+
.with_footer_key(key_code.to_vec())
1724+
.build();
1725+
let record_reader = ParquetRecordBatchReader::try_new_with_decryption(file, 128, decryption_properties).unwrap();
1726+
// todo check contents
1727+
}
1728+
16661729
#[test]
16671730
fn test_read_float32_float64_byte_stream_split() {
16681731
let path = format!(

0 commit comments

Comments (0)