@@ -42,6 +42,10 @@ mod filter;
 mod selection;
 pub mod statistics;
 
+use crate::file::footer;
+use crate::file::page_index::index_reader;
+use crate::encryption::ciphers::FileDecryptionProperties;
+
 /// Builder for constructing parquet readers into arrow.
 ///
 /// Most users should use one of the following specializations:
@@ -317,7 +321,7 @@ impl ArrowReaderOptions {
     ///
     /// // Create the reader and read the data using the supplied schema.
     /// let mut reader = builder.build().unwrap();
-    /// let _batch = reader.next().unwrap().unwrap();
+    /// let _batch = reader.next().unwrap().unwrap();
     /// ```
     pub fn with_schema(self, schema: SchemaRef) -> Self {
         Self {
@@ -369,6 +373,35 @@ pub struct ArrowReaderMetadata {
 }
 
 impl ArrowReaderMetadata {
+    /// Loads [`ArrowReaderMetadata`] from the provided [`ChunkReader`]
+    ///
+    /// See [`ParquetRecordBatchReaderBuilder::new_with_metadata`] for how this can be used
+    pub fn load2<T: ChunkReader>(reader: &T, options: ArrowReaderOptions) -> Result<Self> {
+        Self::load_with_decryption(reader, options, FileDecryptionProperties::builder().build())
+    }
+
+    pub fn load_with_decryption<T: ChunkReader>(reader: &T, options: ArrowReaderOptions,
+        file_decryption_properties: FileDecryptionProperties) -> Result<Self> {
+        let mut metadata = footer::parse_metadata_with_decryption(reader, file_decryption_properties)?;
+        if options.page_index {
+            let column_index = metadata
+                .row_groups()
+                .iter()
+                .map(|rg| index_reader::read_columns_indexes(reader, rg.columns()))
+                .collect::<Result<Vec<_>>>()?;
+            metadata.set_column_index(Some(column_index));
+
+            let offset_index = metadata
+                .row_groups()
+                .iter()
+                .map(|rg| index_reader::read_offset_indexes(reader, rg.columns()))
+                .collect::<Result<Vec<_>>>()?;
+
+            metadata.set_offset_index(Some(offset_index))
+        }
+        Self::try_new(Arc::new(metadata), options)
+    }
+
     /// Loads [`ArrowReaderMetadata`] from the provided [`ChunkReader`], if necessary
     ///
     /// See [`ParquetRecordBatchReaderBuilder::new_with_metadata`] for an
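
Note on the new load path above: `load_with_decryption` mirrors the existing `load`, except that the footer is parsed via `footer::parse_metadata_with_decryption` with the supplied `FileDecryptionProperties`; the page-index handling is unchanged. A minimal usage sketch follows, assuming the crate-internal `crate::encryption::ciphers` module ends up publicly reachable as `parquet::encryption::ciphers`; the file path and 16-byte footer key are placeholders, not part of this change.

use std::fs::File;

use parquet::arrow::arrow_reader::{
    ArrowReaderMetadata, ArrowReaderOptions, ParquetRecordBatchReaderBuilder,
};
use parquet::encryption::ciphers::FileDecryptionProperties; // assumed public path

fn read_encrypted() -> parquet::errors::Result<()> {
    // Placeholder file and key; substitute real values.
    let file = File::open("data.parquet.encrypted")?;
    let decryption_properties = FileDecryptionProperties::builder()
        .with_footer_key(b"0123456789012345".to_vec())
        .build();

    // Parse the encrypted footer (and, optionally, the page indexes) once.
    let metadata = ArrowReaderMetadata::load_with_decryption(
        &file,
        ArrowReaderOptions::new().with_page_index(true),
        decryption_properties,
    )?;

    // The parsed metadata can then be reused without re-reading the footer.
    let reader = ParquetRecordBatchReaderBuilder::new_with_metadata(file, metadata).build()?;
    for batch in reader {
        let _ = batch?;
    }
    Ok(())
}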
@@ -532,6 +565,11 @@ impl<T: ChunkReader + 'static> ParquetRecordBatchReaderBuilder<T> {
         Ok(Self::new_with_metadata(reader, metadata))
     }
 
+    pub fn try_new_with_decryption(reader: T, options: ArrowReaderOptions, file_decryption_properties: FileDecryptionProperties) -> Result<Self> {
+        let metadata = ArrowReaderMetadata::load_with_decryption(&reader, options, file_decryption_properties)?;
+        Ok(Self::new_with_metadata(reader, metadata))
+    }
+
     /// Create a [`ParquetRecordBatchReaderBuilder`] from the provided [`ArrowReaderMetadata`]
     ///
     /// This interface allows:
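
Since `try_new_with_decryption` returns the ordinary builder, the rest of the builder API (projection, row-group selection, batch size) still composes with decryption. A sketch under the same assumptions as above; the projected column indices and row-group selection are illustrative only.

use std::fs::File;

use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
use parquet::arrow::ProjectionMask;
use parquet::encryption::ciphers::FileDecryptionProperties; // assumed public path

fn projected_read(file: File, footer_key: Vec<u8>) -> parquet::errors::Result<()> {
    let props = FileDecryptionProperties::builder()
        .with_footer_key(footer_key)
        .build();
    let builder = ParquetRecordBatchReaderBuilder::try_new_with_decryption(
        file,
        ArrowReaderOptions::new(),
        props,
    )?;

    // Illustrative: read only the first two leaf columns of row group 0.
    let mask = ProjectionMask::leaves(builder.parquet_schema(), [0, 1]);
    let reader = builder
        .with_projection(mask)
        .with_row_groups(vec![0])
        .with_batch_size(1024)
        .build()?;

    for batch in reader {
        let _ = batch?;
    }
    Ok(())
}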
@@ -788,6 +826,13 @@ impl ParquetRecordBatchReader {
             .build()
     }
 
+    pub fn try_new_with_decryption<T: ChunkReader + 'static>(reader: T, batch_size: usize,
+        file_decryption_properties: FileDecryptionProperties) -> Result<Self> {
+        ParquetRecordBatchReaderBuilder::try_new_with_decryption(reader, Default::default(), file_decryption_properties)?
+            .with_batch_size(batch_size)
+            .build()
+    }
+
     /// Create a new [`ParquetRecordBatchReader`] from the provided [`RowGroups`]
     ///
     /// Note: this is a low-level interface see [`ParquetRecordBatchReader::try_new`] for a
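
The reader-level constructor above is the convenience path: like `ParquetRecordBatchReader::try_new`, it builds a reader with default options plus the given batch size, with the decryption properties threaded through. A sketch, again treating the public module path as an assumption:

use std::fs::File;

use parquet::arrow::arrow_reader::ParquetRecordBatchReader;
use parquet::encryption::ciphers::FileDecryptionProperties; // assumed public path

fn count_rows(path: &str, footer_key: Vec<u8>) -> parquet::errors::Result<usize> {
    let props = FileDecryptionProperties::builder()
        .with_footer_key(footer_key)
        .build();
    let reader = ParquetRecordBatchReader::try_new_with_decryption(File::open(path)?, 8192, props)?;

    // Sum rows across all decoded batches.
    let mut rows = 0;
    for batch in reader {
        rows += batch?.num_rows();
    }
    Ok(rows)
}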
@@ -955,6 +1000,7 @@ mod tests {
         BoolType, ByteArray, ByteArrayType, DataType, FixedLenByteArray, FixedLenByteArrayType,
         FloatType, Int32Type, Int64Type, Int96Type,
     };
+    use crate::encryption::ciphers;
    use crate::errors::Result;
    use crate::file::properties::{EnabledStatistics, WriterProperties, WriterVersion};
    use crate::file::writer::SerializedFileWriter;
@@ -1663,6 +1709,23 @@ mod tests {
         assert!(col.value(2).is_nan());
     }
 
+    #[test]
+    fn test_uniform_encryption() {
+        let path = format!(
+            "{}/uniform_encryption.parquet.encrypted",
+            arrow::util::test_util::parquet_test_data(),
+        );
+        let file = File::open(path).unwrap();
+        // todo
+        let key_code: &[u8] = "0123456789012345".as_bytes();
+        // todo
+        let decryption_properties = ciphers::FileDecryptionProperties::builder()
+            .with_footer_key(key_code.to_vec())
+            .build();
+        let record_reader = ParquetRecordBatchReader::try_new_with_decryption(file, 128, decryption_properties).unwrap();
+        // todo check contents
+    }
+
     #[test]
     fn test_read_float32_float64_byte_stream_split() {
         let path = format!(
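
One possible shape for the `// todo check contents` in `test_uniform_encryption` above, kept generic because the schema and row count of uniform_encryption.parquet.encrypted are not stated in this change. The continuation below picks up from the `record_reader` binding and only asserts that decryption yields data and that batches respect the requested batch size.

// Continuation of the test body; assumes the encrypted file decodes to at least one row.
let mut total_rows = 0;
for batch in record_reader {
    let batch = batch.unwrap();
    assert!(batch.num_rows() <= 128);
    total_rows += batch.num_rows();
}
assert!(total_rows > 0);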