Skip to content

Commit cba5551

Browse files
committed
Get column path from descriptor rather than Arrow schema
1 parent 7c63618 commit cba5551

File tree

4 files changed

+23
-26
lines changed

4 files changed

+23
-26
lines changed

parquet/src/arrow/arrow_writer/mod.rs

Lines changed: 15 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -821,11 +821,8 @@ fn get_column_writers_with_encryptor(
821821
) -> Result<Vec<ArrowColumnWriter>> {
822822
let mut writers = Vec::with_capacity(arrow.fields.len());
823823
let mut leaves = parquet.columns().iter();
824-
let column_factory = ArrowColumnWriterFactory::new().with_file_encryptor(
825-
row_group_index,
826-
file_encryptor,
827-
arrow.clone(),
828-
);
824+
let column_factory =
825+
ArrowColumnWriterFactory::new().with_file_encryptor(row_group_index, file_encryptor);
829826
for field in &arrow.fields {
830827
column_factory.get_arrow_column_writer(
831828
field.data_type(),
@@ -843,8 +840,6 @@ struct ArrowColumnWriterFactory {
843840
row_group_index: usize,
844841
#[cfg(feature = "encryption")]
845842
file_encryptor: Option<Arc<FileEncryptor>>,
846-
#[cfg(feature = "encryption")]
847-
schema_ref: Option<SchemaRef>,
848843
}
849844

850845
impl ArrowColumnWriterFactory {
@@ -854,8 +849,6 @@ impl ArrowColumnWriterFactory {
854849
row_group_index: 0,
855850
#[cfg(feature = "encryption")]
856851
file_encryptor: None,
857-
#[cfg(feature = "encryption")]
858-
schema_ref: None,
859852
}
860853
}
861854

@@ -864,28 +857,31 @@ impl ArrowColumnWriterFactory {
864857
mut self,
865858
row_group_index: usize,
866859
file_encryptor: Option<Arc<FileEncryptor>>,
867-
schema_ref: SchemaRef,
868860
) -> Self {
869861
self.row_group_index = row_group_index;
870862
self.file_encryptor = file_encryptor;
871-
self.schema_ref = Some(schema_ref);
872863
self
873864
}
874865

875-
// todo: add column path
876866
#[cfg(feature = "encryption")]
877-
fn create_page_writer(&self, column_index: usize) -> Box<ArrowPageWriter> {
867+
fn create_page_writer(
868+
&self,
869+
column_descriptor: &ColumnDescPtr,
870+
column_index: usize,
871+
) -> Box<ArrowPageWriter> {
878872
let page_encryptor = self.file_encryptor.as_ref().map(|fe| {
879-
let binding = self.schema_ref.clone().unwrap();
880-
let column_path = binding.field(column_index).name().as_bytes().to_vec();
881-
873+
let column_path = column_descriptor.path().string();
882874
PageEncryptor::new(fe.clone(), self.row_group_index, column_index, column_path)
883875
});
884876
Box::new(ArrowPageWriter::default().with_encryptor(page_encryptor))
885877
}
886878

887879
#[cfg(not(feature = "encryption"))]
888-
fn create_page_writer(&self, _column_index: usize) -> Box<ArrowPageWriter> {
880+
fn create_page_writer(
881+
&self,
882+
_column_descriptor: &ColumnDescPtr,
883+
_column_index: usize,
884+
) -> Box<ArrowPageWriter> {
889885
Box::<ArrowPageWriter>::default()
890886
}
891887

@@ -897,7 +893,7 @@ impl ArrowColumnWriterFactory {
897893
out: &mut Vec<ArrowColumnWriter>,
898894
) -> Result<()> {
899895
let col = |desc: &ColumnDescPtr| {
900-
let page_writer = self.create_page_writer(out.len());
896+
let page_writer = self.create_page_writer(desc, out.len());
901897
let chunk = page_writer.buffer.clone();
902898
let writer = get_column_writer(desc.clone(), props.clone(), page_writer);
903899
ArrowColumnWriter {
@@ -907,7 +903,7 @@ impl ArrowColumnWriterFactory {
907903
};
908904

909905
let bytes = |desc: &ColumnDescPtr| {
910-
let page_writer = self.create_page_writer(out.len());
906+
let page_writer = self.create_page_writer(desc, out.len());
911907
let chunk = page_writer.buffer.clone();
912908
let writer = GenericColumnWriter::new(desc.clone(), props.clone(), page_writer);
913909
ArrowColumnWriter {

parquet/src/encryption/encrypt.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,13 +170,14 @@ impl FileEncryptor {
170170
RingGcmBlockEncryptor::new(&self.properties.footer_key.key)
171171
}
172172

173-
pub(crate) fn get_column_encryptor(&self, column_path: &Vec<u8>) -> RingGcmBlockEncryptor {
173+
pub(crate) fn get_column_encryptor(&self, column_path: &str) -> RingGcmBlockEncryptor {
174174
if self.properties.column_keys.is_empty() {
175175
return RingGcmBlockEncryptor::new(self.properties.footer_key.key());
176176
}
177+
// TODO: Column paths should be stored as String
178+
let column_path = column_path.as_bytes();
177179
match self.properties.column_keys.get(column_path) {
178180
None => todo!("Handle unencrypted columns"),
179-
// None => RingGcmBlockEncryptor::new(self.properties.footer_key.key()),
180181
Some(column_key) => RingGcmBlockEncryptor::new(column_key.key()),
181182
}
182183
}

parquet/src/encryption/page_encryptor.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,23 +29,23 @@ pub struct PageEncryptor {
2929
file_encryptor: Arc<FileEncryptor>,
3030
row_group_index: usize,
3131
column_index: usize,
32+
column_path: String,
3233
page_index: usize,
33-
column_path: Vec<u8>,
3434
}
3535

3636
impl PageEncryptor {
3737
pub fn new(
3838
file_encryptor: Arc<FileEncryptor>,
3939
row_group_index: usize,
4040
column_index: usize,
41-
column_path: Vec<u8>,
41+
column_path: String,
4242
) -> Self {
4343
Self {
4444
file_encryptor,
4545
row_group_index,
4646
column_index,
47-
page_index: 0,
4847
column_path,
48+
page_index: 0,
4949
}
5050
}
5151

@@ -66,7 +66,7 @@ impl PageEncryptor {
6666
self.column_index,
6767
Some(self.page_index),
6868
)?;
69-
let mut encryptor = self.file_encryptor.get_footer_encryptor();
69+
let mut encryptor = self.file_encryptor.get_column_encryptor(&self.column_path);
7070
// todo: use column encryptor when needed
7171
// self.file_encryptor.get_column_encryptor(self.column_path.as_ref())
7272
let encrypted_buffer = encryptor.encrypt(page.data(), &aad);

parquet/src/file/writer.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -596,7 +596,7 @@ impl<'a, W: Write + Send> SerializedRowGroupWriter<'a, W> {
596596
file_encryptor,
597597
row_group_index,
598598
column_index,
599-
column.path().string().into_bytes(),
599+
column.path().string(),
600600
)
601601
});
602602

0 commit comments

Comments
 (0)