Skip to content

Commit e8a0b7f

Browse files
committed
add to_thrift to NativeIndex in prep for apache#6000
1 parent 2f7a9ac commit e8a0b7f

File tree

2 files changed

+50
-1
lines changed

2 files changed

+50
-1
lines changed

parquet/src/file/page_index/index.rs

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,55 @@ impl<T: ParquetValueType> NativeIndex<T> {
225225
boundary_order: index.boundary_order,
226226
})
227227
}
228+
229+
// TODO: remove annotation after merge with #6000
230+
#[allow(dead_code)]
231+
pub(crate) fn to_thrift(&self) -> ColumnIndex {
232+
let min_values = self
233+
.indexes
234+
.iter()
235+
.map(|x| x.min_bytes().map(|x| x.to_vec()))
236+
.collect::<Option<Vec<_>>>()
237+
.unwrap_or_else(|| vec![vec![]; self.indexes.len()]);
238+
239+
let max_values = self
240+
.indexes
241+
.iter()
242+
.map(|x| x.max_bytes().map(|x| x.to_vec()))
243+
.collect::<Option<Vec<_>>>()
244+
.unwrap_or_else(|| vec![vec![]; self.indexes.len()]);
245+
246+
let null_counts = self
247+
.indexes
248+
.iter()
249+
.map(|x| x.null_count())
250+
.collect::<Option<Vec<_>>>();
251+
252+
// Concatenate page histograms into a single Option<Vec>
253+
let repetition_level_histograms = self
254+
.indexes
255+
.iter()
256+
.map(|x| x.repetition_level_histogram().map(|v| v.values()))
257+
.collect::<Option<Vec<&[i64]>>>()
258+
.map(|hists| hists.concat());
259+
260+
let definition_level_histograms = self
261+
.indexes
262+
.iter()
263+
.map(|x| x.definition_level_histogram().map(|v| v.values()))
264+
.collect::<Option<Vec<&[i64]>>>()
265+
.map(|hists| hists.concat());
266+
267+
ColumnIndex::new(
268+
self.indexes.iter().map(|x| x.min().is_none()).collect(),
269+
min_values,
270+
max_values,
271+
self.boundary_order,
272+
null_counts,
273+
repetition_level_histograms,
274+
definition_level_histograms,
275+
)
276+
}
228277
}
229278

230279
#[cfg(test)]

parquet/src/file/page_index/offset_index.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ impl OffsetIndexMetaData {
4848
self.unencoded_byte_array_data_bytes.as_ref()
4949
}
5050

51-
// TODO: remove annotation after merge
51+
// TODO: remove annotation after merge with #6000
5252
#[allow(dead_code)]
5353
pub(crate) fn to_thrift(&self) -> OffsetIndex {
5454
OffsetIndex::new(

0 commit comments

Comments
 (0)