Skip to content

Commit 4916e89

Browse files
Boolean parquet get datapage stat (#11054)
* test and implement boolean data page statistics
* left out a collect & forgot to change the Check to Both
* Update datafusion/core/src/datasource/physical_plan/parquet/statistics.rs

---------

Co-authored-by: Andrew Lamb <[email protected]>
1 parent ce4940d commit 4916e89

File tree

2 files changed

+27
-1
lines changed

2 files changed

+27
-1
lines changed

datafusion/core/src/datasource/physical_plan/parquet/statistics.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,18 @@ macro_rules! make_data_page_stats_iterator {
549549
};
550550
}
551551

552+
make_data_page_stats_iterator!(
553+
MinBooleanDataPageStatsIterator,
554+
|x: &PageIndex<bool>| { x.min },
555+
Index::BOOLEAN,
556+
bool
557+
);
558+
make_data_page_stats_iterator!(
559+
MaxBooleanDataPageStatsIterator,
560+
|x: &PageIndex<bool>| { x.max },
561+
Index::BOOLEAN,
562+
bool
563+
);
552564
make_data_page_stats_iterator!(
553565
MinInt32DataPageStatsIterator,
554566
|x: &PageIndex<i32>| { x.min },
@@ -613,6 +625,15 @@ macro_rules! get_data_page_statistics {
613625
($stat_type_prefix: ident, $data_type: ident, $iterator: ident) => {
614626
paste! {
615627
match $data_type {
628+
Some(DataType::Boolean) => Ok(Arc::new(
629+
BooleanArray::from_iter(
630+
[<$stat_type_prefix BooleanDataPageStatsIterator>]::new($iterator)
631+
.flatten()
632+
// BooleanArray::from_iter requires a sized iterator, so collect into a Vec first
633+
.collect::<Vec<_>>()
634+
.into_iter()
635+
)
636+
)),
616637
Some(DataType::UInt8) => Ok(Arc::new(
617638
UInt8Array::from_iter(
618639
[<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator)
@@ -778,6 +799,11 @@ where
778799
{
779800
let iter = iterator.flat_map(|(len, index)| match index {
780801
Index::NONE => vec![None; len],
802+
Index::BOOLEAN(native_index) => native_index
803+
.indexes
804+
.iter()
805+
.map(|x| x.null_count.map(|x| x as u64))
806+
.collect::<Vec<_>>(),
781807
Index::INT32(native_index) => native_index
782808
.indexes
783809
.iter()

datafusion/core/tests/parquet/arrow_statistics.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1953,7 +1953,7 @@ async fn test_boolean() {
19531953
expected_null_counts: UInt64Array::from(vec![1, 0]),
19541954
expected_row_counts: Some(UInt64Array::from(vec![5, 5])),
19551955
column_name: "bool",
1956-
check: Check::RowGroup,
1956+
check: Check::Both,
19571957
}
19581958
.run();
19591959
}

0 commit comments

Comments
 (0)