diff --git a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs
index 3be060ce6180..43483a0b6659 100644
--- a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs
+++ b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs
@@ -549,6 +549,18 @@ macro_rules! make_data_page_stats_iterator {
     };
 }
 
+make_data_page_stats_iterator!(
+    MinBooleanDataPageStatsIterator,
+    |x: &PageIndex<bool>| { x.min },
+    Index::BOOLEAN,
+    bool
+);
+make_data_page_stats_iterator!(
+    MaxBooleanDataPageStatsIterator,
+    |x: &PageIndex<bool>| { x.max },
+    Index::BOOLEAN,
+    bool
+);
 make_data_page_stats_iterator!(
     MinInt32DataPageStatsIterator,
     |x: &PageIndex<i32>| { x.min },
@@ -613,6 +625,15 @@ macro_rules! get_data_page_statistics {
     ($stat_type_prefix: ident, $data_type: ident, $iterator: ident) => {
         paste! {
             match $data_type {
+                Some(DataType::Boolean) => Ok(Arc::new(
+                    BooleanArray::from_iter(
+                        [<$stat_type_prefix BooleanDataPageStatsIterator>]::new($iterator)
+                            .flatten()
+                            // BooleanArray::from_iter required a sized iterator, so collect into Vec first
+                            .collect::<Vec<_>>()
+                            .into_iter()
+                    )
+                )),
                 Some(DataType::UInt8) => Ok(Arc::new(
                     UInt8Array::from_iter(
                         [<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator)
@@ -778,6 +799,11 @@ where
 {
     let iter = iterator.flat_map(|(len, index)| match index {
         Index::NONE => vec![None; len],
+        Index::BOOLEAN(native_index) => native_index
+            .indexes
+            .iter()
+            .map(|x| x.null_count.map(|x| x as u64))
+            .collect::<Vec<_>>(),
         Index::INT32(native_index) => native_index
             .indexes
             .iter()
diff --git a/datafusion/core/tests/parquet/arrow_statistics.rs b/datafusion/core/tests/parquet/arrow_statistics.rs
index ddb39fce4076..cd6985b311c3 100644
--- a/datafusion/core/tests/parquet/arrow_statistics.rs
+++ b/datafusion/core/tests/parquet/arrow_statistics.rs
@@ -1953,7 +1953,7 @@ async fn test_boolean() {
         expected_null_counts: UInt64Array::from(vec![1, 0]),
         expected_row_counts: Some(UInt64Array::from(vec![5, 5])),
         column_name: "bool",
-        check: Check::RowGroup,
+        check: Check::Both,
     }
     .run();
 }