Skip to content

Commit ad56b7e

Browse files
authored
Support Date Parquet Data Page Statistics (#11135)
* add Date Parquet Data Page Statistics
* indentation for readability
1 parent 0c4e4a1 commit ad56b7e

File tree

2 files changed

+16
-2
lines changed

2 files changed

+16
-2
lines changed

datafusion/core/src/datasource/physical_plan/parquet/statistics.rs

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -701,6 +701,20 @@ macro_rules! get_data_page_statistics {
701701
TimeUnit::Nanosecond => Arc::new(TimestampNanosecondArray::from_iter(iter).with_timezone_opt(timezone.clone())),
702702
})
703703
},
704+
Some(DataType::Date32) => Ok(Arc::new(Date32Array::from_iter([<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator).flatten()))),
705+
Some(DataType::Date64) => Ok(
706+
Arc::new(
707+
Date64Array::from([<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator)
708+
.map(|x| {
709+
x.into_iter()
710+
.filter_map(|x| {
711+
x.and_then(|x| i64::try_from(x).ok())
712+
})
713+
.map(|x| x * 24 * 60 * 60 * 1000)
714+
}).flatten().collect::<Vec<_>>()
715+
)
716+
)
717+
),
704718
_ => unimplemented!()
705719
}
706720
}

datafusion/core/tests/parquet/arrow_statistics.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1181,7 +1181,7 @@ async fn test_dates_32_diff_rg_sizes() {
11811181
// row counts are [13, 7]
11821182
expected_row_counts: Some(UInt64Array::from(vec![13, 7])),
11831183
column_name: "date32",
1184-
check: Check::RowGroup,
1184+
check: Check::Both,
11851185
}
11861186
.run();
11871187
}
@@ -1324,7 +1324,7 @@ async fn test_dates_64_diff_rg_sizes() {
13241324
expected_null_counts: UInt64Array::from(vec![2, 2]),
13251325
expected_row_counts: Some(UInt64Array::from(vec![13, 7])),
13261326
column_name: "date64",
1327-
check: Check::RowGroup,
1327+
check: Check::Both,
13281328
}
13291329
.run();
13301330
}

0 commit comments

Comments
 (0)