Skip to content

Commit 19e13b8

Browse files
tshauck authored and findepi committed
feat: support uint data page extraction (apache#11018)
1 parent b7c9338 commit 19e13b8

File tree

2 files changed

+46
-4
lines changed

2 files changed

+46
-4
lines changed

datafusion/core/src/datasource/physical_plan/parquet/statistics.rs

Lines changed: 42 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -613,6 +613,48 @@ macro_rules! get_data_page_statistics {
613613
($stat_type_prefix: ident, $data_type: ident, $iterator: ident) => {
614614
paste! {
615615
match $data_type {
616+
Some(DataType::UInt8) => Ok(Arc::new(
617+
UInt8Array::from_iter(
618+
[<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator)
619+
.map(|x| {
620+
x.into_iter().filter_map(|x| {
621+
x.and_then(|x| u8::try_from(x).ok())
622+
})
623+
})
624+
.flatten()
625+
)
626+
)),
627+
Some(DataType::UInt16) => Ok(Arc::new(
628+
UInt16Array::from_iter(
629+
[<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator)
630+
.map(|x| {
631+
x.into_iter().filter_map(|x| {
632+
x.and_then(|x| u16::try_from(x).ok())
633+
})
634+
})
635+
.flatten()
636+
)
637+
)),
638+
Some(DataType::UInt32) => Ok(Arc::new(
639+
UInt32Array::from_iter(
640+
[<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator)
641+
.map(|x| {
642+
x.into_iter().filter_map(|x| {
643+
x.and_then(|x| u32::try_from(x).ok())
644+
})
645+
})
646+
.flatten()
647+
))),
648+
Some(DataType::UInt64) => Ok(Arc::new(
649+
UInt64Array::from_iter(
650+
[<$stat_type_prefix Int64DataPageStatsIterator>]::new($iterator)
651+
.map(|x| {
652+
x.into_iter().filter_map(|x| {
653+
x.and_then(|x| u64::try_from(x).ok())
654+
})
655+
})
656+
.flatten()
657+
))),
616658
Some(DataType::Int8) => Ok(Arc::new(
617659
Int8Array::from_iter(
618660
[<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator)

datafusion/core/tests/parquet/arrow_statistics.rs

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1352,7 +1352,7 @@ async fn test_uint() {
13521352
expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0, 0]),
13531353
expected_row_counts: Some(UInt64Array::from(vec![4, 4, 4, 4, 4])),
13541354
column_name: "u8",
1355-
check: Check::RowGroup,
1355+
check: Check::Both,
13561356
}
13571357
.run();
13581358

@@ -1363,7 +1363,7 @@ async fn test_uint() {
13631363
expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0, 0]),
13641364
expected_row_counts: Some(UInt64Array::from(vec![4, 4, 4, 4, 4])),
13651365
column_name: "u16",
1366-
check: Check::RowGroup,
1366+
check: Check::Both,
13671367
}
13681368
.run();
13691369

@@ -1374,7 +1374,7 @@ async fn test_uint() {
13741374
expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0, 0]),
13751375
expected_row_counts: Some(UInt64Array::from(vec![4, 4, 4, 4, 4])),
13761376
column_name: "u32",
1377-
check: Check::RowGroup,
1377+
check: Check::Both,
13781378
}
13791379
.run();
13801380

@@ -1385,7 +1385,7 @@ async fn test_uint() {
13851385
expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0, 0]),
13861386
expected_row_counts: Some(UInt64Array::from(vec![4, 4, 4, 4, 4])),
13871387
column_name: "u64",
1388-
check: Check::RowGroup,
1388+
check: Check::Both,
13891389
}
13901390
.run();
13911391
}

0 commit comments

Comments
 (0)