Skip to content

Commit 0922d4a

Browse files
efredine (Eric Fredine)
and
Eric Fredine
authored
Minor: clean up data page statistics tests and fix bugs (#11236)
* Change data page statistics checks to Check::Both for most remaining tests. Binary data is still incomplete, struct is not implemented, and two failing tests need further investigation.
* Enable Check::Both for test_numeric_limits_unsigned and fix the broken tests, though it is unclear why the tests were failing before the change.
---------
Co-authored-by: Eric Fredine <[email protected]>
1 parent c6eee61 commit 0922d4a

File tree

2 files changed

+28
-28
lines changed

2 files changed

+28
-28
lines changed

datafusion/core/src/datasource/physical_plan/parquet/statistics.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -780,7 +780,7 @@ macro_rules! get_data_page_statistics {
780780
[<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator)
781781
.map(|x| {
782782
x.into_iter().filter_map(|x| {
783-
x.and_then(|x| u32::try_from(x).ok())
783+
x.and_then(|x| Some(x as u32))
784784
})
785785
})
786786
.flatten()
@@ -790,7 +790,7 @@ macro_rules! get_data_page_statistics {
790790
[<$stat_type_prefix Int64DataPageStatsIterator>]::new($iterator)
791791
.map(|x| {
792792
x.into_iter().filter_map(|x| {
793-
x.and_then(|x| u64::try_from(x).ok())
793+
x.and_then(|x| Some(x as u64))
794794
})
795795
})
796796
.flatten()

datafusion/core/tests/parquet/arrow_statistics.rs

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,7 @@ async fn test_one_row_group_without_null() {
386386
// 3 rows
387387
expected_row_counts: Some(UInt64Array::from(vec![3])),
388388
column_name: "i64",
389-
check: Check::RowGroup,
389+
check: Check::Both,
390390
}
391391
.run()
392392
}
@@ -413,7 +413,7 @@ async fn test_one_row_group_with_null_and_negative() {
413413
// 8 rows
414414
expected_row_counts: Some(UInt64Array::from(vec![8])),
415415
column_name: "i64",
416-
check: Check::RowGroup,
416+
check: Check::Both,
417417
}
418418
.run()
419419
}
@@ -440,7 +440,7 @@ async fn test_two_row_group_with_null() {
440440
// row counts are [10, 5]
441441
expected_row_counts: Some(UInt64Array::from(vec![10, 5])),
442442
column_name: "i64",
443-
check: Check::RowGroup,
443+
check: Check::Both,
444444
}
445445
.run()
446446
}
@@ -467,7 +467,7 @@ async fn test_two_row_groups_with_all_nulls_in_one() {
467467
// row counts are [5, 3]
468468
expected_row_counts: Some(UInt64Array::from(vec![5, 3])),
469469
column_name: "i64",
470-
check: Check::RowGroup,
470+
check: Check::Both,
471471
}
472472
.run()
473473
}
@@ -1408,7 +1408,7 @@ async fn test_int32_range() {
14081408
expected_null_counts: UInt64Array::from(vec![0]),
14091409
expected_row_counts: Some(UInt64Array::from(vec![4])),
14101410
column_name: "i",
1411-
check: Check::RowGroup,
1411+
check: Check::Both,
14121412
}
14131413
.run();
14141414
}
@@ -1431,7 +1431,7 @@ async fn test_uint32_range() {
14311431
expected_null_counts: UInt64Array::from(vec![0]),
14321432
expected_row_counts: Some(UInt64Array::from(vec![4])),
14331433
column_name: "u",
1434-
check: Check::RowGroup,
1434+
check: Check::Both,
14351435
}
14361436
.run();
14371437
}
@@ -1453,7 +1453,7 @@ async fn test_numeric_limits_unsigned() {
14531453
expected_null_counts: UInt64Array::from(vec![0, 0]),
14541454
expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
14551455
column_name: "u8",
1456-
check: Check::RowGroup,
1456+
check: Check::Both,
14571457
}
14581458
.run();
14591459

@@ -1464,7 +1464,7 @@ async fn test_numeric_limits_unsigned() {
14641464
expected_null_counts: UInt64Array::from(vec![0, 0]),
14651465
expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
14661466
column_name: "u16",
1467-
check: Check::RowGroup,
1467+
check: Check::Both,
14681468
}
14691469
.run();
14701470

@@ -1475,7 +1475,7 @@ async fn test_numeric_limits_unsigned() {
14751475
expected_null_counts: UInt64Array::from(vec![0, 0]),
14761476
expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
14771477
column_name: "u32",
1478-
check: Check::RowGroup,
1478+
check: Check::Both,
14791479
}
14801480
.run();
14811481

@@ -1486,7 +1486,7 @@ async fn test_numeric_limits_unsigned() {
14861486
expected_null_counts: UInt64Array::from(vec![0, 0]),
14871487
expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
14881488
column_name: "u64",
1489-
check: Check::RowGroup,
1489+
check: Check::Both,
14901490
}
14911491
.run();
14921492
}
@@ -1508,7 +1508,7 @@ async fn test_numeric_limits_signed() {
15081508
expected_null_counts: UInt64Array::from(vec![0, 0]),
15091509
expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
15101510
column_name: "i8",
1511-
check: Check::RowGroup,
1511+
check: Check::Both,
15121512
}
15131513
.run();
15141514

@@ -1519,7 +1519,7 @@ async fn test_numeric_limits_signed() {
15191519
expected_null_counts: UInt64Array::from(vec![0, 0]),
15201520
expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
15211521
column_name: "i16",
1522-
check: Check::RowGroup,
1522+
check: Check::Both,
15231523
}
15241524
.run();
15251525

@@ -1530,7 +1530,7 @@ async fn test_numeric_limits_signed() {
15301530
expected_null_counts: UInt64Array::from(vec![0, 0]),
15311531
expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
15321532
column_name: "i32",
1533-
check: Check::RowGroup,
1533+
check: Check::Both,
15341534
}
15351535
.run();
15361536

@@ -1541,7 +1541,7 @@ async fn test_numeric_limits_signed() {
15411541
expected_null_counts: UInt64Array::from(vec![0, 0]),
15421542
expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
15431543
column_name: "i64",
1544-
check: Check::RowGroup,
1544+
check: Check::Both,
15451545
}
15461546
.run();
15471547
}
@@ -1563,7 +1563,7 @@ async fn test_numeric_limits_float() {
15631563
expected_null_counts: UInt64Array::from(vec![0, 0]),
15641564
expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
15651565
column_name: "f32",
1566-
check: Check::RowGroup,
1566+
check: Check::Both,
15671567
}
15681568
.run();
15691569

@@ -1574,7 +1574,7 @@ async fn test_numeric_limits_float() {
15741574
expected_null_counts: UInt64Array::from(vec![0, 0]),
15751575
expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
15761576
column_name: "f64",
1577-
check: Check::RowGroup,
1577+
check: Check::Both,
15781578
}
15791579
.run();
15801580

@@ -1585,7 +1585,7 @@ async fn test_numeric_limits_float() {
15851585
expected_null_counts: UInt64Array::from(vec![0, 0]),
15861586
expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
15871587
column_name: "f32_nan",
1588-
check: Check::RowGroup,
1588+
check: Check::Both,
15891589
}
15901590
.run();
15911591

@@ -1596,7 +1596,7 @@ async fn test_numeric_limits_float() {
15961596
expected_null_counts: UInt64Array::from(vec![0, 0]),
15971597
expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
15981598
column_name: "f64_nan",
1599-
check: Check::RowGroup,
1599+
check: Check::Both,
16001600
}
16011601
.run();
16021602
}
@@ -1619,7 +1619,7 @@ async fn test_float64() {
16191619
expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]),
16201620
expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])),
16211621
column_name: "f",
1622-
check: Check::RowGroup,
1622+
check: Check::Both,
16231623
}
16241624
.run();
16251625
}
@@ -1652,7 +1652,7 @@ async fn test_float16() {
16521652
expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0]),
16531653
expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5, 5])),
16541654
column_name: "f",
1655-
check: Check::RowGroup,
1655+
check: Check::Both,
16561656
}
16571657
.run();
16581658
}
@@ -1741,7 +1741,7 @@ async fn test_dictionary() {
17411741
expected_null_counts: UInt64Array::from(vec![1, 0]),
17421742
expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
17431743
column_name: "string_dict_i8",
1744-
check: Check::RowGroup,
1744+
check: Check::Both,
17451745
}
17461746
.run();
17471747

@@ -1763,7 +1763,7 @@ async fn test_dictionary() {
17631763
expected_null_counts: UInt64Array::from(vec![1, 0]),
17641764
expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
17651765
column_name: "int_dict_i8",
1766-
check: Check::RowGroup,
1766+
check: Check::Both,
17671767
}
17681768
.run();
17691769
}
@@ -1915,7 +1915,7 @@ async fn test_period_in_column_names() {
19151915
expected_null_counts: UInt64Array::from(vec![0, 0, 0]),
19161916
expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])),
19171917
column_name: "name",
1918-
check: Check::RowGroup,
1918+
check: Check::Both,
19191919
}
19201920
.run();
19211921

@@ -1929,7 +1929,7 @@ async fn test_period_in_column_names() {
19291929
expected_null_counts: UInt64Array::from(vec![0, 0, 0]),
19301930
expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])),
19311931
column_name: "service.name",
1932-
check: Check::RowGroup,
1932+
check: Check::Both,
19331933
}
19341934
.run();
19351935
}
@@ -2041,7 +2041,7 @@ async fn test_missing_statistics() {
20412041
expected_null_counts: UInt64Array::from(vec![None]),
20422042
expected_row_counts: Some(UInt64Array::from(vec![3])), // still has row count statistics
20432043
column_name: "i64",
2044-
check: Check::RowGroup,
2044+
check: Check::Both,
20452045
}
20462046
.run();
20472047
}
@@ -2063,7 +2063,7 @@ async fn test_column_not_found() {
20632063
expected_null_counts: UInt64Array::from(vec![2, 2]),
20642064
expected_row_counts: Some(UInt64Array::from(vec![13, 7])),
20652065
column_name: "not_a_column",
2066-
check: Check::RowGroup,
2066+
check: Check::Both,
20672067
}
20682068
.run_col_not_found();
20692069
}

0 commit comments

Comments
 (0)