@@ -354,3 +354,183 @@ fn set_min_if_lesser(
354
354
_ => { }
355
355
}
356
356
}
357
+
358
#[cfg(test)]
mod tests {
    use super::*;
    use crate::arrow::datatypes::{DataType, Field, Schema};
    use datafusion_common::ScalarValue;
    use std::sync::Arc;

    /// Builds a schema holding one non-nullable `Int32` field per given name.
    fn int32_schema(names: &[&str]) -> Arc<Schema> {
        let fields: Vec<Field> = names
            .iter()
            .map(|name| Field::new(*name, DataType::Int32, false))
            .collect();
        Arc::new(Schema::new(fields))
    }

    /// Wraps `value` as an exact `Int32` scalar statistic.
    fn exact_i32(value: i32) -> Precision<ScalarValue> {
        Precision::Exact(ScalarValue::Int32(Some(value)))
    }

    /// Wraps `value` as an inexact `Int32` scalar statistic.
    fn inexact_i32(value: i32) -> Precision<ScalarValue> {
        Precision::Inexact(ScalarValue::Int32(Some(value)))
    }

    /// Assembles a `ColumnStatistics` fixture; the distinct count is always
    /// left absent because none of these tests exercise it.
    fn column(
        null_count: Precision<usize>,
        min_value: Precision<ScalarValue>,
        max_value: Precision<ScalarValue>,
        sum_value: Precision<ScalarValue>,
    ) -> ColumnStatistics {
        ColumnStatistics {
            null_count,
            max_value,
            min_value,
            sum_value,
            distinct_count: Precision::Absent,
        }
    }

    /// Two inputs with fully exact statistics over two columns: the test
    /// expects exact sums for counts/sizes and exact min/max/sum per column.
    #[test]
    fn test_compute_summary_statistics_basic() {
        let schema = int32_schema(&["col1", "col2"]);

        let first = Statistics {
            num_rows: Precision::Exact(10),
            total_byte_size: Precision::Exact(100),
            column_statistics: vec![
                column(
                    Precision::Exact(1),
                    exact_i32(1),
                    exact_i32(100),
                    exact_i32(500),
                ),
                column(
                    Precision::Exact(2),
                    exact_i32(10),
                    exact_i32(200),
                    exact_i32(1000),
                ),
            ],
        };
        let second = Statistics {
            num_rows: Precision::Exact(15),
            total_byte_size: Precision::Exact(150),
            column_statistics: vec![
                column(
                    Precision::Exact(2),
                    exact_i32(-10),
                    exact_i32(120),
                    exact_i32(600),
                ),
                column(
                    Precision::Exact(3),
                    exact_i32(5),
                    exact_i32(180),
                    exact_i32(1200),
                ),
            ],
        };

        let items = vec![Arc::new(first), Arc::new(second)];
        let summary_stats =
            compute_summary_statistics(items, &schema, |item| Some(item.as_ref()));

        // Table-level totals: 10 + 15 rows, 100 + 150 bytes.
        assert_eq!(summary_stats.num_rows, Precision::Exact(25));
        assert_eq!(summary_stats.total_byte_size, Precision::Exact(250));

        // First column: nulls 1 + 2, max of (100, 120), min of (1, -10), sum 500 + 600.
        let col1_stats = &summary_stats.column_statistics[0];
        assert_eq!(col1_stats.null_count, Precision::Exact(3));
        assert_eq!(col1_stats.max_value, exact_i32(120));
        assert_eq!(col1_stats.min_value, exact_i32(-10));
        assert_eq!(col1_stats.sum_value, exact_i32(1100));

        // Second column: nulls 2 + 3, max of (200, 180), min of (10, 5), sum 1000 + 1200.
        let col2_stats = &summary_stats.column_statistics[1];
        assert_eq!(col2_stats.null_count, Precision::Exact(5));
        assert_eq!(col2_stats.max_value, exact_i32(200));
        assert_eq!(col2_stats.min_value, exact_i32(5));
        assert_eq!(col2_stats.sum_value, exact_i32(2200));
    }

    /// Inputs that mix exact, inexact and absent statistics: the test expects
    /// every merged value touched by an inexact input to be inexact, and the
    /// sum to be absent because one input has no sum.
    #[test]
    fn test_compute_summary_statistics_mixed_precision() {
        let schema = int32_schema(&["col1"]);

        let mostly_exact = Statistics {
            num_rows: Precision::Exact(10),
            total_byte_size: Precision::Inexact(100),
            column_statistics: vec![column(
                Precision::Exact(1),
                inexact_i32(1),
                exact_i32(100),
                exact_i32(500),
            )],
        };
        let mostly_inexact = Statistics {
            num_rows: Precision::Inexact(15),
            total_byte_size: Precision::Exact(150),
            column_statistics: vec![column(
                Precision::Inexact(2),
                exact_i32(-10),
                inexact_i32(120),
                Precision::Absent,
            )],
        };

        let items = vec![Arc::new(mostly_exact), Arc::new(mostly_inexact)];
        let summary_stats =
            compute_summary_statistics(items, &schema, |item| Some(item.as_ref()));

        assert_eq!(summary_stats.num_rows, Precision::Inexact(25));
        assert_eq!(summary_stats.total_byte_size, Precision::Inexact(250));

        let col_stats = &summary_stats.column_statistics[0];
        assert_eq!(col_stats.null_count, Precision::Inexact(3));
        assert_eq!(col_stats.max_value, inexact_i32(120));
        assert_eq!(col_stats.min_value, inexact_i32(-10));
        assert!(matches!(col_stats.sum_value, Precision::Absent));
    }

    /// No inputs at all: the test expects absent table-level statistics and
    /// one column entry (matching the single schema field) with an absent
    /// null count.
    #[test]
    fn test_compute_summary_statistics_empty() {
        let schema = int32_schema(&["col1"]);
        let items: Vec<Arc<Statistics>> = Vec::new();

        let summary_stats =
            compute_summary_statistics(items, &schema, |item| Some(item.as_ref()));

        assert_eq!(summary_stats.num_rows, Precision::Absent);
        assert_eq!(summary_stats.total_byte_size, Precision::Absent);
        assert_eq!(summary_stats.column_statistics.len(), 1);
        assert_eq!(
            summary_stats.column_statistics[0].null_count,
            Precision::Absent
        );
    }
}
0 commit comments