@@ -93,21 +93,10 @@ impl<T: ArrowPrimitiveType, const NULLABLE: bool> GroupColumn
93
93
fn equal_to ( & self , lhs_row : usize , array : & ArrayRef , rhs_row : usize ) -> bool {
94
94
// Perf: skip null check (by short circuit) if input is not nullable
95
95
if NULLABLE {
96
- // In nullable path, we should check if both `exist row` and `input row`
97
- // are null/not null
98
- let is_exist_null = self . nulls . is_null ( lhs_row) ;
99
- let null_match = is_exist_null == array. is_null ( rhs_row) ;
100
- if !null_match {
101
- // If `is_null`s in `exist row` and `input row` don't match, return not equal to
102
- return false ;
103
- } else if is_exist_null {
104
- // If `is_null`s in `exist row` and `input row` match, and they are `null`s,
105
- // return equal to
106
- //
107
- // NOTICE: we should not check their values when they are `null`s, because they are
108
- // meaningless actually, and not ensured to be same
109
- //
110
- return true ;
96
+ let exist_null = self . nulls . is_null ( lhs_row) ;
97
+ let input_null = array. is_null ( rhs_row) ;
98
+ if let Some ( result) = nulls_equal_to ( exist_null, input_null) {
99
+ return result;
111
100
}
112
101
// Otherwise, we need to check their values
113
102
}
@@ -224,9 +213,14 @@ where
224
213
where
225
214
B : ByteArrayType ,
226
215
{
227
- let arr = array. as_bytes :: < B > ( ) ;
228
- self . nulls . is_null ( lhs_row) == arr. is_null ( rhs_row)
229
- && self . value ( lhs_row) == ( arr. value ( rhs_row) . as_ref ( ) as & [ u8 ] )
216
+ let array = array. as_bytes :: < B > ( ) ;
217
+ let exist_null = self . nulls . is_null ( lhs_row) ;
218
+ let input_null = array. is_null ( rhs_row) ;
219
+ if let Some ( result) = nulls_equal_to ( exist_null, input_null) {
220
+ return result;
221
+ }
222
+ // Otherwise, we need to check their values
223
+ self . value ( lhs_row) == ( array. value ( rhs_row) . as_ref ( ) as & [ u8 ] )
230
224
}
231
225
232
226
/// return the current value of the specified row irrespective of null
@@ -382,6 +376,20 @@ where
382
376
}
383
377
}
384
378
379
/// Tries to decide the equality of two rows from their null flags alone.
///
/// `lhs_null` is the null flag of the already-stored ("exist") row and
/// `rhs_null` is the null flag of the incoming ("input") row.
///
/// Returns:
/// - `Some(true)` when both rows are null. Their underlying values must not
///   be compared, because values behind a null are not guaranteed to match.
/// - `Some(false)` when exactly one of the rows is null.
/// - `None` when neither row is null, meaning the caller has to compare the
///   actual values.
fn nulls_equal_to(lhs_null: bool, rhs_null: bool) -> Option<bool> {
    // Every combination is spelled out so the match is exhaustive without a
    // wildcard arm.
    match (lhs_null, rhs_null) {
        (true, true) => Some(true),
        (true, false) | (false, true) => Some(false),
        (false, false) => None,
    }
}
392
+
385
393
#[ cfg( test) ]
386
394
mod tests {
387
395
use std:: sync:: Arc ;
@@ -468,13 +476,14 @@ mod tests {
468
476
builder. append_val ( & builder_array, 5 ) ;
469
477
470
478
// Define input array
471
- let ( _ , values, _) =
479
+ let ( _nulls , values, _) =
472
480
Int64Array :: from ( vec ! [ Some ( 1 ) , Some ( 2 ) , None , None , Some ( 1 ) , Some ( 3 ) ] )
473
481
. into_parts ( ) ;
474
482
483
+ // explicitly build a boolean buffer where one of the null values also happens to match
475
484
let mut boolean_buffer_builder = BooleanBufferBuilder :: new ( 6 ) ;
476
485
boolean_buffer_builder. append ( true ) ;
477
- boolean_buffer_builder. append ( false ) ;
486
+ boolean_buffer_builder. append ( false ) ; // this sets Some(2) to null above
478
487
boolean_buffer_builder. append ( false ) ;
479
488
boolean_buffer_builder. append ( false ) ;
480
489
boolean_buffer_builder. append ( true ) ;
@@ -511,4 +520,63 @@ mod tests {
511
520
assert ! ( builder. equal_to( 0 , & input_array, 0 ) ) ;
512
521
assert ! ( !builder. equal_to( 1 , & input_array, 1 ) ) ;
513
522
}
523
+
524
#[test]
fn test_byte_array_equal_to() {
    // One row per null/value combination, checked in this order:
    //   - row 0: exist null, input not null
    //   - row 1: exist null, input null; values not equal
    //   - row 2: exist null, input null; values equal
    //   - row 3: exist not null, input null
    //   - row 4: exist not null, input not null; values not equal
    //   - row 5: exist not null, input not null; values equal

    // Define ByteGroupValueBuilder and append every row of `builder_array`
    let mut builder = ByteGroupValueBuilder::<i32>::new(OutputType::Utf8);
    let builder_array = Arc::new(StringArray::from(vec![
        None,
        None,
        None,
        Some("foo"),
        Some("bar"),
        Some("baz"),
    ])) as ArrayRef;
    for row in 0..6 {
        builder.append_val(&builder_array, row);
    }

    // Define input array; only its offsets and value buffer are reused, the
    // null buffer is replaced by the hand-built one below
    let (offsets, buffer, _nulls) = StringArray::from(vec![
        Some("foo"),
        Some("bar"),
        None,
        None,
        Some("foo"),
        Some("baz"),
    ])
    .into_parts();

    // Explicitly build the validity bits (`true` = valid, `false` = null) so
    // that row 1 is null even though the value buffer still holds "bar" for it
    let mut boolean_buffer_builder = BooleanBufferBuilder::new(6);
    for is_valid in [
        true,
        false, // this sets Some("bar") to null above
        false,
        false,
        true,
        true,
    ] {
        boolean_buffer_builder.append(is_valid);
    }
    let nulls = NullBuffer::new(boolean_buffer_builder.finish());
    let input_array =
        Arc::new(StringArray::new(offsets, buffer, Some(nulls))) as ArrayRef;

    // Check
    assert!(!builder.equal_to(0, &input_array, 0));
    assert!(builder.equal_to(1, &input_array, 1));
    assert!(builder.equal_to(2, &input_array, 2));
    assert!(!builder.equal_to(3, &input_array, 3));
    assert!(!builder.equal_to(4, &input_array, 4));
    assert!(builder.equal_to(5, &input_array, 5));
}
514
582
}
0 commit comments