@@ -30,7 +30,6 @@ use datafusion_expr::{
30
30
} ;
31
31
use itertools:: izip;
32
32
use regex:: Regex ;
33
- use std:: collections:: hash_map:: Entry ;
34
33
use std:: collections:: HashMap ;
35
34
use std:: sync:: { Arc , OnceLock } ;
36
35
@@ -312,12 +311,12 @@ where
312
311
313
312
let pattern = compile_regex ( regex, flags_scalar) ?;
314
313
315
- Ok ( Arc :: new ( Int64Array :: from_iter_values (
314
+ Ok ( Arc :: new (
316
315
values
317
316
. iter ( )
318
317
. map ( |value| count_matches ( value, & pattern, start_scalar) )
319
- . collect :: < Result < Vec < i64 > , ArrowError > > ( ) ?,
320
- ) ) )
318
+ . collect :: < Result < Int64Array , ArrowError > > ( ) ?,
319
+ ) )
321
320
}
322
321
( true , true , false ) => {
323
322
let regex = match regex_scalar {
@@ -336,17 +335,17 @@ where
336
335
) ) ) ;
337
336
}
338
337
339
- Ok ( Arc :: new ( Int64Array :: from_iter_values (
338
+ Ok ( Arc :: new (
340
339
values
341
340
. iter ( )
342
341
. zip ( flags_array. iter ( ) )
343
342
. map ( |( value, flags) | {
344
343
let pattern =
345
344
compile_and_cache_regex ( regex, flags, & mut regex_cache) ?;
346
- count_matches ( value, & pattern, start_scalar)
345
+ count_matches ( value, pattern, start_scalar)
347
346
} )
348
- . collect :: < Result < Vec < i64 > , ArrowError > > ( ) ?,
349
- ) ) )
347
+ . collect :: < Result < Int64Array , ArrowError > > ( ) ?,
348
+ ) )
350
349
}
351
350
( true , false , true ) => {
352
351
let regex = match regex_scalar {
@@ -360,13 +359,13 @@ where
360
359
361
360
let start_array = start_array. unwrap ( ) ;
362
361
363
- Ok ( Arc :: new ( Int64Array :: from_iter_values (
362
+ Ok ( Arc :: new (
364
363
values
365
364
. iter ( )
366
365
. zip ( start_array. iter ( ) )
367
366
. map ( |( value, start) | count_matches ( value, & pattern, start) )
368
- . collect :: < Result < Vec < i64 > , ArrowError > > ( ) ?,
369
- ) ) )
367
+ . collect :: < Result < Int64Array , ArrowError > > ( ) ?,
368
+ ) )
370
369
}
371
370
( true , false , false ) => {
372
371
let regex = match regex_scalar {
@@ -385,7 +384,7 @@ where
385
384
) ) ) ;
386
385
}
387
386
388
- Ok ( Arc :: new ( Int64Array :: from_iter_values (
387
+ Ok ( Arc :: new (
389
388
izip ! (
390
389
values. iter( ) ,
391
390
start_array. unwrap( ) . iter( ) ,
@@ -395,10 +394,10 @@ where
395
394
let pattern =
396
395
compile_and_cache_regex ( regex, flags, & mut regex_cache) ?;
397
396
398
- count_matches ( value, & pattern, start)
397
+ count_matches ( value, pattern, start)
399
398
} )
400
- . collect :: < Result < Vec < i64 > , ArrowError > > ( ) ?,
401
- ) ) )
399
+ . collect :: < Result < Int64Array , ArrowError > > ( ) ?,
400
+ ) )
402
401
}
403
402
( false , true , true ) => {
404
403
if values. len ( ) != regex_array. len ( ) {
@@ -409,7 +408,7 @@ where
409
408
) ) ) ;
410
409
}
411
410
412
- Ok ( Arc :: new ( Int64Array :: from_iter_values (
411
+ Ok ( Arc :: new (
413
412
values
414
413
. iter ( )
415
414
. zip ( regex_array. iter ( ) )
@@ -424,10 +423,10 @@ where
424
423
flags_scalar,
425
424
& mut regex_cache,
426
425
) ?;
427
- count_matches ( value, & pattern, start_scalar)
426
+ count_matches ( value, pattern, start_scalar)
428
427
} )
429
- . collect :: < Result < Vec < i64 > , ArrowError > > ( ) ?,
430
- ) ) )
428
+ . collect :: < Result < Int64Array , ArrowError > > ( ) ?,
429
+ ) )
431
430
}
432
431
( false , true , false ) => {
433
432
if values. len ( ) != regex_array. len ( ) {
@@ -447,7 +446,7 @@ where
447
446
) ) ) ;
448
447
}
449
448
450
- Ok ( Arc :: new ( Int64Array :: from_iter_values (
449
+ Ok ( Arc :: new (
451
450
izip ! ( values. iter( ) , regex_array. iter( ) , flags_array. iter( ) )
452
451
. map ( |( value, regex, flags) | {
453
452
let regex = match regex {
@@ -458,10 +457,10 @@ where
458
457
let pattern =
459
458
compile_and_cache_regex ( regex, flags, & mut regex_cache) ?;
460
459
461
- count_matches ( value, & pattern, start_scalar)
460
+ count_matches ( value, pattern, start_scalar)
462
461
} )
463
- . collect :: < Result < Vec < i64 > , ArrowError > > ( ) ?,
464
- ) ) )
462
+ . collect :: < Result < Int64Array , ArrowError > > ( ) ?,
463
+ ) )
465
464
}
466
465
( false , false , true ) => {
467
466
if values. len ( ) != regex_array. len ( ) {
@@ -481,7 +480,7 @@ where
481
480
) ) ) ;
482
481
}
483
482
484
- Ok ( Arc :: new ( Int64Array :: from_iter_values (
483
+ Ok ( Arc :: new (
485
484
izip ! ( values. iter( ) , regex_array. iter( ) , start_array. iter( ) )
486
485
. map ( |( value, regex, start) | {
487
486
let regex = match regex {
@@ -494,10 +493,10 @@ where
494
493
flags_scalar,
495
494
& mut regex_cache,
496
495
) ?;
497
- count_matches ( value, & pattern, start)
496
+ count_matches ( value, pattern, start)
498
497
} )
499
- . collect :: < Result < Vec < i64 > , ArrowError > > ( ) ?,
500
- ) ) )
498
+ . collect :: < Result < Int64Array , ArrowError > > ( ) ?,
499
+ ) )
501
500
}
502
501
( false , false , false ) => {
503
502
if values. len ( ) != regex_array. len ( ) {
@@ -526,7 +525,7 @@ where
526
525
) ) ) ;
527
526
}
528
527
529
- Ok ( Arc :: new ( Int64Array :: from_iter_values (
528
+ Ok ( Arc :: new (
530
529
izip ! (
531
530
values. iter( ) ,
532
531
regex_array. iter( ) ,
@@ -541,27 +540,24 @@ where
541
540
542
541
let pattern =
543
542
compile_and_cache_regex ( regex, flags, & mut regex_cache) ?;
544
- count_matches ( value, & pattern, start)
543
+ count_matches ( value, pattern, start)
545
544
} )
546
- . collect :: < Result < Vec < i64 > , ArrowError > > ( ) ?,
547
- ) ) )
545
+ . collect :: < Result < Int64Array , ArrowError > > ( ) ?,
546
+ ) )
548
547
}
549
548
}
550
549
}
551
550
552
- fn compile_and_cache_regex (
553
- regex : & str ,
554
- flags : Option < & str > ,
555
- regex_cache : & mut HashMap < String , Regex > ,
556
- ) -> Result < Regex , ArrowError > {
557
- match regex_cache. entry ( regex. to_string ( ) ) {
558
- Entry :: Vacant ( entry) => {
559
- let compiled = compile_regex ( regex, flags) ?;
560
- entry. insert ( compiled. clone ( ) ) ;
561
- Ok ( compiled)
562
- }
563
- Entry :: Occupied ( entry) => Ok ( entry. get ( ) . to_owned ( ) ) ,
551
+ fn compile_and_cache_regex < ' a > (
552
+ regex : & ' a str ,
553
+ flags : Option < & ' a str > ,
554
+ regex_cache : & ' a mut HashMap < String , Regex > ,
555
+ ) -> Result < & ' a Regex , ArrowError > {
556
+ if !regex_cache. contains_key ( regex) {
557
+ let compiled = compile_regex ( regex, flags) ?;
558
+ regex_cache. insert ( regex. to_string ( ) , compiled) ;
564
559
}
560
+ Ok ( regex_cache. get ( regex) . unwrap ( ) )
565
561
}
566
562
567
563
fn compile_regex ( regex : & str , flags : Option < & str > ) -> Result < Regex , ArrowError > {
0 commit comments