Skip to content

Commit 88aab46

Browse files
committed
Fix cargo check
1 parent a78d959 commit 88aab46

File tree

5 files changed

+97
-124
lines changed

5 files changed

+97
-124
lines changed

datafusion/functions/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ math_expressions = []
5454
# enable regular expressions
5555
regex_expressions = ["regex"]
5656
# enable string functions
57-
string_expressions = ["uuid"]
57+
string_expressions = ["regex", "uuid"]
5858
# enable unicode functions
5959
unicode_expressions = ["hashbrown", "unicode-segmentation"]
6060

datafusion/functions/src/regex/common.rs

Lines changed: 0 additions & 119 deletions
This file was deleted.

datafusion/functions/src/regex/mod.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717

1818
//! "regex" DataFusion functions
1919
20-
pub mod common;
2120
pub mod regexplike;
2221
pub mod regexpmatch;
2322
pub mod regexpreplace;

datafusion/functions/src/string/common.rs

Lines changed: 95 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,20 +17,23 @@
1717

1818
//! Common utilities for implementing string functions
1919
20+
use std::collections::HashMap;
2021
use std::fmt::{Display, Formatter};
2122
use std::sync::Arc;
2223

2324
use arrow::array::{
2425
new_null_array, Array, ArrayAccessor, ArrayDataBuilder, ArrayIter, ArrayRef,
25-
GenericStringArray, GenericStringBuilder, OffsetSizeTrait, StringArray,
26+
BooleanArray, GenericStringArray, GenericStringBuilder, OffsetSizeTrait, StringArray,
2627
StringBuilder, StringViewArray,
2728
};
2829
use arrow::buffer::{Buffer, MutableBuffer, NullBuffer};
2930
use arrow::datatypes::DataType;
31+
use arrow_buffer::BooleanBufferBuilder;
3032
use datafusion_common::cast::{as_generic_string_array, as_string_view_array};
31-
use datafusion_common::Result;
3233
use datafusion_common::{exec_err, ScalarValue};
34+
use datafusion_common::{DataFusionError, Result};
3335
use datafusion_expr::ColumnarValue;
36+
use regex::Regex;
3437

3538
pub(crate) enum TrimType {
3639
Left,
@@ -478,3 +481,93 @@ where
478481
GenericStringArray::<O>::new_unchecked(offsets, values, nulls)
479482
}))
480483
}
484+
485+
/// Perform SQL `array ~ regex_array` operation on
486+
/// [`StringArray`] / [`LargeStringArray`] / [`StringViewArray`].
487+
/// If `regex_array` element has an empty value, the corresponding result value is always true.
488+
///
489+
/// `flags_array` are optional [`StringArray`] / [`LargeStringArray`] / [`StringViewArray`] flag,
490+
/// which allow special search modes, such as case-insensitive and multi-line mode.
491+
/// See the documentation [here](https://docs.rs/regex/1.5.4/regex/#grouping-and-flags)
492+
/// for more information.
493+
///
494+
/// It is inspired / copied from `regexp_is_match_utf8` [arrow-rs].
495+
///
496+
/// [arrow-rs]: https://github.com/apache/arrow-rs/blob/8c956a9f9ab26c14072740cce64c2b99cb039b13/arrow-string/src/regexp.rs#L31-L37
497+
pub fn regexp_is_match<'a, ArrayType1, ArrayType2, ArrayType3>(
498+
array: &'a ArrayType1,
499+
regex_array: &'a ArrayType2,
500+
flags_array: Option<&'a ArrayType3>,
501+
) -> datafusion_common::Result<BooleanArray, DataFusionError>
502+
where
503+
&'a ArrayType1: StringArrayType<'a>,
504+
&'a ArrayType2: StringArrayType<'a>,
505+
&'a ArrayType3: StringArrayType<'a>,
506+
{
507+
if array.len() != regex_array.len() {
508+
return Err(DataFusionError::Execution(
509+
"Cannot perform comparison operation on arrays of different length"
510+
.to_string(),
511+
));
512+
}
513+
514+
let nulls = NullBuffer::union(array.nulls(), regex_array.nulls());
515+
516+
let mut patterns: HashMap<String, Regex> = HashMap::new();
517+
let mut result = BooleanBufferBuilder::new(array.len());
518+
519+
let complete_pattern = match flags_array {
520+
Some(flags) => Box::new(regex_array.iter().zip(flags.iter()).map(
521+
|(pattern, flags)| {
522+
pattern.map(|pattern| match flags {
523+
Some(flag) => format!("(?{flag}){pattern}"),
524+
None => pattern.to_string(),
525+
})
526+
},
527+
)) as Box<dyn Iterator<Item = Option<String>>>,
528+
None => Box::new(
529+
regex_array
530+
.iter()
531+
.map(|pattern| pattern.map(|pattern| pattern.to_string())),
532+
),
533+
};
534+
535+
array
536+
.iter()
537+
.zip(complete_pattern)
538+
.map(|(value, pattern)| {
539+
match (value, pattern) {
540+
(Some(_), Some(pattern)) if pattern == *"" => {
541+
result.append(true);
542+
}
543+
(Some(value), Some(pattern)) => {
544+
let existing_pattern = patterns.get(&pattern);
545+
let re = match existing_pattern {
546+
Some(re) => re,
547+
None => {
548+
let re = Regex::new(pattern.as_str()).map_err(|e| {
549+
DataFusionError::Execution(format!(
550+
"Regular expression did not compile: {e:?}"
551+
))
552+
})?;
553+
patterns.entry(pattern).or_insert(re)
554+
}
555+
};
556+
result.append(re.is_match(value));
557+
}
558+
_ => result.append(false),
559+
}
560+
Ok(())
561+
})
562+
.collect::<datafusion_common::Result<Vec<()>, DataFusionError>>()?;
563+
564+
let data = unsafe {
565+
ArrayDataBuilder::new(DataType::Boolean)
566+
.len(array.len())
567+
.buffers(vec![result.into()])
568+
.nulls(nulls)
569+
.build_unchecked()
570+
};
571+
572+
Ok(BooleanArray::from(data))
573+
}

datafusion/functions/src/string/contains.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::regex::common::regexp_is_match;
18+
use crate::string::common::regexp_is_match;
1919
use crate::utils::make_scalar_function;
2020

2121
use arrow::array::{Array, ArrayRef, AsArray, GenericStringArray, StringViewArray};

0 commit comments

Comments
 (0)