Skip to content

Commit 7bd0e74

Browse files
authored
fix: regexp_replace fails when pattern or replacement is a scalar NULL (#11459)
* fix: regexp_replace fails when pattern or replacement is a scalar NULL * chore
1 parent 0965455 commit 7bd0e74

File tree

2 files changed

+32
-9
lines changed

2 files changed

+32
-9
lines changed

datafusion/functions/src/regex/regexpreplace.rs

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -282,22 +282,23 @@ pub fn regexp_replace<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef>
282282

283283
fn _regexp_replace_early_abort<T: OffsetSizeTrait>(
284284
input_array: &GenericStringArray<T>,
285+
sz: usize,
285286
) -> Result<ArrayRef> {
286287
// Mimicking the existing behavior of regexp_replace, if any of the scalar arguments
287-
// are actually null, then the result will be an array of the same size but with nulls.
288+
// are actually null, then the result will be an array of the same size as the first argument with all nulls.
288289
//
289290
// Also acts like an early abort mechanism when the input array is empty.
290-
Ok(new_null_array(input_array.data_type(), input_array.len()))
291+
Ok(new_null_array(input_array.data_type(), sz))
291292
}
292293
/// Get the first argument from the given string array.
293294
///
294295
/// Note: If the array is empty or the first argument is null,
295296
/// then calls the given early abort function.
296297
macro_rules! fetch_string_arg {
297-
($ARG:expr, $NAME:expr, $T:ident, $EARLY_ABORT:ident) => {{
298+
($ARG:expr, $NAME:expr, $T:ident, $EARLY_ABORT:ident, $ARRAY_SIZE:expr) => {{
298299
let array = as_generic_string_array::<T>($ARG)?;
299300
if array.len() == 0 || array.is_null(0) {
300-
return $EARLY_ABORT(array);
301+
return $EARLY_ABORT(array, $ARRAY_SIZE);
301302
} else {
302303
array.value(0)
303304
}
@@ -313,12 +314,24 @@ fn _regexp_replace_static_pattern_replace<T: OffsetSizeTrait>(
313314
args: &[ArrayRef],
314315
) -> Result<ArrayRef> {
315316
let string_array = as_generic_string_array::<T>(&args[0])?;
316-
let pattern = fetch_string_arg!(&args[1], "pattern", T, _regexp_replace_early_abort);
317-
let replacement =
318-
fetch_string_arg!(&args[2], "replacement", T, _regexp_replace_early_abort);
317+
let array_size = string_array.len();
318+
let pattern = fetch_string_arg!(
319+
&args[1],
320+
"pattern",
321+
T,
322+
_regexp_replace_early_abort,
323+
array_size
324+
);
325+
let replacement = fetch_string_arg!(
326+
&args[2],
327+
"replacement",
328+
T,
329+
_regexp_replace_early_abort,
330+
array_size
331+
);
319332
let flags = match args.len() {
320333
3 => None,
321-
4 => Some(fetch_string_arg!(&args[3], "flags", T, _regexp_replace_early_abort)),
334+
4 => Some(fetch_string_arg!(&args[3], "flags", T, _regexp_replace_early_abort, array_size)),
322335
other => {
323336
return exec_err!(
324337
"regexp_replace was called with {other} arguments. It requires at least 3 and at most 4."
@@ -351,7 +364,7 @@ fn _regexp_replace_static_pattern_replace<T: OffsetSizeTrait>(
351364
let offsets = string_array.value_offsets();
352365
(offsets[string_array.len()] - offsets[0])
353366
.to_usize()
354-
.unwrap()
367+
.expect("Failed to convert usize")
355368
});
356369
let mut new_offsets = BufferBuilder::<T>::new(string_array.len() + 1);
357370
new_offsets.append(T::zero());

datafusion/sqllogictest/test_files/regexp.slt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,16 @@ SELECT regexp_replace(arrow_cast('foobar', 'Dictionary(Int32, Utf8)'), 'bar', 'x
309309
----
310310
fooxx
311311

312+
query TTT
313+
select
314+
regexp_replace(col, NULL, 'c'),
315+
regexp_replace(col, 'a', NULL),
316+
regexp_replace(col, 'a', 'c', NULL)
317+
from (values ('a'), ('b')) as tbl(col);
318+
----
319+
NULL NULL NULL
320+
NULL NULL NULL
321+
312322
# multiline string
313323
query B
314324
SELECT 'foo\nbar\nbaz' ~ 'bar';

0 commit comments

Comments
 (0)