From 696ac49aeaedfd4346051c725222aa85cd2b3c81 Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Sat, 24 Aug 2024 23:02:59 +0200 Subject: [PATCH] Check for overflow in substring with negative start (#12141) --- datafusion/functions/src/unicode/substr.rs | 47 +++++++++++++++++----- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/datafusion/functions/src/unicode/substr.rs b/datafusion/functions/src/unicode/substr.rs index 9fd8c75eab23..db218f9127ae 100644 --- a/datafusion/functions/src/unicode/substr.rs +++ b/datafusion/functions/src/unicode/substr.rs @@ -25,7 +25,7 @@ use arrow::array::{ use arrow::datatypes::DataType; use datafusion_common::cast::as_int64_array; -use datafusion_common::{exec_err, Result}; +use datafusion_common::{exec_datafusion_err, exec_err, Result}; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; @@ -144,19 +144,23 @@ where let result = iter .zip(start_array.iter()) .zip(count_array.iter()) - .map(|((string, start), count)| match (string, start, count) { - (Some(string), Some(start), Some(count)) => { - if count < 0 { - exec_err!( + .map(|((string, start), count)| { + match (string, start, count) { + (Some(string), Some(start), Some(count)) => { + if count < 0 { + exec_err!( "negative substring length not allowed: substr(, {start}, {count})" ) - } else { - let skip = max(0, start - 1); - let count = max(0, count + (if start < 1 {start - 1} else {0})); - Ok(Some(string.chars().skip(skip as usize).take(count as usize).collect::())) + } else { + let skip = max(0, start.checked_sub(1).ok_or_else( + || exec_datafusion_err!("negative overflow when calculating skip value") + )?); + let count = max(0, count + (if start < 1 { start - 1 } else { 0 })); + Ok(Some(string.chars().skip(skip as usize).take(count as usize).collect::())) + } } + _ => Ok(None), } - _ => Ok(None), }) .collect::>>()?; @@ -482,6 +486,29 @@ mod tests { Utf8, StringArray ); + test_function!( + SubstrFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::from("abc")), + ColumnarValue::Scalar(ScalarValue::from(-9223372036854775808i64)), + ], + Ok(Some("abc")), + &str, + Utf8, + StringArray + ); + test_function!( + SubstrFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::from("overflow")), + ColumnarValue::Scalar(ScalarValue::from(-9223372036854775808i64)), + ColumnarValue::Scalar(ScalarValue::from(1i64)), + ], + exec_err!("negative overflow when calculating skip value"), + &str, + Utf8, + StringArray + ); Ok(()) }