From eb4be68a823c02533e9b58e0534ac99437b07718 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 15 Feb 2024 00:47:59 -0800 Subject: [PATCH] Return null for overflow when casting string to integer under safe option enabled (#5398) * Return null for overflow when casting string to integer * Use atoi_simd * Use atoi * Return to str.parse. * Revert "Return to str.parse." This reverts commit 53dd0479a5221e6bc7b6447389abdf712a1819ad. * Check trailing string --- arrow-cast/Cargo.toml | 1 + arrow-cast/src/cast.rs | 19 +++++++++++++++++++ arrow-cast/src/parse.rs | 7 ++++++- 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/arrow-cast/Cargo.toml b/arrow-cast/Cargo.toml index 19b857297d14..81dd0ebd415f 100644 --- a/arrow-cast/Cargo.toml +++ b/arrow-cast/Cargo.toml @@ -49,6 +49,7 @@ chrono = { workspace = true } half = { version = "2.1", default-features = false } num = { version = "0.4", default-features = false, features = ["std"] } lexical-core = { version = "^0.8", default-features = false, features = ["write-integers", "write-floats", "parse-integers", "parse-floats"] } +atoi = "2.0.0" comfy-table = { version = "7.0", optional = true, default-features = false } base64 = "0.21" diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index a813c5f6c87e..7868946532c4 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -4911,6 +4911,25 @@ mod tests { assert!(c.is_null(2)); } + #[test] + fn test_cast_string_to_integral_overflow() { + let str = Arc::new(StringArray::from(vec![ + Some("123"), + Some("-123"), + Some("86374"), + None, + ])) as ArrayRef; + + let options = CastOptions { + safe: true, + format_options: FormatOptions::default(), + }; + let res = cast_with_options(&str, &DataType::Int16, &options).expect("should cast to i16"); + let expected = + Arc::new(Int16Array::from(vec![Some(123), Some(-123), None, None])) as ArrayRef; + assert_eq!(&res, &expected); + } + #[test] fn test_cast_string_to_timestamp() { let a1 = Arc::new(StringArray::from(vec![ diff --git a/arrow-cast/src/parse.rs b/arrow-cast/src/parse.rs index 50e9fda672f6..72942af8394a 100644 --- a/arrow-cast/src/parse.rs +++ b/arrow-cast/src/parse.rs @@ -438,7 +438,12 @@ macro_rules! parser_primitive { ($t:ty) => { impl Parser for $t { fn parse(string: &str) -> Option { - lexical_core::parse::(string.as_bytes()).ok() + match atoi::FromRadix10SignedChecked::from_radix_10_signed_checked( + string.as_bytes(), + ) { + (Some(n), x) if x == string.len() => Some(n), + _ => None, + } } } };