From efe648f60bd520e61e7ebcf9ea2fd51772e2ad7e Mon Sep 17 00:00:00 2001 From: Masha Basmanova Date: Thu, 6 Jun 2024 06:54:26 -0700 Subject: [PATCH] Allow trailing spaces in cast(varchar as date) (#10077) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/10077 Presto CAST(varchar AS date) allows leading and trailing spaces. Velox used to allow leading spaces, but not trailing spaces. date(x) function is an alias for CAST. Update the implementation of date(x) to match CAST. from_iso8601_date(x) function is different from CAST. It doesn't allow leading or trailing whitespaces, but allows partial dates, e.g. '2024', '2024-01'. Velox used to allow leading spaces. Here are some examples of CAST and from_iso8601_date: ``` presto> select -> x, try(from_iso8601_date(x)) as from_iso, try(cast(x as date)) as "cast" -> from (values -> '', -> '2024-01-12', -> '2024-01', -> '2024', -> '2024-1-2', -> '2024-1', -> '2024', -> ' 2024-1-3', -> ' 2024-1-3 ', -> '2024-1-3 ' -> ) as t(x) -> ; x | from_iso | cast --------------+------------+------------ | NULL | NULL 2024-01-12 | 2024-01-12 | 2024-01-12 2024-01 | 2024-01-01 | NULL 2024 | 2024-01-01 | NULL 2024-1-2 | 2024-01-02 | 2024-01-02 2024-1 | 2024-01-01 | NULL 2024 | 2024-01-01 | NULL 2024-1-3 | NULL | 2024-01-03 2024-1-3 | NULL | 2024-01-03 2024-1-3 | NULL | 2024-01-03 (10 rows) ``` Fixes https://github.com/facebookincubator/velox/issues/10076 Fixes https://github.com/facebookincubator/velox/issues/10061 Reviewed By: xiaoxmeng, pedroerp Differential Revision: D58217666 fbshipit-source-id: 7e8f3e7076eb0ae8c38314ddeab2e87d20f2ed4b --- velox/expression/tests/CastExprTest.cpp | 16 +++------------- velox/functions/prestosql/DateTimeFunctions.h | 13 +++++++++---- .../prestosql/tests/DateTimeFunctionsTest.cpp | 10 ++++++++-- velox/type/TimestampConversion.cpp | 11 +++++++++-- velox/type/TimestampConversion.h | 3 ++- 5 files changed, 31 insertions(+), 22 deletions(-) diff --git a/velox/expression/tests/CastExprTest.cpp b/velox/expression/tests/CastExprTest.cpp index 1ec1bed68a0a..1b593f5e90e0 100644 --- a/velox/expression/tests/CastExprTest.cpp +++ b/velox/expression/tests/CastExprTest.cpp @@ -802,9 +802,9 @@ TEST_F(CastExprTest, date) { testCast( "date", {"1970-01-01", - "2020-01-01", - "2135-11-09", - "1969-12-27", + " 2020-01-01", + "2135-11-09 ", + " 1969-12-27 ", "1812-04-15", "1920-01-02", "12345-12-18", @@ -909,16 +909,6 @@ TEST_F(CastExprTest, invalidDate) { {"2015-03-18 (BC)"}, "Unable to parse date value: \"2015-03-18 (BC)\"", VARCHAR()); - testInvalidCast( - "date", - {"1970-01-01 "}, - "Unable to parse date value: \"1970-01-01 \"", - VARCHAR()); - testInvalidCast( - "date", - {" 1970-01-01 "}, - "Unable to parse date value: \" 1970-01-01 \"", - VARCHAR()); } TEST_F(CastExprTest, primitiveInvalidCornerCases) { diff --git a/velox/functions/prestosql/DateTimeFunctions.h b/velox/functions/prestosql/DateTimeFunctions.h index 0ddc4bbddfd4..b6afed07143a 100644 --- a/velox/functions/prestosql/DateTimeFunctions.h +++ b/velox/functions/prestosql/DateTimeFunctions.h @@ -122,10 +122,15 @@ struct DateFunction : public TimestampWithTimezoneSupport { // Do nothing. Session timezone doesn't affect the result. } - FOLLY_ALWAYS_INLINE void call( - out_type& result, - const arg_type& date) { - result = DATE()->toDays(date); + FOLLY_ALWAYS_INLINE Status + call(out_type& result, const arg_type& date) { + auto days = util::castFromDateString(date, util::ParseMode::kStandardCast); + if (days.hasError()) { + return days.error(); + } + + result = days.value(); + return Status::OK(); } FOLLY_ALWAYS_INLINE void call( diff --git a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp index ddf6379c1b68..62aaadff6219 100644 --- a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp @@ -3409,9 +3409,10 @@ TEST_F(DateTimeFunctionsTest, fromIso8601Date) { EXPECT_EQ(-31, fromIso("1969-12")); EXPECT_EQ(-31, fromIso("1969-12-1")); EXPECT_EQ(-31, fromIso("1969-12-01")); - EXPECT_EQ(-31, fromIso(" 1969-12-01 ")); EXPECT_EQ(-719862, fromIso("-1-2-1")); + VELOX_ASSERT_THROW(fromIso(" 2024-01-12"), "Unable to parse date value"); + VELOX_ASSERT_THROW(fromIso("2024-01-12 "), "Unable to parse date value"); VELOX_ASSERT_THROW(fromIso("2024-01-xx"), "Unable to parse date value"); VELOX_ASSERT_THROW( fromIso("2024-01-02T12:31:00"), "Unable to parse date value"); @@ -3540,10 +3541,15 @@ TEST_F(DateTimeFunctionsTest, dateFunctionVarchar) { // Date(-18297) is 1919-11-28. EXPECT_EQ(-18297, dateFunction("1919-11-28")); + // Allow leading and trailing spaces. + EXPECT_EQ(18297, dateFunction(" 2020-02-05")); + EXPECT_EQ(18297, dateFunction(" 2020-02-05 ")); + EXPECT_EQ(18297, dateFunction("2020-02-05 ")); + // Illegal date format. VELOX_ASSERT_THROW( dateFunction("2020-02-05 11:00"), - "Unable to parse date value: \"2020-02-05 11:00\", expected format is (YYYY-MM-DD)"); + "Unable to parse date value: \"2020-02-05 11:00\""); } TEST_F(DateTimeFunctionsTest, dateFunctionTimestamp) { diff --git a/velox/type/TimestampConversion.cpp b/velox/type/TimestampConversion.cpp index 9ee92eda247a..df3fd7a0b005 100644 --- a/velox/type/TimestampConversion.cpp +++ b/velox/type/TimestampConversion.cpp @@ -172,7 +172,9 @@ bool tryParseDateString( int32_t year = 0; bool yearneg = false; int sep; - skipSpaces(buf, len, pos); + if (mode != ParseMode::kNonStandardNoTimeCast) { + skipSpaces(buf, len, pos); + } if (pos >= len) { return false; @@ -268,11 +270,16 @@ bool tryParseDateString( return false; } - if (mode == ParseMode::kStandardCast) { + if (mode == ParseMode::kStandardCast || + mode == ParseMode::kNonStandardNoTimeCast) { if (!daysSinceEpochFromDate(year, month, day, daysSinceEpoch).ok()) { return false; } + if (mode == ParseMode::kStandardCast) { + skipSpaces(buf, len, pos); + } + if (pos == len) { return validDate(daysSinceEpoch); } diff --git a/velox/type/TimestampConversion.h b/velox/type/TimestampConversion.h index 5aa62518e10e..5df8c7be0381 100644 --- a/velox/type/TimestampConversion.h +++ b/velox/type/TimestampConversion.h @@ -68,7 +68,8 @@ enum class ParseMode { // `[+-][Y]Y*-[M]M*-[D]DT*` kNonStandardCast, - // Like kNonStandardCast but does not permit inclusion of timestamp. + // ISO-8601 format. No leading or trailing spaces allowed. + // // Supported formats: // `[+-][Y]Y*` // `[+-][Y]Y*-[M]M`