Skip to content

Commit

Permalink
Allow trailing spaces in cast(varchar as date) (facebookincubator#10077)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: facebookincubator#10077

Presto CAST(varchar AS date) allows leading and trailing spaces. Velox used to
allow leading spaces, but not trailing spaces.

date(x) function is an alias for CAST. Update the implementation of date(x) to
match CAST.

from_iso8601_date(x) function is different from CAST. It doesn't allow leading
or trailing whitespaces, but allows partial dates, e.g. '2024', '2024-01'. Velox
used to allow leading spaces.

Here are some examples of CAST and from_iso8601_date:

```
presto> select
        ->   x, try(from_iso8601_date(x)) as from_iso, try(cast(x as date)) as "cast"
        -> from (values
        ->   '',
        ->   '2024-01-12',
        ->   '2024-01',
        ->   '2024',
        ->   '2024-1-2',
        ->   '2024-1',
        ->   '2024',
        ->   ' 2024-1-3',
        ->   '  2024-1-3  ',
        ->   '2024-1-3   '
        -> ) as t(x)
        -> ;
      x       |  from_iso  |    cast
--------------+------------+------------
              | NULL       | NULL
 2024-01-12   | 2024-01-12 | 2024-01-12
 2024-01      | 2024-01-01 | NULL
 2024         | 2024-01-01 | NULL
 2024-1-2     | 2024-01-02 | 2024-01-02
 2024-1       | 2024-01-01 | NULL
 2024         | 2024-01-01 | NULL
  2024-1-3    | NULL       | 2024-01-03
   2024-1-3   | NULL       | 2024-01-03
 2024-1-3     | NULL       | 2024-01-03
(10 rows)
```

Fixes facebookincubator#10076

Fixes facebookincubator#10061

Reviewed By: xiaoxmeng, pedroerp

Differential Revision: D58217666

fbshipit-source-id: 7e8f3e7076eb0ae8c38314ddeab2e87d20f2ed4b
  • Loading branch information
mbasmanova authored and facebook-github-bot committed Jun 6, 2024
1 parent 7637d56 commit efe648f
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 22 deletions.
16 changes: 3 additions & 13 deletions velox/expression/tests/CastExprTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -802,9 +802,9 @@ TEST_F(CastExprTest, date) {
testCast<std::string, int32_t>(
"date",
{"1970-01-01",
"2020-01-01",
"2135-11-09",
"1969-12-27",
" 2020-01-01",
"2135-11-09 ",
" 1969-12-27 ",
"1812-04-15",
"1920-01-02",
"12345-12-18",
Expand Down Expand Up @@ -909,16 +909,6 @@ TEST_F(CastExprTest, invalidDate) {
{"2015-03-18 (BC)"},
"Unable to parse date value: \"2015-03-18 (BC)\"",
VARCHAR());
testInvalidCast<std::string>(
"date",
{"1970-01-01 "},
"Unable to parse date value: \"1970-01-01 \"",
VARCHAR());
testInvalidCast<std::string>(
"date",
{" 1970-01-01 "},
"Unable to parse date value: \" 1970-01-01 \"",
VARCHAR());
}

TEST_F(CastExprTest, primitiveInvalidCornerCases) {
Expand Down
13 changes: 9 additions & 4 deletions velox/functions/prestosql/DateTimeFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,10 +122,15 @@ struct DateFunction : public TimestampWithTimezoneSupport<T> {
// Do nothing. Session timezone doesn't affect the result.
}

FOLLY_ALWAYS_INLINE void call(
out_type<Date>& result,
const arg_type<Varchar>& date) {
result = DATE()->toDays(date);
FOLLY_ALWAYS_INLINE Status
call(out_type<Date>& result, const arg_type<Varchar>& date) {
auto days = util::castFromDateString(date, util::ParseMode::kStandardCast);
if (days.hasError()) {
return days.error();
}

result = days.value();
return Status::OK();
}

FOLLY_ALWAYS_INLINE void call(
Expand Down
10 changes: 8 additions & 2 deletions velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3409,9 +3409,10 @@ TEST_F(DateTimeFunctionsTest, fromIso8601Date) {
EXPECT_EQ(-31, fromIso("1969-12"));
EXPECT_EQ(-31, fromIso("1969-12-1"));
EXPECT_EQ(-31, fromIso("1969-12-01"));
EXPECT_EQ(-31, fromIso(" 1969-12-01 "));
EXPECT_EQ(-719862, fromIso("-1-2-1"));

VELOX_ASSERT_THROW(fromIso(" 2024-01-12"), "Unable to parse date value");
VELOX_ASSERT_THROW(fromIso("2024-01-12 "), "Unable to parse date value");
VELOX_ASSERT_THROW(fromIso("2024-01-xx"), "Unable to parse date value");
VELOX_ASSERT_THROW(
fromIso("2024-01-02T12:31:00"), "Unable to parse date value");
Expand Down Expand Up @@ -3540,10 +3541,15 @@ TEST_F(DateTimeFunctionsTest, dateFunctionVarchar) {
// Date(-18297) is 1919-11-28.
EXPECT_EQ(-18297, dateFunction("1919-11-28"));

// Allow leading and trailing spaces.
EXPECT_EQ(18297, dateFunction(" 2020-02-05"));
EXPECT_EQ(18297, dateFunction(" 2020-02-05 "));
EXPECT_EQ(18297, dateFunction("2020-02-05 "));

// Illegal date format.
VELOX_ASSERT_THROW(
dateFunction("2020-02-05 11:00"),
"Unable to parse date value: \"2020-02-05 11:00\", expected format is (YYYY-MM-DD)");
"Unable to parse date value: \"2020-02-05 11:00\"");
}

TEST_F(DateTimeFunctionsTest, dateFunctionTimestamp) {
Expand Down
11 changes: 9 additions & 2 deletions velox/type/TimestampConversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,9 @@ bool tryParseDateString(
int32_t year = 0;
bool yearneg = false;
int sep;
skipSpaces(buf, len, pos);
if (mode != ParseMode::kNonStandardNoTimeCast) {
skipSpaces(buf, len, pos);
}

if (pos >= len) {
return false;
Expand Down Expand Up @@ -268,11 +270,16 @@ bool tryParseDateString(
return false;
}

if (mode == ParseMode::kStandardCast) {
if (mode == ParseMode::kStandardCast ||
mode == ParseMode::kNonStandardNoTimeCast) {
if (!daysSinceEpochFromDate(year, month, day, daysSinceEpoch).ok()) {
return false;
}

if (mode == ParseMode::kStandardCast) {
skipSpaces(buf, len, pos);
}

if (pos == len) {
return validDate(daysSinceEpoch);
}
Expand Down
3 changes: 2 additions & 1 deletion velox/type/TimestampConversion.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ enum class ParseMode {
// `[+-][Y]Y*-[M]M*-[D]DT*`
kNonStandardCast,

// Like kNonStandardCast but does not permit inclusion of timestamp.
// ISO-8601 format. No leading or trailing spaces allowed.
//
// Supported formats:
// `[+-][Y]Y*`
// `[+-][Y]Y*-[M]M`
Expand Down

0 comments on commit efe648f

Please sign in to comment.