From 96fdade76078dc3ff4855708d7af786ad822d789 Mon Sep 17 00:00:00 2001 From: hailin0 Date: Wed, 12 Jun 2024 09:43:28 +0800 Subject: [PATCH] [Feature][Transforms] Support is_date function of sql (#6966) --- docs/en/transform-v2/sql-functions.md | 9 +++++++++ .../test/resources/sql_transform/func_datetime.conf | 12 ++++++++++-- .../transform/sql/zeta/ZetaSQLFunction.java | 3 +++ .../seatunnel/transform/sql/zeta/ZetaSQLType.java | 1 + .../sql/zeta/functions/DateTimeFunction.java | 9 +++++++++ 5 files changed, 32 insertions(+), 2 deletions(-) diff --git a/docs/en/transform-v2/sql-functions.md b/docs/en/transform-v2/sql-functions.md index ab98c2d2bfa..e1c541ef1c9 100644 --- a/docs/en/transform-v2/sql-functions.md +++ b/docs/en/transform-v2/sql-functions.md @@ -795,6 +795,15 @@ Example: MONTHNAME(CREATED) +### IS_DATE + +```IS_DATE(string, formatString)``` +Checks whether a string can be parsed as a date-time using the given format and returns a boolean value: true if parsing succeeds, false if it fails. The most important format characters are: y year, M month, d day, H hour, m minute, s second. For details of the format, see java.time.format.DateTimeFormatter. 
+ +Example: + +CALL IS_DATE('2021-04-08 13:34:45','yyyy-MM-dd HH:mm:ss') + ### PARSEDATETIME / TO_DATE ```PARSEDATETIME | TO_DATE(string, formatString)``` diff --git a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/sql_transform/func_datetime.conf b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/sql_transform/func_datetime.conf index 042b66ad25c..05f8f7935f0 100644 --- a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/sql_transform/func_datetime.conf +++ b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/sql_transform/func_datetime.conf @@ -37,10 +37,11 @@ source { c4 = "timestamp" c5 = "string" c6 = "string" + c7 = "string" } } rows = [ - {fields = [1, "Joy Ding", "2021-04-15T13:34:45", "2022-01-23T12:34:56", "2021-04-15T13:34:45.235", "2021-04-08T13:34:45.235", "2021-04-08 13:34:45.235", "2021-04-08"], kind = INSERT} + {fields = [1, "Joy Ding", "2021-04-15T13:34:45", "2022-01-23T12:34:56", "2021-04-15T13:34:45.235", "2021-04-08T13:34:45.235", "2021-04-08 13:34:45.235", "2021-04-08", "2021-04-08 13:34:45.235"], kind = INSERT} ] } } @@ -49,7 +50,7 @@ transform { Sql { source_table_name = "fake" result_table_name = "fake1" - query = "select current_date as cd, current_timestamp as ct2, dateadd(c1, 1) as c1_1, dateadd(c1, 40, 'DAY') as c1_2, dateadd(c1, 2, 'YEAR') as c1_3, dateadd(c1, 10, 'MONTH') as c1_4, dateadd(c1, 13, 'HOUR') as c1_5, dateadd(c1, 40, 'MINUTE') as c1_6, dateadd(c1, 30, 'SECOND') as c1_7, datediff(c1, c2) as test, datediff(c1, c2, 'DAY') as c2_1, datediff(c1, c2, 'YEAR') as c2_2, datediff(c1, c2, 'MONTH') as c2_3, datediff(c1, c2, 'HOUR') as c2_4, datediff(c1, c2, 'MINUTE') as c2_5, datediff(c1, c2, 'SECOND') as c2_6, date_trunc(c3, 'YEAR') as c3_1, date_trunc(c3, 'MONTH') as c3_2, date_trunc(c3, 'DAY') as c3_3, date_trunc(c3, 'HOUR') as c3_4, date_trunc(c3, 
'MINUTE') as c3_5, date_trunc(c3, 'SECOND') as c3_6, dayname(c3) as c3_7, day_of_week(c3) c3_8, day_of_year(c3) c3_9, extract(YEAR FROM c3) c3_10, extract(MONTH FROM c3) c3_11, extract(DAY FROM c3) c3_12, extract(HOUR FROM c3) c3_13, extract(MINUTE from c3) c3_14, extract(SECOND from c3) c3_15, extract(MILLISECOND from c3) c3_16, extract(DAYOFWEEK FROM c3) c3_17, extract(DAYOFYEAR FROM c3) c3_18, formatdatetime(c4,'yyyy-MM-dd HH:mm:ss.S') c4_1, formatdatetime(c4,'yyyy-MM-dd') c4_2, formatdatetime(c4,'HH:mm:ss.SSS') c4_3, hour(c4) c4_4, minute(c4) c4_5, month(c4) c4_6, monthname(c4) c4_7, parsedatetime(c5,'yyyy-MM-dd HH:mm:ss.SSS') c5_1, to_date(c6,'yyyy-MM-dd') c6_1, quarter(c4) c4_8, second(c4) c4_9, week(c4) c4_10, year(c4) c4_11 from fake" + query = "select current_date as cd, current_timestamp as ct2, dateadd(c1, 1) as c1_1, dateadd(c1, 40, 'DAY') as c1_2, dateadd(c1, 2, 'YEAR') as c1_3, dateadd(c1, 10, 'MONTH') as c1_4, dateadd(c1, 13, 'HOUR') as c1_5, dateadd(c1, 40, 'MINUTE') as c1_6, dateadd(c1, 30, 'SECOND') as c1_7, datediff(c1, c2) as test, datediff(c1, c2, 'DAY') as c2_1, datediff(c1, c2, 'YEAR') as c2_2, datediff(c1, c2, 'MONTH') as c2_3, datediff(c1, c2, 'HOUR') as c2_4, datediff(c1, c2, 'MINUTE') as c2_5, datediff(c1, c2, 'SECOND') as c2_6, date_trunc(c3, 'YEAR') as c3_1, date_trunc(c3, 'MONTH') as c3_2, date_trunc(c3, 'DAY') as c3_3, date_trunc(c3, 'HOUR') as c3_4, date_trunc(c3, 'MINUTE') as c3_5, date_trunc(c3, 'SECOND') as c3_6, dayname(c3) as c3_7, day_of_week(c3) c3_8, day_of_year(c3) c3_9, extract(YEAR FROM c3) c3_10, extract(MONTH FROM c3) c3_11, extract(DAY FROM c3) c3_12, extract(HOUR FROM c3) c3_13, extract(MINUTE from c3) c3_14, extract(SECOND from c3) c3_15, extract(MILLISECOND from c3) c3_16, extract(DAYOFWEEK FROM c3) c3_17, extract(DAYOFYEAR FROM c3) c3_18, formatdatetime(c4,'yyyy-MM-dd HH:mm:ss.S') c4_1, formatdatetime(c4,'yyyy-MM-dd') c4_2, formatdatetime(c4,'HH:mm:ss.SSS') c4_3, hour(c4) c4_4, minute(c4) c4_5, month(c4) c4_6, 
monthname(c4) c4_7, parsedatetime(c5,'yyyy-MM-dd HH:mm:ss.SSS') c5_1, to_date(c6,'yyyy-MM-dd') c6_1, quarter(c4) c4_8, second(c4) c4_9, week(c4) c4_10, year(c4) c4_11, case when c7 is not null and is_date(c7, 'yyyy-MM-dd HH:mm:ss.SSS') then to_date(c7,'yyyy-MM-dd HH:mm:ss.SSS') else null end as c7_1 from fake" } } @@ -387,6 +388,13 @@ sink { field_value = [ {equals_to = 2021} ] + }, + { + field_name = "c7_1" + field_type = "timestamp" + field_value = [ + {equals_to = "2021-04-08T13:34:45.235"} + ] } ] } diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLFunction.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLFunction.java index 7a8b83d4dbd..44b9ca20b7c 100644 --- a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLFunction.java +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLFunction.java @@ -159,6 +159,7 @@ public class ZetaSQLFunction { public static final String MONTHNAME = "MONTHNAME"; public static final String PARSEDATETIME = "PARSEDATETIME"; public static final String TO_DATE = "TO_DATE"; + public static final String IS_DATE = "IS_DATE"; public static final String QUARTER = "QUARTER"; public static final String SECOND = "SECOND"; public static final String WEEK = "WEEK"; @@ -476,6 +477,8 @@ public Object executeFunctionExpr(String functionName, List args) { case PARSEDATETIME: case TO_DATE: return DateTimeFunction.parsedatetime(args); + case IS_DATE: + return DateTimeFunction.isDate(args); case QUARTER: return DateTimeFunction.quarter(args); case SECOND: diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLType.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLType.java index 64ad04ee3ed..934cd883080 100644 --- a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLType.java 
+++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLType.java @@ -391,6 +391,7 @@ private SeaTunnelDataType getFunctionType(Function function) { case ZetaSQLFunction.DATEDIFF: return BasicType.LONG_TYPE; case ZetaSQLFunction.REGEXP_LIKE: + case ZetaSQLFunction.IS_DATE: return BasicType.BOOLEAN_TYPE; case ZetaSQLFunction.ACOS: case ZetaSQLFunction.ASIN: diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/functions/DateTimeFunction.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/functions/DateTimeFunction.java index 2bd8fa28143..1fa459e4bf1 100644 --- a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/functions/DateTimeFunction.java +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/functions/DateTimeFunction.java @@ -472,6 +472,15 @@ public static String monthname(List args) { return getMonthsAndWeeks(0)[dow - 1]; } + public static boolean isDate(List args) { + try { + parsedatetime(args); + return true; + } catch (Throwable e) { + return false; + } + } + public static Temporal parsedatetime(List args) { String str = (String) args.get(0); if (str == null) {