From a161c7cbb76067efbdef5d06246c97f858579855 Mon Sep 17 00:00:00 2001 From: Sai Aditya Mukkamala Date: Fri, 19 May 2023 13:32:11 +0530 Subject: [PATCH 01/50] testing --- .../wrangler/api/RelationalDirective.java | 30 +++++++++++ .../main/java/io/cdap/wrangler/Wrangler.java | 50 +++++++++++++------ .../widgets/Wrangler-transform.json | 18 +++++++ 3 files changed, 84 insertions(+), 14 deletions(-) create mode 100644 wrangler-api/src/main/java/io/cdap/wrangler/api/RelationalDirective.java diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/RelationalDirective.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/RelationalDirective.java new file mode 100644 index 000000000..ac7e82cd4 --- /dev/null +++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/RelationalDirective.java @@ -0,0 +1,30 @@ +/* + * Copyright © 2017-2019 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package io.cdap.wrangler.api; + +/** + * Directive interface which supports Relational transformations + */ +public interface RelationalDirective extends Directive { + + /** + * returns sql expression + * @return expression + */ + String getExpression(); + +} diff --git a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java index d5e57ae69..5219aec28 100644 --- a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java +++ b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java @@ -569,23 +569,33 @@ private RecipeParser getRecipeParser(StageContext context) @Override public Relation transform(RelationalTranformContext relationalTranformContext, Relation relation) { - if (PRECONDITION_LANGUAGE_SQL.equalsIgnoreCase(config.getPreconditionLanguage()) - && checkPreconditionNotEmpty(true)) { - - if (!Feature.WRANGLER_PRECONDITION_SQL.isEnabled(relationalTranformContext)) { - throw new RuntimeException("SQL Precondition feature is not available"); - } + if (!(PRECONDITION_LANGUAGE_SQL.equalsIgnoreCase(config.getPreconditionLanguage()) + && checkPreconditionNotEmpty(true))) { + return new InvalidRelation("Plugin is not configured for relational transformation"); + } - Optional> expressionFactory = getExpressionFactory(relationalTranformContext); - if (!expressionFactory.isPresent()) { - return new InvalidRelation("Cannot find an Expression Factory"); - } + if (!Feature.WRANGLER_PRECONDITION_SQL.isEnabled(relationalTranformContext)) { + throw new RuntimeException("SQL Precondition feature is not available"); + } - Expression filterExpression = expressionFactory.get().compile(config.getPreconditionSQL()); - return relation.filter(filterExpression); + Optional> expressionFactory = getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); } - return new InvalidRelation("Plugin is 
not configured for relational transformation"); + Expression filterExpression = expressionFactory.get().compile(config.getPreconditionSQL()); + Relation filteredRelation = relation.filter(filterExpression); + + ExpressionFactory expFactory = expressionFactory.get(); + String relationalDirectives = config.getRelationalDirectives(); + String[] sqls = relationalDirectives.split("\n"); + for (String sql : sqls) { + // Expression exp = expFactory.compile(sql); + // currently supporting only drop column + String column = sql.split(" ")[2]; // add validation later + filteredRelation = filteredRelation.dropColumn(column); + } + return filteredRelation; } private Optional> getExpressionFactory(RelationalTranformContext ctx) { @@ -642,6 +652,7 @@ public static class Config extends PluginConfig { static final String NAME_PRECONDITION_LANGUAGE = "expressionLanguage"; static final String NAME_FIELD = "field"; static final String NAME_DIRECTIVES = "directives"; + static final String NAME_RELATIONAL_DIRECTIVES = "relationalDirectives"; static final String NAME_UDD = "udd"; static final String NAME_SCHEMA = "schema"; static final String NAME_ON_ERROR = "on-error"; @@ -672,6 +683,12 @@ public static class Config extends PluginConfig { @Nullable private String directives; + @Name(NAME_RELATIONAL_DIRECTIVES) + @Description("Recipe for wrangling the input records") + @Macro + @Nullable + private String relationalDirectives; + @Name(NAME_UDD) @Description("List of User Defined Directives (UDD) that have to be loaded.") @Nullable @@ -694,7 +711,7 @@ public static class Config extends PluginConfig { private final String onError; public Config(String preconditionLanguage, String precondition, String directives, String udds, - String field, String schema, String onError) { + String field, String schema, String onError, String relationalDirectives) { this.preconditionLanguage = preconditionLanguage; this.precondition = precondition; this.directives = directives; @@ -703,6 +720,7 @@ public 
Config(String preconditionLanguage, String precondition, String directive this.field = field; this.schema = schema; this.onError = onError; + this.relationalDirectives = relationalDirectives; } /** @@ -739,6 +757,10 @@ public String getDirectives() { public String getUDDs() { return udds; } + + public String getRelationalDirectives() { + return relationalDirectives; + } } } diff --git a/wrangler-transform/widgets/Wrangler-transform.json b/wrangler-transform/widgets/Wrangler-transform.json index 9f9c7611e..51ef2b9ef 100644 --- a/wrangler-transform/widgets/Wrangler-transform.json +++ b/wrangler-transform/widgets/Wrangler-transform.json @@ -73,6 +73,24 @@ } ] }, + { + "label": "RelationalDirectives", + "properties": [ + { + "widget-type": "wrangler-relational-directives", + "label": "RelationalRecipe", + "name": "RelationalDirectives", + "widget-attributes" : { + "placeholder" : "#pragma load-directives my-directive; my-directive :body;" + } + }, + { + "widget-type": "csv", + "label": "User Defined SQL(UDS)", + "name": "uds" + } + ] + }, { "label" : "Error Handling", "properties" : [ From e1fbdd53f39f67e27fd0ca808270221357962d31 Mon Sep 17 00:00:00 2001 From: Sai Aditya Mukkamala Date: Fri, 19 May 2023 17:06:19 +0530 Subject: [PATCH 02/50] poc for running on spark sql --- .../wrangler/api/RelationalDirective.java | 2 +- .../java/io/cdap/directives/column/Drop.java | 13 ++++- .../main/java/io/cdap/wrangler/Wrangler.java | 48 +++++++++++++------ 3 files changed, 46 insertions(+), 17 deletions(-) diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/RelationalDirective.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/RelationalDirective.java index ac7e82cd4..c035646b0 100644 --- a/wrangler-api/src/main/java/io/cdap/wrangler/api/RelationalDirective.java +++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/RelationalDirective.java @@ -25,6 +25,6 @@ public interface RelationalDirective extends Directive { * returns sql expression * @return expression */ - 
String getExpression(); + String getSQL(); } diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/Drop.java b/wrangler-core/src/main/java/io/cdap/directives/column/Drop.java index 2114ec2cf..df01bdd65 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/Drop.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/Drop.java @@ -24,6 +24,7 @@ import io.cdap.wrangler.api.DirectiveExecutionException; import io.cdap.wrangler.api.DirectiveParseException; import io.cdap.wrangler.api.ExecutorContext; +import io.cdap.wrangler.api.RelationalDirective; import io.cdap.wrangler.api.Row; import io.cdap.wrangler.api.annotations.Categories; import io.cdap.wrangler.api.lineage.Lineage; @@ -43,7 +44,7 @@ @Name(Drop.NAME) @Categories(categories = { "column"}) @Description("Drop one or more columns.") -public class Drop implements Directive, Lineage { +public class Drop implements RelationalDirective, Lineage { public static final String NAME = "drop"; // Columns to be dropped. 
@@ -88,4 +89,14 @@ public Mutation lineage() { .drop(Many.of(columns)) .build(); } + + @Override + public String getSQL() { + String sql = "DROP COLUMN "; + for (String col : columns) { + sql += col + ","; + } + sql = sql.substring(0, sql.length() - 1); + return sql; + } } diff --git a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java index 5219aec28..7934eae2a 100644 --- a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java +++ b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java @@ -44,6 +44,7 @@ import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.cdap.features.Feature; import io.cdap.directives.aggregates.DefaultTransientStore; +import io.cdap.directives.column.Drop; import io.cdap.wrangler.api.CompileException; import io.cdap.wrangler.api.CompileStatus; import io.cdap.wrangler.api.Compiler; @@ -53,9 +54,11 @@ import io.cdap.wrangler.api.EntityCountMetric; import io.cdap.wrangler.api.ErrorRecord; import io.cdap.wrangler.api.ExecutorContext; +import io.cdap.wrangler.api.RecipeException; import io.cdap.wrangler.api.RecipeParser; import io.cdap.wrangler.api.RecipePipeline; import io.cdap.wrangler.api.RecipeSymbol; +import io.cdap.wrangler.api.RelationalDirective; import io.cdap.wrangler.api.Row; import io.cdap.wrangler.api.TokenGroup; import io.cdap.wrangler.api.TransientStore; @@ -587,17 +590,42 @@ && checkPreconditionNotEmpty(true))) { Relation filteredRelation = relation.filter(filterExpression); ExpressionFactory expFactory = expressionFactory.get(); - String relationalDirectives = config.getRelationalDirectives(); - String[] sqls = relationalDirectives.split("\n"); - for (String sql : sqls) { + + String recipe = config.getDirectives(); + List directives = null; + try { + GrammarBasedParser parser = new GrammarBasedParser("default", new MigrateToV2(recipe).migrate(), registry); + directives = parser.parse(); + } catch 
(DirectiveParseException e) { + throw new RuntimeException(e); + } catch (RecipeException e) { + throw new RuntimeException(e); + } + + for (Directive directive : directives) { // Expression exp = expFactory.compile(sql); + if (!(directive instanceof RelationalDirective)) { + throw new RuntimeException("Directive is not relational Directive"); + } // currently supporting only drop column - String column = sql.split(" ")[2]; // add validation later - filteredRelation = filteredRelation.dropColumn(column); + // SQL will be returned as "DROP COLUMN col1, col2" + String sql = ((RelationalDirective) directive).getSQL(); + List cols = getColumnsOfDropSQL(sql); + for (String col : cols) { + filteredRelation = filteredRelation.dropColumn(col); + } } return filteredRelation; } + private List getColumnsOfDropSQL(String sql) { + List cols = new ArrayList<>(); + for (String col : sql.split(" ")[2].split(",")) { + cols.add(col.trim()); + } + return cols; + } + private Optional> getExpressionFactory(RelationalTranformContext ctx) { return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); } @@ -683,11 +711,6 @@ public static class Config extends PluginConfig { @Nullable private String directives; - @Name(NAME_RELATIONAL_DIRECTIVES) - @Description("Recipe for wrangling the input records") - @Macro - @Nullable - private String relationalDirectives; @Name(NAME_UDD) @Description("List of User Defined Directives (UDD) that have to be loaded.") @@ -720,7 +743,6 @@ public Config(String preconditionLanguage, String precondition, String directive this.field = field; this.schema = schema; this.onError = onError; - this.relationalDirectives = relationalDirectives; } /** @@ -757,10 +779,6 @@ public String getDirectives() { public String getUDDs() { return udds; } - - public String getRelationalDirectives() { - return relationalDirectives; - } } } From 0efb2d3cd4adab1c313601327c24207c7bcd201f Mon Sep 17 00:00:00 2001 From: Sai Aditya Mukkamala Date: Mon, 22 May 2023 
12:15:34 +0530 Subject: [PATCH 03/50] add RelationalDirective interface to system directive registry --- .../java/io/cdap/wrangler/registry/SystemDirectiveRegistry.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/wrangler-core/src/main/java/io/cdap/wrangler/registry/SystemDirectiveRegistry.java b/wrangler-core/src/main/java/io/cdap/wrangler/registry/SystemDirectiveRegistry.java index d886e0017..c7d40f5b2 100644 --- a/wrangler-core/src/main/java/io/cdap/wrangler/registry/SystemDirectiveRegistry.java +++ b/wrangler-core/src/main/java/io/cdap/wrangler/registry/SystemDirectiveRegistry.java @@ -20,6 +20,7 @@ import io.cdap.cdap.api.artifact.ArtifactSummary; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveLoadException; +import io.cdap.wrangler.api.RelationalDirective; import org.reflections.Reflections; import java.util.ArrayList; @@ -84,6 +85,7 @@ public SystemDirectiveRegistry(List namespaces) throws DirectiveLoadExce try { Reflections reflections = new Reflections(namespace); Set> system = reflections.getSubTypesOf(Directive.class); + system.addAll(reflections.getSubTypesOf(RelationalDirective.class)); for (Class directive : system) { DirectiveInfo info = DirectiveInfo.fromSystem(directive); registry.put(info.name(), info); From e6603dae0d76ac12e50958dc9e7a2994f1e4c21f Mon Sep 17 00:00:00 2001 From: Sai Aditya Mukkamala Date: Wed, 24 May 2023 12:06:40 +0530 Subject: [PATCH 04/50] fixing bug --- wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java index 7934eae2a..c4af734c4 100644 --- a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java +++ b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java @@ -193,7 +193,7 @@ public void configurePipeline(PipelineConfigurer configurer) { if 
(!config.containsMacro(Config.NAME_PRECONDITION_SQL)) { validatePrecondition(config.getPreconditionSQL(), true, collector); } - validateSQLModeDirectives(collector); + //validateSQLModeDirectives(collector); } else { if (!config.containsMacro(Config.NAME_PRECONDITION)) { validatePrecondition(config.getPreconditionJEXL(), false, collector); From 400892ace9a2e5eae992b4229c8a311f0cd407e6 Mon Sep 17 00:00:00 2001 From: Sai Aditya Mukkamala Date: Wed, 24 May 2023 12:23:24 +0530 Subject: [PATCH 05/50] fixing registry nullPointer exception --- .../src/main/java/io/cdap/wrangler/Wrangler.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java index c4af734c4..fe272ddd1 100644 --- a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java +++ b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java @@ -592,6 +592,14 @@ && checkPreconditionNotEmpty(true))) { ExpressionFactory expFactory = expressionFactory.get(); String recipe = config.getDirectives(); + + registry = SystemDirectiveRegistry.INSTANCE; + try { + registry.reload("default"); + } catch (DirectiveLoadException e) { + throw new RuntimeException(e); + } + List directives = null; try { GrammarBasedParser parser = new GrammarBasedParser("default", new MigrateToV2(recipe).migrate(), registry); From b4dd875d797baac38ae47c8c15bfcce339e30938 Mon Sep 17 00:00:00 2001 From: Sai Aditya Mukkamala Date: Tue, 30 May 2023 13:39:34 +0530 Subject: [PATCH 06/50] move relation execution into directive --- .../java/io/cdap/wrangler/api/Directive.java | 13 ++++++++++++- .../cdap/wrangler/api/RelationalDirective.java | 18 +++++++++++------- .../java/io/cdap/directives/column/Drop.java | 15 ++++++++------- .../main/java/io/cdap/wrangler/Wrangler.java | 15 ++------------- 4 files changed, 33 insertions(+), 28 deletions(-) diff --git 
a/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java index 2a199263b..3dddd19a0 100644 --- a/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java +++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java @@ -16,6 +16,9 @@ package io.cdap.wrangler.api; +import io.cdap.cdap.etl.api.relational.LinearRelationalTransform; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; import io.cdap.wrangler.api.parser.UsageDefinition; import java.util.List; @@ -51,7 +54,8 @@ * } * */ -public interface Directive extends Executor, List>, EntityMetrics { +public interface Directive extends Executor, List>, EntityMetrics, + LinearRelationalTransform { /** * This defines a interface variable that is static and final for specify * the {@code type} of the plugin this interface would provide. @@ -126,4 +130,11 @@ default List getCountMetrics() { // no op return null; } + + @Override + default Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + // no-op + return relation; + } } diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/RelationalDirective.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/RelationalDirective.java index c035646b0..b66ae2c78 100644 --- a/wrangler-api/src/main/java/io/cdap/wrangler/api/RelationalDirective.java +++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/RelationalDirective.java @@ -16,15 +16,19 @@ package io.cdap.wrangler.api; +import io.cdap.cdap.etl.api.relational.LinearRelationalTransform; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; + /** * Directive interface which supports Relational transformations */ -public interface RelationalDirective extends Directive { - - /** - * returns sql expression - * @return expression - */ - String getSQL(); +public interface 
RelationalDirective extends Directive, LinearRelationalTransform { + @Override + default Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + // no-op + return relation; + } } diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/Drop.java b/wrangler-core/src/main/java/io/cdap/directives/column/Drop.java index df01bdd65..7d21b9109 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/Drop.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/Drop.java @@ -19,6 +19,8 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -44,7 +46,7 @@ @Name(Drop.NAME) @Categories(categories = { "column"}) @Description("Drop one or more columns.") -public class Drop implements RelationalDirective, Lineage { +public class Drop implements Directive, Lineage { public static final String NAME = "drop"; // Columns to be dropped. 
@@ -91,12 +93,11 @@ public Mutation lineage() { } @Override - public String getSQL() { - String sql = "DROP COLUMN "; - for (String col : columns) { - sql += col + ","; + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + for (String col: columns) { + relation = relation.dropColumn(col); } - sql = sql.substring(0, sql.length() - 1); - return sql; + return relation; } } diff --git a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java index fe272ddd1..562ec7394 100644 --- a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java +++ b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java @@ -589,8 +589,6 @@ && checkPreconditionNotEmpty(true))) { Expression filterExpression = expressionFactory.get().compile(config.getPreconditionSQL()); Relation filteredRelation = relation.filter(filterExpression); - ExpressionFactory expFactory = expressionFactory.get(); - String recipe = config.getDirectives(); registry = SystemDirectiveRegistry.INSTANCE; @@ -611,17 +609,8 @@ && checkPreconditionNotEmpty(true))) { } for (Directive directive : directives) { - // Expression exp = expFactory.compile(sql); - if (!(directive instanceof RelationalDirective)) { - throw new RuntimeException("Directive is not relational Directive"); - } - // currently supporting only drop column - // SQL will be returned as "DROP COLUMN col1, col2" - String sql = ((RelationalDirective) directive).getSQL(); - List cols = getColumnsOfDropSQL(sql); - for (String col : cols) { - filteredRelation = filteredRelation.dropColumn(col); - } + filteredRelation = directive + .transform(relationalTranformContext, filteredRelation); } return filteredRelation; } From aa99ccdca19bc5cb097984e0501d3206ddac030f Mon Sep 17 00:00:00 2001 From: Shruti Verma Date: Mon, 19 Jun 2023 11:01:46 +0000 Subject: [PATCH 07/50] Case transform directives --- 
.../cdap/directives/transformation/Lower.java | 26 +++++++++++++++++- .../directives/transformation/TitleCase.java | 20 ++++++++++++++ .../cdap/directives/transformation/Upper.java | 27 +++++++++++++++++++ 3 files changed, 72 insertions(+), 1 deletion(-) diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/Lower.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/Lower.java index e96af4c27..9d7f94a7a 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/Lower.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/Lower.java @@ -19,6 +19,12 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.relational.Expression; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; +import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -32,7 +38,11 @@ import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; +import java.util.Optional; + /** * A directive for lower casing the 'column' value of type String. 
@@ -72,7 +82,7 @@ public List execute(List rows, ExecutorContext context) throws Directi if (object instanceof String) { if (object != null) { String value = (String) object; - row.setValue(idx, value.toLowerCase()); + //row.setValue(idx, value.toLowerCase()); } } } @@ -87,4 +97,18 @@ public Mutation lineage() { .relation(column, column) .build(); } + @Override + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + Optional> expressionFactory = getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + return relation.setColumn(column, expressionFactory.get().compile("LOWER(" + column + ")")); + } + + private Optional> getExpressionFactory(RelationalTranformContext ctx) { + return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); + } + } diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/TitleCase.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/TitleCase.java index 0be5bf378..32543b345 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/TitleCase.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/TitleCase.java @@ -19,6 +19,12 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.relational.Expression; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; +import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -34,6 +40,7 @@ import org.apache.commons.lang.WordUtils; import 
java.util.List; +import java.util.Optional; /** * A directive for title casing the 'column' value of type String. @@ -88,4 +95,17 @@ public Mutation lineage() { .relation(column, column) .build(); } + @Override + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + Optional> expressionFactory = getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + return relation.setColumn(column, expressionFactory.get().compile("initcap(" + column + ")")); + } + + private Optional> getExpressionFactory(RelationalTranformContext ctx) { + return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); + } } diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/Upper.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/Upper.java index 0eb09dc94..3c0ef580d 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/Upper.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/Upper.java @@ -19,6 +19,12 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.relational.Expression; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; +import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -29,10 +35,18 @@ import io.cdap.wrangler.api.lineage.Lineage; import io.cdap.wrangler.api.lineage.Mutation; import io.cdap.wrangler.api.parser.ColumnName; +import io.cdap.wrangler.api.parser.ColumnNameList; import 
io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; /** * A Wrangler step for upper casing the 'column' value of type String. @@ -87,4 +101,17 @@ public Mutation lineage() { .relation(column, column) .build(); } + @Override + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + Optional> expressionFactory = getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + return relation.setColumn(column, expressionFactory.get().compile("UPPER(" + column + ")")); + } + + private Optional> getExpressionFactory(RelationalTranformContext ctx) { + return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); + } } From 6a736c9f445002c6ca32deda6ac554b588371d0d Mon Sep 17 00:00:00 2001 From: Shruti Verma Date: Mon, 19 Jun 2023 16:08:16 +0000 Subject: [PATCH 08/50] Implement trim space directives --- .../directives/transformation/LeftTrim.java | 19 +++++++++++++++++++ .../directives/transformation/RightTrim.java | 19 +++++++++++++++++++ .../cdap/directives/transformation/Trim.java | 18 ++++++++++++++++++ 3 files changed, 56 insertions(+) diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/LeftTrim.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/LeftTrim.java index f739d003e..c5571b16b 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/LeftTrim.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/LeftTrim.java @@ -19,6 +19,11 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import 
io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; +import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -33,6 +38,7 @@ import io.cdap.wrangler.api.parser.UsageDefinition; import java.util.List; +import java.util.Optional; /** * A directive for trimming whitespace from left side of a string @@ -87,4 +93,17 @@ public Mutation lineage() { .relation(col, col) .build(); } + @Override + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + Optional> expressionFactory = getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + return relation.setColumn(col, expressionFactory.get().compile("LTRIM(" + col + ")")); + } + + private Optional> getExpressionFactory(RelationalTranformContext ctx) { + return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); + } } diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/RightTrim.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/RightTrim.java index 3a000a631..870a3f984 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/RightTrim.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/RightTrim.java @@ -19,6 +19,11 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import 
io.cdap.cdap.etl.api.relational.RelationalTranformContext; +import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -33,6 +38,7 @@ import io.cdap.wrangler.api.parser.UsageDefinition; import java.util.List; +import java.util.Optional; /** @@ -88,4 +94,17 @@ public Mutation lineage() { .relation(column, column) .build(); } + @Override + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + Optional> expressionFactory = getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + return relation.setColumn(column, expressionFactory.get().compile("RTRIM(" + column + ")")); + } + + private Optional> getExpressionFactory(RelationalTranformContext ctx) { + return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); + } } diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/Trim.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/Trim.java index 9f1e4aed1..32dbf6b42 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/Trim.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/Trim.java @@ -20,6 +20,11 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; +import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -34,6 +39,7 @@ 
import io.cdap.wrangler.api.parser.UsageDefinition; import java.util.List; +import java.util.Optional; /** * A directive for trimming whitespace from both sides of a string @@ -88,4 +94,16 @@ public Mutation lineage() { .relation(column, column) .build(); } + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + Optional> expressionFactory = getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + return relation.setColumn(column, expressionFactory.get().compile("TRIM(" + column + ")")); + } + + private Optional> getExpressionFactory(RelationalTranformContext ctx) { + return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); + } } From 9e492e387f64dfd1ad88e27f880b5c9b4d05ca45 Mon Sep 17 00:00:00 2001 From: Shruti Verma Date: Mon, 19 Jun 2023 17:08:21 +0000 Subject: [PATCH 09/50] Implement rename directive --- .../java/io/cdap/directives/column/Rename.java | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/Rename.java b/wrangler-core/src/main/java/io/cdap/directives/column/Rename.java index 06bd831f7..f7d08a8e0 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/Rename.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/Rename.java @@ -19,6 +19,11 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; +import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import 
io.cdap.wrangler.api.DirectiveExecutionException; @@ -82,4 +87,17 @@ public Mutation lineage() { .relation(source, target) .build(); } + @Override + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + java.util.Optional> expressionFactory = getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + return relation.setColumn(target.value(), expressionFactory.get().compile(source.value())); + } + + private java.util.Optional> getExpressionFactory(RelationalTranformContext ctx) { + return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); + } } From ddf5f7a2bf6445479e1b74ff8c38a83cc282180a Mon Sep 17 00:00:00 2001 From: Shruti Verma Date: Tue, 20 Jun 2023 04:56:55 +0000 Subject: [PATCH 10/50] implement keep and copy directives --- .../java/io/cdap/directives/column/Copy.java | 18 +++++++++++ .../java/io/cdap/directives/column/Keep.java | 30 +++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/Copy.java b/wrangler-core/src/main/java/io/cdap/directives/column/Copy.java index a0b6db07e..76288e2a7 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/Copy.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/Copy.java @@ -19,6 +19,11 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; +import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -110,4 
+115,17 @@ public Mutation lineage() { .conditional(source.value(), destination.value()) .build(); } + @Override + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + java.util.Optional> expressionFactory = getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + return relation.setColumn(destination.value(), expressionFactory.get().compile(source.value())); + } + + private java.util.Optional> getExpressionFactory(RelationalTranformContext ctx) { + return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); + } } diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/Keep.java b/wrangler-core/src/main/java/io/cdap/directives/column/Keep.java index fbeb067cd..56980981a 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/Keep.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/Keep.java @@ -19,6 +19,12 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.relational.Expression; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; +import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -34,8 +40,12 @@ import io.cdap.wrangler.api.parser.UsageDefinition; import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; +import java.util.Optional; import java.util.Set; +import java.util.stream.Collectors; /** * This class Keep implements a directive that @@ -93,4 +103,24 @@ public Mutation 
lineage() { keep.forEach(column -> builder.relation(column, column)); return builder.build(); } + @Override + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + Optional> expressionFactory = getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + Map keepCol = generateColumnExpMap(keep.stream().collect(Collectors.toList()), + expressionFactory.get()); + return relation.select(keepCol); + } + + private Optional> getExpressionFactory(RelationalTranformContext ctx) { + return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); + } + private Map generateColumnExpMap(List columns, ExpressionFactory factory) { + Map columnExpMap = new LinkedHashMap<>(); + columns.forEach((colName)-> columnExpMap.put(colName, factory.compile(colName))); + return columnExpMap; + } } From 1e0eda07d649cfb53a6e8418c17223cce2eff2d5 Mon Sep 17 00:00:00 2001 From: Shruti Verma Date: Tue, 20 Jun 2023 05:19:15 +0000 Subject: [PATCH 11/50] Implement merge directive --- .../java/io/cdap/directives/column/Merge.java | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/Merge.java b/wrangler-core/src/main/java/io/cdap/directives/column/Merge.java index 0bcd212e3..a9f403d8b 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/Merge.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/Merge.java @@ -19,6 +19,11 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; +import 
io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -37,6 +42,7 @@ import java.util.ArrayList; import java.util.List; +import java.util.Optional; /** * A directive for merging two columns and creates a third column. @@ -108,4 +114,17 @@ public Mutation lineage() { .relation(Many.columns(col1, col2), Many.of(col1, col2, dest)) .build(); } + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + Optional> expressionFactory = getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + return relation.setColumn(dest, expressionFactory.get() + .compile("CONCAT(" + col1 + ",'" + delimiter + "'," + col2 + ")")); + } + + private Optional> getExpressionFactory(RelationalTranformContext ctx) { + return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); + } } From 2cdc2b3fe0292ee8dc7ee3212efba222a8cf8773 Mon Sep 17 00:00:00 2001 From: Shruti Verma Date: Tue, 20 Jun 2023 08:27:36 +0000 Subject: [PATCH 12/50] Undo changes in Lower.execute --- .../src/main/java/io/cdap/directives/transformation/Lower.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/Lower.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/Lower.java index 9d7f94a7a..cb90d31d7 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/Lower.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/Lower.java @@ -82,7 +82,7 @@ public List execute(List rows, ExecutorContext context) throws Directi if (object instanceof String) { if (object != null) { String value = (String) object; - //row.setValue(idx, value.toLowerCase()); + row.setValue(idx, value.toLowerCase()); } } } 
From cd4c680aa1f57bfaa31aa943d0f0f740a8662df8 Mon Sep 17 00:00:00 2001 From: Shruti Verma Date: Sat, 24 Jun 2023 14:52:26 +0000 Subject: [PATCH 13/50] Implement set-type directive --- .../io/cdap/directives/column/SetType.java | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java b/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java index f143493c0..c4051706c 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java @@ -19,6 +19,11 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; +import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -36,6 +41,7 @@ import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; import io.cdap.wrangler.utils.ColumnConverter; +import io.cdap.wrangler.utils.ColumnTypeExpression; import java.math.RoundingMode; import java.util.List; @@ -108,4 +114,19 @@ public Mutation lineage() { .relation(col, col) .build(); } + @Override + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + java.util.Optional> expressionFactory = getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + String expression = ColumnTypeExpression.getColumnTypeExp(type, col, scale); + return relation.setColumn(col, 
expressionFactory.get().compile(expression)); + } + + private java.util.Optional> getExpressionFactory(RelationalTranformContext ctx) { + return ctx.getEngine() + .getExpressionFactory(StringExpressionFactoryType.SQL); + } } From b25accf073cc3947d19eff25e141ca3b47b25904 Mon Sep 17 00:00:00 2001 From: Shruti Verma Date: Sat, 24 Jun 2023 14:53:22 +0000 Subject: [PATCH 14/50] Add utility class for set-type sql expression --- .../wrangler/utils/ColumnTypeExpression.java | 84 +++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 wrangler-core/src/main/java/io/cdap/wrangler/utils/ColumnTypeExpression.java diff --git a/wrangler-core/src/main/java/io/cdap/wrangler/utils/ColumnTypeExpression.java b/wrangler-core/src/main/java/io/cdap/wrangler/utils/ColumnTypeExpression.java new file mode 100644 index 000000000..7bd592186 --- /dev/null +++ b/wrangler-core/src/main/java/io/cdap/wrangler/utils/ColumnTypeExpression.java @@ -0,0 +1,84 @@ +/* + * Copyright © 2017-2019 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package io.cdap.wrangler.utils; + +/** + * Utility class that returns a string of SQL expression for the given data type. 
+ */ +public final class ColumnTypeExpression { + + public static String getColumnTypeExp(String toType, String column, int scale) { + toType = toType.toUpperCase(); + String expression; + switch (toType) { + case "INTEGER": + case "I64": + case "INT": { + expression = "CAST(" + column + " AS INT)"; + return expression; + } + + case "I32": + case "SHORT": { + expression = "CAST(" + column + " AS SMALLINT)"; + return expression; + } + + case "LONG": { + expression = "CAST(" + column + " AS BIGINT)"; + return expression; + } + + case "BOOL": + case "BOOLEAN": { + expression = "CAST(" + column + " AS BOOLEAN)"; + return expression; + } + + case "STRING": { + expression = "CAST(" + column + " AS STRING)"; + return expression; + } + + case "FLOAT": { + expression = "CAST(" + column + " AS FLOAT)"; + return expression; + } + + case "DECIMAL": { + expression = "CAST(" + column + " AS DECIMAL(38," + scale + "))"; + return expression; + } + + case "DOUBLE": { + expression = "CAST(" + column + " AS DOUBLE)"; + return expression; + } + + case "BYTES": { + expression = "CAST(" + column + " AS TINYINT)"; + return expression; + } + + default: + return column; +// throw new DirectiveExecutionException(String.format( +// "Column '%s' is of unsupported type '%s'. 
Supported types are: " + +// "int, short, long, double, decimal, boolean, string, bytes", column, toType)); + } + } +} From 2bb3fe2a09f7e2c8d0a0e2410b6c40ac5d38cb7b Mon Sep 17 00:00:00 2001 From: Shruti Verma Date: Sun, 25 Jun 2023 10:01:13 +0000 Subject: [PATCH 15/50] Implement ChangeColumnCase Directive --- .../directives/column/ChangeColCaseNames.java | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/ChangeColCaseNames.java b/wrangler-core/src/main/java/io/cdap/directives/column/ChangeColCaseNames.java index 4f6cedb89..a35d1e549 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/ChangeColCaseNames.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/ChangeColCaseNames.java @@ -19,6 +19,13 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.api.data.schema.Schema; +import io.cdap.cdap.etl.api.relational.Expression; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; +import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -34,7 +41,10 @@ import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; +import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; /** * This class ChangeColCaseNames converts the case of the columns @@ -94,5 +104,43 @@ public Mutation lineage() { .all(Many.of()) .build(); } + @Override + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + java.util.Optional> expressionFactory 
= getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + List columnNames = generateListCols(relationalTranformContext); + Map colmap = generateColumnCaseMap(columnNames, expressionFactory.get()); + return relation.select(colmap); + } + + private List generateListCols(RelationalTranformContext relationalTranformContext) { + List colnames = new ArrayList(); + java.util.Set s = relationalTranformContext.getInputRelationNames(); + for (String inp : s) { + Schema schema = relationalTranformContext.getInputSchema(inp); + List fields = schema.getFields(); + for (Schema.Field field: fields) { + colnames.add(field.getName()); + } + } + return colnames; + } + private java.util.Optional> getExpressionFactory(RelationalTranformContext ctx) { + return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); + } + + private Map generateColumnCaseMap(List columns, ExpressionFactory factory) { + Map columnExpMap = new LinkedHashMap<>(); + if (toLower) { + columns.forEach((colName) -> columnExpMap.put(colName.toLowerCase(), factory.compile(colName))); + } else { + columns.forEach((colName) -> columnExpMap.put(colName.toUpperCase(), factory.compile(colName))); + } + return columnExpMap; + } + } From f7c5bd3163cda358f5b6cca33a461e604a0fd8d4 Mon Sep 17 00:00:00 2001 From: Shruti Verma Date: Sun, 25 Jun 2023 11:12:27 +0000 Subject: [PATCH 16/50] Move getExpressionfactory() to the Directive interface --- .../java/io/cdap/wrangler/api/Directive.java | 20 ++++++++++++++++++- .../directives/column/ChangeColCaseNames.java | 3 --- .../java/io/cdap/directives/column/Copy.java | 3 --- .../java/io/cdap/directives/column/Keep.java | 4 +--- .../java/io/cdap/directives/column/Merge.java | 3 --- .../io/cdap/directives/column/Rename.java | 7 ++----- .../io/cdap/directives/column/SetType.java | 4 ---- .../directives/transformation/LeftTrim.java | 4 ---- 
.../cdap/directives/transformation/Lower.java | 4 ---- .../directives/transformation/RightTrim.java | 3 --- .../directives/transformation/TitleCase.java | 3 --- .../cdap/directives/transformation/Trim.java | 3 --- .../cdap/directives/transformation/Upper.java | 3 --- 13 files changed, 22 insertions(+), 42 deletions(-) diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java index 3dddd19a0..2ffeecc8c 100644 --- a/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java +++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java @@ -16,12 +16,16 @@ package io.cdap.wrangler.api; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; import io.cdap.cdap.etl.api.relational.LinearRelationalTransform; import io.cdap.cdap.etl.api.relational.Relation; import io.cdap.cdap.etl.api.relational.RelationalTranformContext; +import io.cdap.cdap.etl.api.relational.RelationalTransform; +import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.parser.UsageDefinition; import java.util.List; +import java.util.Optional; /** * Directive is a user defined directive. DIE - Define, Initialize & Execute - Pattern @@ -55,7 +59,7 @@ * */ public interface Directive extends Executor, List>, EntityMetrics, - LinearRelationalTransform { + LinearRelationalTransform { /** * This defines a interface variable that is static and final for specify * the {@code type} of the plugin this interface would provide. 
@@ -137,4 +141,18 @@ default Relation transform(RelationalTranformContext relationalTranformContext, // no-op return relation; } + + default Optional> getExpressionFactory(RelationalTranformContext ctx) { + return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); + } } + + + + + + + + + + diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/ChangeColCaseNames.java b/wrangler-core/src/main/java/io/cdap/directives/column/ChangeColCaseNames.java index a35d1e549..ea96a2e55 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/ChangeColCaseNames.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/ChangeColCaseNames.java @@ -128,9 +128,6 @@ private List generateListCols(RelationalTranformContext relationalTranfo } return colnames; } - private java.util.Optional> getExpressionFactory(RelationalTranformContext ctx) { - return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); - } private Map generateColumnCaseMap(List columns, ExpressionFactory factory) { Map columnExpMap = new LinkedHashMap<>(); diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/Copy.java b/wrangler-core/src/main/java/io/cdap/directives/column/Copy.java index 76288e2a7..fa8767de9 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/Copy.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/Copy.java @@ -125,7 +125,4 @@ public Relation transform(RelationalTranformContext relationalTranformContext, return relation.setColumn(destination.value(), expressionFactory.get().compile(source.value())); } - private java.util.Optional> getExpressionFactory(RelationalTranformContext ctx) { - return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); - } } diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/Keep.java b/wrangler-core/src/main/java/io/cdap/directives/column/Keep.java index 56980981a..acdb3f9c0 100644 --- 
a/wrangler-core/src/main/java/io/cdap/directives/column/Keep.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/Keep.java @@ -115,12 +115,10 @@ public Relation transform(RelationalTranformContext relationalTranformContext, return relation.select(keepCol); } - private Optional> getExpressionFactory(RelationalTranformContext ctx) { - return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); - } private Map generateColumnExpMap(List columns, ExpressionFactory factory) { Map columnExpMap = new LinkedHashMap<>(); columns.forEach((colName)-> columnExpMap.put(colName, factory.compile(colName))); return columnExpMap; } + } diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/Merge.java b/wrangler-core/src/main/java/io/cdap/directives/column/Merge.java index a9f403d8b..3a6e8945c 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/Merge.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/Merge.java @@ -124,7 +124,4 @@ public Relation transform(RelationalTranformContext relationalTranformContext, .compile("CONCAT(" + col1 + ",'" + delimiter + "'," + col2 + ")")); } - private Optional> getExpressionFactory(RelationalTranformContext ctx) { - return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); - } } diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/Rename.java b/wrangler-core/src/main/java/io/cdap/directives/column/Rename.java index f7d08a8e0..f7baf8e45 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/Rename.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/Rename.java @@ -23,7 +23,6 @@ import io.cdap.cdap.etl.api.relational.InvalidRelation; import io.cdap.cdap.etl.api.relational.Relation; import io.cdap.cdap.etl.api.relational.RelationalTranformContext; -import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import 
io.cdap.wrangler.api.DirectiveExecutionException; @@ -38,6 +37,7 @@ import io.cdap.wrangler.utils.ColumnConverter; import java.util.List; +import java.util.Optional; /** * A directive for renaming columns. @@ -90,14 +90,11 @@ public Mutation lineage() { @Override public Relation transform(RelationalTranformContext relationalTranformContext, Relation relation) { - java.util.Optional> expressionFactory = getExpressionFactory(relationalTranformContext); + Optional> expressionFactory = getExpressionFactory(relationalTranformContext); if (!expressionFactory.isPresent()) { return new InvalidRelation("Cannot find an Expression Factory"); } return relation.setColumn(target.value(), expressionFactory.get().compile(source.value())); } - private java.util.Optional> getExpressionFactory(RelationalTranformContext ctx) { - return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); - } } diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java b/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java index c4051706c..283a612ad 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java @@ -125,8 +125,4 @@ public Relation transform(RelationalTranformContext relationalTranformContext, return relation.setColumn(col, expressionFactory.get().compile(expression)); } - private java.util.Optional> getExpressionFactory(RelationalTranformContext ctx) { - return ctx.getEngine() - .getExpressionFactory(StringExpressionFactoryType.SQL); - } } diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/LeftTrim.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/LeftTrim.java index c5571b16b..e33770885 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/LeftTrim.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/LeftTrim.java @@ -102,8 +102,4 @@ public Relation 
transform(RelationalTranformContext relationalTranformContext, } return relation.setColumn(col, expressionFactory.get().compile("LTRIM(" + col + ")")); } - - private Optional> getExpressionFactory(RelationalTranformContext ctx) { - return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); - } } diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/Lower.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/Lower.java index cb90d31d7..5f00e2854 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/Lower.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/Lower.java @@ -107,8 +107,4 @@ public Relation transform(RelationalTranformContext relationalTranformContext, return relation.setColumn(column, expressionFactory.get().compile("LOWER(" + column + ")")); } - private Optional> getExpressionFactory(RelationalTranformContext ctx) { - return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); - } - } diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/RightTrim.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/RightTrim.java index 870a3f984..ce86edc3a 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/RightTrim.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/RightTrim.java @@ -104,7 +104,4 @@ public Relation transform(RelationalTranformContext relationalTranformContext, return relation.setColumn(column, expressionFactory.get().compile("RTRIM(" + column + ")")); } - private Optional> getExpressionFactory(RelationalTranformContext ctx) { - return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); - } } diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/TitleCase.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/TitleCase.java index 32543b345..b7759c17b 100644 --- 
a/wrangler-core/src/main/java/io/cdap/directives/transformation/TitleCase.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/TitleCase.java @@ -105,7 +105,4 @@ public Relation transform(RelationalTranformContext relationalTranformContext, return relation.setColumn(column, expressionFactory.get().compile("initcap(" + column + ")")); } - private Optional> getExpressionFactory(RelationalTranformContext ctx) { - return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); - } } diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/Trim.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/Trim.java index 32dbf6b42..c7e3ddf12 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/Trim.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/Trim.java @@ -103,7 +103,4 @@ public Relation transform(RelationalTranformContext relationalTranformContext, return relation.setColumn(column, expressionFactory.get().compile("TRIM(" + column + ")")); } - private Optional> getExpressionFactory(RelationalTranformContext ctx) { - return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); - } } diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/Upper.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/Upper.java index 3c0ef580d..7b7849b48 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/Upper.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/Upper.java @@ -111,7 +111,4 @@ public Relation transform(RelationalTranformContext relationalTranformContext, return relation.setColumn(column, expressionFactory.get().compile("UPPER(" + column + ")")); } - private Optional> getExpressionFactory(RelationalTranformContext ctx) { - return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); - } } From 6d4093bb8ca87793425def041cfb91dafec377eb Mon Sep 17 00:00:00 2001 
From: Shruti Verma Date: Tue, 27 Jun 2023 10:35:10 +0000 Subject: [PATCH 17/50] Move generateColumnExpMap to Directive.java --- .../src/main/java/io/cdap/wrangler/api/Directive.java | 10 +++++++++- .../src/main/java/io/cdap/directives/column/Keep.java | 6 +----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java index 2ffeecc8c..6e4025063 100644 --- a/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java +++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java @@ -16,15 +16,17 @@ package io.cdap.wrangler.api; +import io.cdap.cdap.etl.api.relational.Expression; import io.cdap.cdap.etl.api.relational.ExpressionFactory; import io.cdap.cdap.etl.api.relational.LinearRelationalTransform; import io.cdap.cdap.etl.api.relational.Relation; import io.cdap.cdap.etl.api.relational.RelationalTranformContext; -import io.cdap.cdap.etl.api.relational.RelationalTransform; import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.parser.UsageDefinition; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import java.util.Optional; /** @@ -145,6 +147,12 @@ default Relation transform(RelationalTranformContext relationalTranformContext, default Optional> getExpressionFactory(RelationalTranformContext ctx) { return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); } + + default Map generateColumnExpMap(List columns, ExpressionFactory factory) { + Map columnExpMap = new LinkedHashMap<>(); + columns.forEach((colName)-> columnExpMap.put(colName, factory.compile(colName))); + return columnExpMap; + } } diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/Keep.java b/wrangler-core/src/main/java/io/cdap/directives/column/Keep.java index acdb3f9c0..ea8a7923e 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/Keep.java +++ 
b/wrangler-core/src/main/java/io/cdap/directives/column/Keep.java @@ -115,10 +115,6 @@ public Relation transform(RelationalTranformContext relationalTranformContext, return relation.select(keepCol); } - private Map generateColumnExpMap(List columns, ExpressionFactory factory) { - Map columnExpMap = new LinkedHashMap<>(); - columns.forEach((colName)-> columnExpMap.put(colName, factory.compile(colName))); - return columnExpMap; - } + } From 376e313d07c71227fa7a4ff4edae1ac65fdecdb1 Mon Sep 17 00:00:00 2001 From: Shruti Verma Date: Wed, 28 Jun 2023 05:52:56 +0000 Subject: [PATCH 18/50] Clean up code --- .../java/io/cdap/wrangler/api/Directive.java | 27 +------------------ .../java/io/cdap/directives/column/Copy.java | 10 +++---- .../java/io/cdap/directives/column/Keep.java | 12 ++++----- .../java/io/cdap/directives/column/Merge.java | 7 ++--- .../io/cdap/directives/column/Rename.java | 4 ++- .../io/cdap/directives/column/SetType.java | 14 +++++++--- .../directives/transformation/LeftTrim.java | 5 ++-- .../cdap/directives/transformation/Lower.java | 8 +++--- .../directives/transformation/RightTrim.java | 7 ++--- .../directives/transformation/TitleCase.java | 8 +++--- .../cdap/directives/transformation/Trim.java | 7 ++--- .../cdap/directives/transformation/Upper.java | 15 +++-------- 12 files changed, 50 insertions(+), 74 deletions(-) diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java index 6e4025063..19f2aacc1 100644 --- a/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java +++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java @@ -16,18 +16,12 @@ package io.cdap.wrangler.api; -import io.cdap.cdap.etl.api.relational.Expression; -import io.cdap.cdap.etl.api.relational.ExpressionFactory; import io.cdap.cdap.etl.api.relational.LinearRelationalTransform; import io.cdap.cdap.etl.api.relational.Relation; import 
io.cdap.cdap.etl.api.relational.RelationalTranformContext; -import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.parser.UsageDefinition; -import java.util.LinkedHashMap; import java.util.List; -import java.util.Map; -import java.util.Optional; /** * Directive is a user defined directive. DIE - Define, Initialize & Execute - Pattern @@ -144,23 +138,4 @@ default Relation transform(RelationalTranformContext relationalTranformContext, return relation; } - default Optional> getExpressionFactory(RelationalTranformContext ctx) { - return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); - } - - default Map generateColumnExpMap(List columns, ExpressionFactory factory) { - Map columnExpMap = new LinkedHashMap<>(); - columns.forEach((colName)-> columnExpMap.put(colName, factory.compile(colName))); - return columnExpMap; - } -} - - - - - - - - - - +} \ No newline at end of file diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/Copy.java b/wrangler-core/src/main/java/io/cdap/directives/column/Copy.java index fa8767de9..f2d0d296b 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/Copy.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/Copy.java @@ -23,23 +23,22 @@ import io.cdap.cdap.etl.api.relational.InvalidRelation; import io.cdap.cdap.etl.api.relational.Relation; import io.cdap.cdap.etl.api.relational.RelationalTranformContext; -import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; import io.cdap.wrangler.api.DirectiveParseException; import io.cdap.wrangler.api.ExecutorContext; -import io.cdap.wrangler.api.Optional; import io.cdap.wrangler.api.Row; import io.cdap.wrangler.api.annotations.Categories; import io.cdap.wrangler.api.lineage.Lineage; -import io.cdap.wrangler.api.lineage.Many; import 
io.cdap.wrangler.api.lineage.Mutation; import io.cdap.wrangler.api.parser.ColumnName; import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; +import io.cdap.wrangler.utils.SqlExpressionGenerator; import java.util.List; +import java.util.Optional; /** * A directive for copying value of one column to another. @@ -118,11 +117,12 @@ public Mutation lineage() { @Override public Relation transform(RelationalTranformContext relationalTranformContext, Relation relation) { - java.util.Optional> expressionFactory = getExpressionFactory(relationalTranformContext); + Optional> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); if (!expressionFactory.isPresent()) { return new InvalidRelation("Cannot find an Expression Factory"); } return relation.setColumn(destination.value(), expressionFactory.get().compile(source.value())); } -} +} \ No newline at end of file diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/Keep.java b/wrangler-core/src/main/java/io/cdap/directives/column/Keep.java index ea8a7923e..c23d85c26 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/Keep.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/Keep.java @@ -24,7 +24,6 @@ import io.cdap.cdap.etl.api.relational.InvalidRelation; import io.cdap.cdap.etl.api.relational.Relation; import io.cdap.cdap.etl.api.relational.RelationalTranformContext; -import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -38,9 +37,9 @@ import io.cdap.wrangler.api.parser.ColumnNameList; import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; +import io.cdap.wrangler.utils.SqlExpressionGenerator; import java.util.HashSet; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import 
java.util.Optional; @@ -106,15 +105,14 @@ public Mutation lineage() { @Override public Relation transform(RelationalTranformContext relationalTranformContext, Relation relation) { - Optional> expressionFactory = getExpressionFactory(relationalTranformContext); + Optional> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); if (!expressionFactory.isPresent()) { return new InvalidRelation("Cannot find an Expression Factory"); } - Map keepCol = generateColumnExpMap(keep.stream().collect(Collectors.toList()), - expressionFactory.get()); + Map keepCol = SqlExpressionGenerator + .generateColumnExpMap(keep.stream().collect(Collectors.toList()), expressionFactory.get()); return relation.select(keepCol); } - - } diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/Merge.java b/wrangler-core/src/main/java/io/cdap/directives/column/Merge.java index 3a6e8945c..04d0a831a 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/Merge.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/Merge.java @@ -23,7 +23,6 @@ import io.cdap.cdap.etl.api.relational.InvalidRelation; import io.cdap.cdap.etl.api.relational.Relation; import io.cdap.cdap.etl.api.relational.RelationalTranformContext; -import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -38,6 +37,7 @@ import io.cdap.wrangler.api.parser.Text; import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; +import io.cdap.wrangler.utils.SqlExpressionGenerator; import org.apache.commons.lang3.StringEscapeUtils; import java.util.ArrayList; @@ -116,7 +116,8 @@ public Mutation lineage() { } public Relation transform(RelationalTranformContext relationalTranformContext, Relation relation) { - Optional> expressionFactory = getExpressionFactory(relationalTranformContext); + 
Optional> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); if (!expressionFactory.isPresent()) { return new InvalidRelation("Cannot find an Expression Factory"); } @@ -124,4 +125,4 @@ public Relation transform(RelationalTranformContext relationalTranformContext, .compile("CONCAT(" + col1 + ",'" + delimiter + "'," + col2 + ")")); } -} +} \ No newline at end of file diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/Rename.java b/wrangler-core/src/main/java/io/cdap/directives/column/Rename.java index f7baf8e45..78f67a3b9 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/Rename.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/Rename.java @@ -35,6 +35,7 @@ import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; import io.cdap.wrangler.utils.ColumnConverter; +import io.cdap.wrangler.utils.SqlExpressionGenerator; import java.util.List; import java.util.Optional; @@ -90,7 +91,8 @@ public Mutation lineage() { @Override public Relation transform(RelationalTranformContext relationalTranformContext, Relation relation) { - Optional> expressionFactory = getExpressionFactory(relationalTranformContext); + Optional> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); if (!expressionFactory.isPresent()) { return new InvalidRelation("Cannot find an Expression Factory"); } diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java b/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java index 283a612ad..6ec847a76 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java @@ -23,7 +23,6 @@ import io.cdap.cdap.etl.api.relational.InvalidRelation; import io.cdap.cdap.etl.api.relational.Relation; import io.cdap.cdap.etl.api.relational.RelationalTranformContext; -import 
io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -42,6 +41,7 @@ import io.cdap.wrangler.api.parser.UsageDefinition; import io.cdap.wrangler.utils.ColumnConverter; import io.cdap.wrangler.utils.ColumnTypeExpression; +import io.cdap.wrangler.utils.SqlExpressionGenerator; import java.math.RoundingMode; import java.util.List; @@ -117,12 +117,18 @@ public Mutation lineage() { @Override public Relation transform(RelationalTranformContext relationalTranformContext, Relation relation) { - java.util.Optional> expressionFactory = getExpressionFactory(relationalTranformContext); + java.util.Optional> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); if (!expressionFactory.isPresent()) { return new InvalidRelation("Cannot find an Expression Factory"); } - String expression = ColumnTypeExpression.getColumnTypeExp(type, col, scale); + String expression; + if (scale == null) { + expression = ColumnTypeExpression.getColumnTypeExp(type, col); + } else { + expression = ColumnTypeExpression.getColumnTypeExp(type, col, scale); + } return relation.setColumn(col, expressionFactory.get().compile(expression)); } -} +} \ No newline at end of file diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/LeftTrim.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/LeftTrim.java index e33770885..fc772d9d0 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/LeftTrim.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/LeftTrim.java @@ -23,7 +23,6 @@ import io.cdap.cdap.etl.api.relational.InvalidRelation; import io.cdap.cdap.etl.api.relational.Relation; import io.cdap.cdap.etl.api.relational.RelationalTranformContext; -import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.Arguments; 
import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -36,6 +35,7 @@ import io.cdap.wrangler.api.parser.ColumnName; import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; +import io.cdap.wrangler.utils.SqlExpressionGenerator; import java.util.List; import java.util.Optional; @@ -96,7 +96,8 @@ public Mutation lineage() { @Override public Relation transform(RelationalTranformContext relationalTranformContext, Relation relation) { - Optional> expressionFactory = getExpressionFactory(relationalTranformContext); + Optional> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); if (!expressionFactory.isPresent()) { return new InvalidRelation("Cannot find an Expression Factory"); } diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/Lower.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/Lower.java index 5f00e2854..dd8587044 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/Lower.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/Lower.java @@ -19,12 +19,10 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; -import io.cdap.cdap.etl.api.relational.Expression; import io.cdap.cdap.etl.api.relational.ExpressionFactory; import io.cdap.cdap.etl.api.relational.InvalidRelation; import io.cdap.cdap.etl.api.relational.Relation; import io.cdap.cdap.etl.api.relational.RelationalTranformContext; -import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -37,10 +35,9 @@ import io.cdap.wrangler.api.parser.ColumnName; import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; +import 
io.cdap.wrangler.utils.SqlExpressionGenerator; -import java.util.LinkedHashMap; import java.util.List; -import java.util.Map; import java.util.Optional; @@ -100,7 +97,8 @@ public Mutation lineage() { @Override public Relation transform(RelationalTranformContext relationalTranformContext, Relation relation) { - Optional> expressionFactory = getExpressionFactory(relationalTranformContext); + Optional> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); if (!expressionFactory.isPresent()) { return new InvalidRelation("Cannot find an Expression Factory"); } diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/RightTrim.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/RightTrim.java index ce86edc3a..6ef616bc5 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/RightTrim.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/RightTrim.java @@ -23,7 +23,6 @@ import io.cdap.cdap.etl.api.relational.InvalidRelation; import io.cdap.cdap.etl.api.relational.Relation; import io.cdap.cdap.etl.api.relational.RelationalTranformContext; -import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -36,6 +35,7 @@ import io.cdap.wrangler.api.parser.ColumnName; import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; +import io.cdap.wrangler.utils.SqlExpressionGenerator; import java.util.List; import java.util.Optional; @@ -97,11 +97,12 @@ public Mutation lineage() { @Override public Relation transform(RelationalTranformContext relationalTranformContext, Relation relation) { - Optional> expressionFactory = getExpressionFactory(relationalTranformContext); + Optional> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); if 
(!expressionFactory.isPresent()) { return new InvalidRelation("Cannot find an Expression Factory"); } return relation.setColumn(column, expressionFactory.get().compile("RTRIM(" + column + ")")); } -} +} \ No newline at end of file diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/TitleCase.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/TitleCase.java index b7759c17b..f58484796 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/TitleCase.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/TitleCase.java @@ -19,12 +19,10 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; -import io.cdap.cdap.etl.api.relational.Expression; import io.cdap.cdap.etl.api.relational.ExpressionFactory; import io.cdap.cdap.etl.api.relational.InvalidRelation; import io.cdap.cdap.etl.api.relational.Relation; import io.cdap.cdap.etl.api.relational.RelationalTranformContext; -import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -37,6 +35,7 @@ import io.cdap.wrangler.api.parser.ColumnName; import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; +import io.cdap.wrangler.utils.SqlExpressionGenerator; import org.apache.commons.lang.WordUtils; import java.util.List; @@ -98,11 +97,12 @@ public Mutation lineage() { @Override public Relation transform(RelationalTranformContext relationalTranformContext, Relation relation) { - Optional> expressionFactory = getExpressionFactory(relationalTranformContext); + Optional> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); if (!expressionFactory.isPresent()) { return new InvalidRelation("Cannot find an Expression Factory"); } return 
relation.setColumn(column, expressionFactory.get().compile("initcap(" + column + ")")); } -} +} \ No newline at end of file diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/Trim.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/Trim.java index c7e3ddf12..f4efa957e 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/Trim.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/Trim.java @@ -24,7 +24,6 @@ import io.cdap.cdap.etl.api.relational.InvalidRelation; import io.cdap.cdap.etl.api.relational.Relation; import io.cdap.cdap.etl.api.relational.RelationalTranformContext; -import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -37,6 +36,7 @@ import io.cdap.wrangler.api.parser.ColumnName; import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; +import io.cdap.wrangler.utils.SqlExpressionGenerator; import java.util.List; import java.util.Optional; @@ -96,11 +96,12 @@ public Mutation lineage() { } public Relation transform(RelationalTranformContext relationalTranformContext, Relation relation) { - Optional> expressionFactory = getExpressionFactory(relationalTranformContext); + Optional> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); if (!expressionFactory.isPresent()) { return new InvalidRelation("Cannot find an Expression Factory"); } return relation.setColumn(column, expressionFactory.get().compile("TRIM(" + column + ")")); } -} +} \ No newline at end of file diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/Upper.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/Upper.java index 7b7849b48..713dbf606 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/Upper.java +++ 
b/wrangler-core/src/main/java/io/cdap/directives/transformation/Upper.java @@ -19,12 +19,10 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; -import io.cdap.cdap.etl.api.relational.Expression; import io.cdap.cdap.etl.api.relational.ExpressionFactory; import io.cdap.cdap.etl.api.relational.InvalidRelation; import io.cdap.cdap.etl.api.relational.Relation; import io.cdap.cdap.etl.api.relational.RelationalTranformContext; -import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -35,18 +33,12 @@ import io.cdap.wrangler.api.lineage.Lineage; import io.cdap.wrangler.api.lineage.Mutation; import io.cdap.wrangler.api.parser.ColumnName; -import io.cdap.wrangler.api.parser.ColumnNameList; import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; +import io.cdap.wrangler.utils.SqlExpressionGenerator; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.LinkedHashMap; import java.util.List; -import java.util.Map; import java.util.Optional; -import java.util.Set; -import java.util.stream.Collectors; /** * A Wrangler step for upper casing the 'column' value of type String. 
@@ -104,11 +96,12 @@ public Mutation lineage() { @Override public Relation transform(RelationalTranformContext relationalTranformContext, Relation relation) { - Optional> expressionFactory = getExpressionFactory(relationalTranformContext); + Optional> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); if (!expressionFactory.isPresent()) { return new InvalidRelation("Cannot find an Expression Factory"); } return relation.setColumn(column, expressionFactory.get().compile("UPPER(" + column + ")")); } -} +} \ No newline at end of file From c3002efd349d9db05752c05876ea8943a226f879 Mon Sep 17 00:00:00 2001 From: Shruti Verma Date: Wed, 28 Jun 2023 05:53:56 +0000 Subject: [PATCH 19/50] Move sql expression generator functions to a new util class --- .../utils/SqlExpressionGenerator.java | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 wrangler-core/src/main/java/io/cdap/wrangler/utils/SqlExpressionGenerator.java diff --git a/wrangler-core/src/main/java/io/cdap/wrangler/utils/SqlExpressionGenerator.java b/wrangler-core/src/main/java/io/cdap/wrangler/utils/SqlExpressionGenerator.java new file mode 100644 index 000000000..0f636dc30 --- /dev/null +++ b/wrangler-core/src/main/java/io/cdap/wrangler/utils/SqlExpressionGenerator.java @@ -0,0 +1,25 @@ +package io.cdap.wrangler.utils; + +import io.cdap.cdap.etl.api.relational.Expression; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; +import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; + + +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +public class SqlExpressionGenerator { + + public static Optional> getExpressionFactory(RelationalTranformContext ctx) { + return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); + } + + public static Map generateColumnExpMap(List columns, 
ExpressionFactory factory) { + Map columnExpMap = new LinkedHashMap<>(); + columns.forEach((colName)-> columnExpMap.put(colName, factory.compile(colName))); + return columnExpMap; + } +} From 51d3d0565c4657ccb4a6b77c66929a4450b01e1b Mon Sep 17 00:00:00 2001 From: Shruti Verma Date: Wed, 28 Jun 2023 06:01:19 +0000 Subject: [PATCH 20/50] Change util class name --- .../directives/column/ChangeColCaseNames.java | 15 ++++---- .../io/cdap/directives/column/SetType.java | 6 +-- ...pression.java => SqlTypeExpGenerator.java} | 37 ++++++++++--------- 3 files changed, 30 insertions(+), 28 deletions(-) rename wrangler-core/src/main/java/io/cdap/wrangler/utils/{ColumnTypeExpression.java => SqlTypeExpGenerator.java} (63%) diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/ChangeColCaseNames.java b/wrangler-core/src/main/java/io/cdap/directives/column/ChangeColCaseNames.java index ea96a2e55..d0e0ba73c 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/ChangeColCaseNames.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/ChangeColCaseNames.java @@ -25,7 +25,6 @@ import io.cdap.cdap.etl.api.relational.InvalidRelation; import io.cdap.cdap.etl.api.relational.Relation; import io.cdap.cdap.etl.api.relational.RelationalTranformContext; -import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -40,11 +39,13 @@ import io.cdap.wrangler.api.parser.Identifier; import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; +import io.cdap.wrangler.utils.SqlExpressionGenerator; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Set; /** * This class ChangeColCaseNames converts the case of the columns @@ -107,7 +108,8 @@ public Mutation lineage() { @Override public Relation 
transform(RelationalTranformContext relationalTranformContext, Relation relation) { - java.util.Optional> expressionFactory = getExpressionFactory(relationalTranformContext); + java.util.Optional> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); if (!expressionFactory.isPresent()) { return new InvalidRelation("Cannot find an Expression Factory"); } @@ -118,9 +120,9 @@ public Relation transform(RelationalTranformContext relationalTranformContext, private List generateListCols(RelationalTranformContext relationalTranformContext) { List colnames = new ArrayList(); - java.util.Set s = relationalTranformContext.getInputRelationNames(); - for (String inp : s) { - Schema schema = relationalTranformContext.getInputSchema(inp); + Set inputRelationNames = relationalTranformContext.getInputRelationNames(); + for (String inputRelationName : inputRelationNames) { + Schema schema = relationalTranformContext.getInputSchema(inputRelationName); List fields = schema.getFields(); for (Schema.Field field: fields) { colnames.add(field.getName()); @@ -139,5 +141,4 @@ private Map generateColumnCaseMap(List columns, Expr return columnExpMap; } -} - +} \ No newline at end of file diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java b/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java index 6ec847a76..edf38073b 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java @@ -40,8 +40,8 @@ import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; import io.cdap.wrangler.utils.ColumnConverter; -import io.cdap.wrangler.utils.ColumnTypeExpression; import io.cdap.wrangler.utils.SqlExpressionGenerator; +import io.cdap.wrangler.utils.SqlTypeExpGenerator; import java.math.RoundingMode; import java.util.List; @@ -124,9 +124,9 @@ public Relation transform(RelationalTranformContext 
relationalTranformContext, } String expression; if (scale == null) { - expression = ColumnTypeExpression.getColumnTypeExp(type, col); + expression = SqlTypeExpGenerator.getColumnTypeExp(type, col); } else { - expression = ColumnTypeExpression.getColumnTypeExp(type, col, scale); + expression = SqlTypeExpGenerator.getColumnTypeExp(type, col, scale); } return relation.setColumn(col, expressionFactory.get().compile(expression)); } diff --git a/wrangler-core/src/main/java/io/cdap/wrangler/utils/ColumnTypeExpression.java b/wrangler-core/src/main/java/io/cdap/wrangler/utils/SqlTypeExpGenerator.java similarity index 63% rename from wrangler-core/src/main/java/io/cdap/wrangler/utils/ColumnTypeExpression.java rename to wrangler-core/src/main/java/io/cdap/wrangler/utils/SqlTypeExpGenerator.java index 7bd592186..2e22334e8 100644 --- a/wrangler-core/src/main/java/io/cdap/wrangler/utils/ColumnTypeExpression.java +++ b/wrangler-core/src/main/java/io/cdap/wrangler/utils/SqlTypeExpGenerator.java @@ -19,66 +19,67 @@ /** * Utility class that returns a string of SQL expression for the given data type. 
*/ -public final class ColumnTypeExpression { +public final class SqlTypeExpGenerator { public static String getColumnTypeExp(String toType, String column, int scale) { toType = toType.toUpperCase(); String expression; + if (!toType.equals("DECIMAL")) { + return column; + } + expression = ("CAST(" + column + " AS DECIMAL(38," + scale + "))"); + return expression; + } + public static String getColumnTypeExp(String toType, String column) { + toType = toType.toUpperCase(); + String expression = ""; switch (toType) { case "INTEGER": case "I64": case "INT": { - expression = "CAST(" + column + " AS INT)"; + expression += "CAST(" + column + " AS INT)"; return expression; } case "I32": case "SHORT": { - expression = "CAST(" + column + " AS SMALLINT)"; + expression += "CAST(" + column + " AS SMALLINT)"; return expression; } case "LONG": { - expression = "CAST(" + column + " AS BIGINT)"; + expression += "CAST(" + column + " AS BIGINT)"; return expression; } case "BOOL": case "BOOLEAN": { - expression = "CAST(" + column + " AS BOOLEAN)"; + expression += "CAST(" + column + " AS BOOLEAN)"; return expression; } case "STRING": { - expression = "CAST(" + column + " AS STRING)"; + expression += "CAST(" + column + " AS STRING)"; return expression; } case "FLOAT": { - expression = "CAST(" + column + " AS FLOAT)"; - return expression; - } - - case "DECIMAL": { - expression = "CAST(" + column + " AS DECIMAL(38," + scale + "))"; + expression += "CAST(" + column + " AS FLOAT)"; return expression; } case "DOUBLE": { - expression = "CAST(" + column + " AS DOUBLE)"; + expression += "CAST(" + column + " AS DOUBLE)"; return expression; } case "BYTES": { - expression = "CAST(" + column + " AS TINYINT)"; + expression += "CAST(" + column + " AS TINYINT)"; return expression; } default: return column; -// throw new DirectiveExecutionException(String.format( -// "Column '%s' is of unsupported type '%s'. 
Supported types are: " + -// "int, short, long, double, decimal, boolean, string, bytes", column, toType)); } } -} +} \ No newline at end of file From 31e6f84a799b7256ce3dd3692841e41ebfb3012c Mon Sep 17 00:00:00 2001 From: Shruti Verma Date: Wed, 28 Jun 2023 06:41:07 +0000 Subject: [PATCH 21/50] Fix checkstyle errors --- .../java/io/cdap/wrangler/api/Directive.java | 2 +- .../directives/column/ChangeColCaseNames.java | 2 +- .../java/io/cdap/directives/column/Copy.java | 6 ++--- .../java/io/cdap/directives/column/Keep.java | 3 +-- .../java/io/cdap/directives/column/Merge.java | 2 +- .../io/cdap/directives/column/SetType.java | 2 +- .../directives/transformation/RightTrim.java | 2 +- .../directives/transformation/TitleCase.java | 2 +- .../cdap/directives/transformation/Trim.java | 2 +- .../cdap/directives/transformation/Upper.java | 2 +- .../utils/SqlExpressionGenerator.java | 26 ++++++++++++++++--- .../wrangler/utils/SqlTypeExpGenerator.java | 2 +- .../main/java/io/cdap/wrangler/Wrangler.java | 13 +++++----- 13 files changed, 41 insertions(+), 25 deletions(-) diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java index 19f2aacc1..6585ddf13 100644 --- a/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java +++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java @@ -138,4 +138,4 @@ default Relation transform(RelationalTranformContext relationalTranformContext, return relation; } -} \ No newline at end of file +} diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/ChangeColCaseNames.java b/wrangler-core/src/main/java/io/cdap/directives/column/ChangeColCaseNames.java index d0e0ba73c..e1c0056dd 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/ChangeColCaseNames.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/ChangeColCaseNames.java @@ -141,4 +141,4 @@ private Map generateColumnCaseMap(List columns, Expr return 
columnExpMap; } -} \ No newline at end of file +} diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/Copy.java b/wrangler-core/src/main/java/io/cdap/directives/column/Copy.java index f2d0d296b..9f52f32ce 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/Copy.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/Copy.java @@ -28,6 +28,7 @@ import io.cdap.wrangler.api.DirectiveExecutionException; import io.cdap.wrangler.api.DirectiveParseException; import io.cdap.wrangler.api.ExecutorContext; +import io.cdap.wrangler.api.Optional; import io.cdap.wrangler.api.Row; import io.cdap.wrangler.api.annotations.Categories; import io.cdap.wrangler.api.lineage.Lineage; @@ -38,7 +39,6 @@ import io.cdap.wrangler.utils.SqlExpressionGenerator; import java.util.List; -import java.util.Optional; /** * A directive for copying value of one column to another. @@ -117,7 +117,7 @@ public Mutation lineage() { @Override public Relation transform(RelationalTranformContext relationalTranformContext, Relation relation) { - Optional> expressionFactory = SqlExpressionGenerator + java.util.Optional> expressionFactory = SqlExpressionGenerator .getExpressionFactory(relationalTranformContext); if (!expressionFactory.isPresent()) { return new InvalidRelation("Cannot find an Expression Factory"); @@ -125,4 +125,4 @@ public Relation transform(RelationalTranformContext relationalTranformContext, return relation.setColumn(destination.value(), expressionFactory.get().compile(source.value())); } -} \ No newline at end of file +} diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/Keep.java b/wrangler-core/src/main/java/io/cdap/directives/column/Keep.java index c23d85c26..de2bbb123 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/Keep.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/Keep.java @@ -44,7 +44,6 @@ import java.util.Map; import java.util.Optional; import java.util.Set; -import java.util.stream.Collectors; 
/** * This class Keep implements a directive that @@ -111,7 +110,7 @@ public Relation transform(RelationalTranformContext relationalTranformContext, return new InvalidRelation("Cannot find an Expression Factory"); } Map keepCol = SqlExpressionGenerator - .generateColumnExpMap(keep.stream().collect(Collectors.toList()), expressionFactory.get()); + .generateColumnExpMap(keep, expressionFactory.get()); return relation.select(keepCol); } diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/Merge.java b/wrangler-core/src/main/java/io/cdap/directives/column/Merge.java index 04d0a831a..fa1035846 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/Merge.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/Merge.java @@ -125,4 +125,4 @@ public Relation transform(RelationalTranformContext relationalTranformContext, .compile("CONCAT(" + col1 + ",'" + delimiter + "'," + col2 + ")")); } -} \ No newline at end of file +} diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java b/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java index edf38073b..675d7a30d 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java @@ -131,4 +131,4 @@ public Relation transform(RelationalTranformContext relationalTranformContext, return relation.setColumn(col, expressionFactory.get().compile(expression)); } -} \ No newline at end of file +} diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/RightTrim.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/RightTrim.java index 6ef616bc5..2f1b4f7f7 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/RightTrim.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/RightTrim.java @@ -105,4 +105,4 @@ public Relation transform(RelationalTranformContext relationalTranformContext, return 
relation.setColumn(column, expressionFactory.get().compile("RTRIM(" + column + ")")); } -} \ No newline at end of file +} diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/TitleCase.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/TitleCase.java index f58484796..bfb4437f8 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/TitleCase.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/TitleCase.java @@ -105,4 +105,4 @@ public Relation transform(RelationalTranformContext relationalTranformContext, return relation.setColumn(column, expressionFactory.get().compile("initcap(" + column + ")")); } -} \ No newline at end of file +} diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/Trim.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/Trim.java index f4efa957e..dc0585407 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/Trim.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/Trim.java @@ -104,4 +104,4 @@ public Relation transform(RelationalTranformContext relationalTranformContext, return relation.setColumn(column, expressionFactory.get().compile("TRIM(" + column + ")")); } -} \ No newline at end of file +} diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/Upper.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/Upper.java index 713dbf606..9bb68da1e 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/Upper.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/Upper.java @@ -104,4 +104,4 @@ public Relation transform(RelationalTranformContext relationalTranformContext, return relation.setColumn(column, expressionFactory.get().compile("UPPER(" + column + ")")); } -} \ No newline at end of file +} diff --git a/wrangler-core/src/main/java/io/cdap/wrangler/utils/SqlExpressionGenerator.java 
b/wrangler-core/src/main/java/io/cdap/wrangler/utils/SqlExpressionGenerator.java index 0f636dc30..463bd386e 100644 --- a/wrangler-core/src/main/java/io/cdap/wrangler/utils/SqlExpressionGenerator.java +++ b/wrangler-core/src/main/java/io/cdap/wrangler/utils/SqlExpressionGenerator.java @@ -1,3 +1,19 @@ +/* + * Copyright © 2017-2019 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + package io.cdap.wrangler.utils; import io.cdap.cdap.etl.api.relational.Expression; @@ -5,21 +21,23 @@ import io.cdap.cdap.etl.api.relational.RelationalTranformContext; import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; - +import java.util.Collection; import java.util.LinkedHashMap; -import java.util.List; import java.util.Map; import java.util.Optional; +/** + * Utility class that contains methods for sql expression generation. 
+ */ public class SqlExpressionGenerator { public static Optional> getExpressionFactory(RelationalTranformContext ctx) { return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); } - public static Map generateColumnExpMap(List columns, ExpressionFactory factory) { + public static Map generateColumnExpMap(Collection columns, ExpressionFactory factory) { Map columnExpMap = new LinkedHashMap<>(); - columns.forEach((colName)-> columnExpMap.put(colName, factory.compile(colName))); + columns.forEach((colName)-> columnExpMap.put((String) colName, factory.compile(colName.toString()))); return columnExpMap; } } diff --git a/wrangler-core/src/main/java/io/cdap/wrangler/utils/SqlTypeExpGenerator.java b/wrangler-core/src/main/java/io/cdap/wrangler/utils/SqlTypeExpGenerator.java index 2e22334e8..3b5374446 100644 --- a/wrangler-core/src/main/java/io/cdap/wrangler/utils/SqlTypeExpGenerator.java +++ b/wrangler-core/src/main/java/io/cdap/wrangler/utils/SqlTypeExpGenerator.java @@ -82,4 +82,4 @@ public static String getColumnTypeExp(String toType, String column) { return column; } } -} \ No newline at end of file +} diff --git a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java index 562ec7394..84fbd30e6 100644 --- a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java +++ b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java @@ -75,7 +75,9 @@ import io.cdap.wrangler.registry.DirectiveRegistry; import io.cdap.wrangler.registry.SystemDirectiveRegistry; import io.cdap.wrangler.registry.UserDirectiveRegistry; +import io.cdap.wrangler.utils.SqlExpressionGenerator; import io.cdap.wrangler.utils.StructuredToRowTransformer; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -581,7 +583,8 @@ && checkPreconditionNotEmpty(true))) { throw new RuntimeException("SQL Precondition feature is not available"); } - Optional> expressionFactory = 
getExpressionFactory(relationalTranformContext); + Optional> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); if (!expressionFactory.isPresent()) { return new InvalidRelation("Cannot find an Expression Factory"); } @@ -600,7 +603,8 @@ && checkPreconditionNotEmpty(true))) { List directives = null; try { - GrammarBasedParser parser = new GrammarBasedParser("default", new MigrateToV2(recipe).migrate(), registry); + GrammarBasedParser parser = new GrammarBasedParser("default", + new MigrateToV2(recipe).migrate(), registry); directives = parser.parse(); } catch (DirectiveParseException e) { throw new RuntimeException(e); @@ -623,10 +627,6 @@ private List getColumnsOfDropSQL(String sql) { return cols; } - private Optional> getExpressionFactory(RelationalTranformContext ctx) { - return ctx.getEngine().getExpressionFactory(StringExpressionFactoryType.SQL); - } - /** * This method emits all metrics for the given list of directives * @@ -778,4 +778,3 @@ public String getUDDs() { } } } - From e21735864f1e20ffb7d2a0835b5fcf883b860085 Mon Sep 17 00:00:00 2001 From: Shruti Verma Date: Fri, 30 Jun 2023 13:21:44 +0000 Subject: [PATCH 22/50] Changes to set type util function --- .../java/io/cdap/directives/column/Merge.java | 2 +- .../io/cdap/directives/column/SetType.java | 8 +- .../utils/SqlExpressionGenerator.java | 65 ++++++++++++++ .../wrangler/utils/SqlTypeExpGenerator.java | 85 ------------------- 4 files changed, 67 insertions(+), 93 deletions(-) delete mode 100644 wrangler-core/src/main/java/io/cdap/wrangler/utils/SqlTypeExpGenerator.java diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/Merge.java b/wrangler-core/src/main/java/io/cdap/directives/column/Merge.java index fa1035846..ce6257b5f 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/Merge.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/Merge.java @@ -122,7 +122,7 @@ public Relation transform(RelationalTranformContext 
relationalTranformContext, return new InvalidRelation("Cannot find an Expression Factory"); } return relation.setColumn(dest, expressionFactory.get() - .compile("CONCAT(" + col1 + ",'" + delimiter + "'," + col2 + ")")); + .compile(String.format("CONCAT(%s,'%s',%s)", col1, delimiter, col2))); } } diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java b/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java index 675d7a30d..9c4042b6d 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java @@ -41,7 +41,6 @@ import io.cdap.wrangler.api.parser.UsageDefinition; import io.cdap.wrangler.utils.ColumnConverter; import io.cdap.wrangler.utils.SqlExpressionGenerator; -import io.cdap.wrangler.utils.SqlTypeExpGenerator; import java.math.RoundingMode; import java.util.List; @@ -122,12 +121,7 @@ public Relation transform(RelationalTranformContext relationalTranformContext, if (!expressionFactory.isPresent()) { return new InvalidRelation("Cannot find an Expression Factory"); } - String expression; - if (scale == null) { - expression = SqlTypeExpGenerator.getColumnTypeExp(type, col); - } else { - expression = SqlTypeExpGenerator.getColumnTypeExp(type, col, scale); - } + String expression = SqlExpressionGenerator.getColumnTypeExp(type, col, scale); return relation.setColumn(col, expressionFactory.get().compile(expression)); } diff --git a/wrangler-core/src/main/java/io/cdap/wrangler/utils/SqlExpressionGenerator.java b/wrangler-core/src/main/java/io/cdap/wrangler/utils/SqlExpressionGenerator.java index 463bd386e..b249663bc 100644 --- a/wrangler-core/src/main/java/io/cdap/wrangler/utils/SqlExpressionGenerator.java +++ b/wrangler-core/src/main/java/io/cdap/wrangler/utils/SqlExpressionGenerator.java @@ -26,6 +26,8 @@ import java.util.Map; import java.util.Optional; +import javax.annotation.Nullable; + /** * Utility class that contains methods for sql 
expression generation. */ @@ -40,4 +42,67 @@ public static Map generateColumnExpMap(Collection columns, E columns.forEach((colName)-> columnExpMap.put((String) colName, factory.compile(colName.toString()))); return columnExpMap; } + + public static String getColumnTypeExp(String toType, String column, @Nullable Integer scale) { + toType = toType.toUpperCase(); + String expression; + switch (toType) { + case "INTEGER": + case "I64": + case "INT": { + expression = "CAST(" + column + " AS INT)"; + return expression; + } + + case "I32": + case "SHORT": { + expression = "CAST(" + column + " AS SMALLINT)"; + return expression; + } + + case "LONG": { + expression = "CAST(" + column + " AS BIGINT)"; + return expression; + } + + case "BOOL": + case "BOOLEAN": { + expression = "CAST(" + column + " AS BOOLEAN)"; + return expression; + } + + case "STRING": { + expression = "CAST(" + column + " AS STRING)"; + return expression; + } + + case "FLOAT": { + expression = "CAST(" + column + " AS FLOAT)"; + return expression; + } + + case "DOUBLE": { + expression = "CAST(" + column + " AS DOUBLE)"; + return expression; + } + + case "DECIMAL": { + if (scale != null) { + expression = String.format("CAST(%s AS DECIMAL(38,%d))", column, scale); + return expression; + } else { + expression = String.format("CAST(%s AS DECIMAL)", column); + } + return expression; + } + + case "BYTES": { + expression = "CAST(" + column + " AS TINYINT)"; + return expression; + } + + default: + return column; + } + } } diff --git a/wrangler-core/src/main/java/io/cdap/wrangler/utils/SqlTypeExpGenerator.java b/wrangler-core/src/main/java/io/cdap/wrangler/utils/SqlTypeExpGenerator.java deleted file mode 100644 index 3b5374446..000000000 --- a/wrangler-core/src/main/java/io/cdap/wrangler/utils/SqlTypeExpGenerator.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright © 2017-2019 Cask Data, Inc. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -package io.cdap.wrangler.utils; - -/** - * Utility class that returns a string of SQL expression for the given data type. - */ -public final class SqlTypeExpGenerator { - - public static String getColumnTypeExp(String toType, String column, int scale) { - toType = toType.toUpperCase(); - String expression; - if (!toType.equals("DECIMAL")) { - return column; - } - expression = ("CAST(" + column + " AS DECIMAL(38," + scale + "))"); - return expression; - } - public static String getColumnTypeExp(String toType, String column) { - toType = toType.toUpperCase(); - String expression = ""; - switch (toType) { - case "INTEGER": - case "I64": - case "INT": { - expression += "CAST(" + column + " AS INT)"; - return expression; - } - - case "I32": - case "SHORT": { - expression += "CAST(" + column + " AS SMALLINT)"; - return expression; - } - - case "LONG": { - expression += "CAST(" + column + " AS BIGINT)"; - return expression; - } - - case "BOOL": - case "BOOLEAN": { - expression += "CAST(" + column + " AS BOOLEAN)"; - return expression; - } - - case "STRING": { - expression += "CAST(" + column + " AS STRING)"; - return expression; - } - - case "FLOAT": { - expression += "CAST(" + column + " AS FLOAT)"; - return expression; - } - - case "DOUBLE": { - expression += "CAST(" + column + " AS DOUBLE)"; - return expression; - } - - case "BYTES": { - expression += "CAST(" + column + " AS TINYINT)"; - return expression; 
- } - - default: - return column; - } - } -} From c5165f16b34d9057b76f57fa1edb67df03bb1c9c Mon Sep 17 00:00:00 2001 From: Shruti Verma Date: Tue, 4 Jul 2023 05:16:09 +0000 Subject: [PATCH 23/50] Fix rename directive implementation --- .../src/main/java/io/cdap/directives/column/Rename.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/Rename.java b/wrangler-core/src/main/java/io/cdap/directives/column/Rename.java index 78f67a3b9..cc52be488 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/Rename.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/Rename.java @@ -96,7 +96,8 @@ public Relation transform(RelationalTranformContext relationalTranformContext, if (!expressionFactory.isPresent()) { return new InvalidRelation("Cannot find an Expression Factory"); } - return relation.setColumn(target.value(), expressionFactory.get().compile(source.value())); + relation = relation.setColumn(target.value(), expressionFactory.get().compile(source.value())); + return relation.dropColumn(source.value()); } } From 7cb8aa3d87e249cff2a49024fbb186bc90a21461 Mon Sep 17 00:00:00 2001 From: shrverma Date: Tue, 11 Jul 2023 12:31:16 +0000 Subject: [PATCH 24/50] Add UI toggle to wrangler --- .../main/java/io/cdap/wrangler/Wrangler.java | 56 ++++++++++++------- .../widgets/Wrangler-transform.json | 45 +++++++++++++-- 2 files changed, 78 insertions(+), 23 deletions(-) diff --git a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java index 562ec7394..748ed244a 100644 --- a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java +++ b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java @@ -44,7 +44,6 @@ import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.cdap.features.Feature; import io.cdap.directives.aggregates.DefaultTransientStore; -import 
io.cdap.directives.column.Drop; import io.cdap.wrangler.api.CompileException; import io.cdap.wrangler.api.CompileStatus; import io.cdap.wrangler.api.Compiler; @@ -58,7 +57,6 @@ import io.cdap.wrangler.api.RecipeParser; import io.cdap.wrangler.api.RecipePipeline; import io.cdap.wrangler.api.RecipeSymbol; -import io.cdap.wrangler.api.RelationalDirective; import io.cdap.wrangler.api.Row; import io.cdap.wrangler.api.TokenGroup; import io.cdap.wrangler.api.TransientStore; @@ -124,8 +122,8 @@ public class Wrangler extends Transform impl public static final String DIRECTIVE_ENTITY_TYPE = "directive"; // Precondition languages - private static final String PRECONDITION_LANGUAGE_JEXL = "jexl"; - private static final String PRECONDITION_LANGUAGE_SQL = "sql"; + private static final String LANGUAGE_JEXL = "jexl"; + private static final String LANGUAGE_SQL = "sql"; // Plugin configuration. private final Config config; @@ -187,9 +185,13 @@ public void configurePipeline(PipelineConfigurer configurer) { directives = String.format("#pragma load-directives %s;%s", config.getUDDs(), config.getDirectives()); } } - + if (LANGUAGE_SQL.equalsIgnoreCase(config.getExecutionLanguage()) + && Strings.isNullOrEmpty(config.preconditionLanguage)) { + config.preconditionLanguage = LANGUAGE_SQL; + } if (!config.containsMacro(Config.NAME_PRECONDITION_LANGUAGE)) { - if (PRECONDITION_LANGUAGE_SQL.equalsIgnoreCase(config.getPreconditionLanguage())) { + if (LANGUAGE_SQL.equalsIgnoreCase(config.getPreconditionLanguage()) + || LANGUAGE_SQL.equalsIgnoreCase(config.getExecutionLanguage())) { if (!config.containsMacro(Config.NAME_PRECONDITION_SQL)) { validatePrecondition(config.getPreconditionSQL(), true, collector); } @@ -263,8 +265,9 @@ public void configurePipeline(PipelineConfigurer configurer) { // Check if jexl pre-condition is not null or empty and if so compile expression. 
if (!config.containsMacro(Config.NAME_PRECONDITION) && !config.containsMacro(Config.NAME_PRECONDITION_LANGUAGE)) { - if (PRECONDITION_LANGUAGE_JEXL.equalsIgnoreCase(config.getPreconditionLanguage()) - && checkPreconditionNotEmpty(false)) { + if (LANGUAGE_JEXL.equalsIgnoreCase(config.getPreconditionLanguage()) + && checkPreconditionNotEmpty(false) + && LANGUAGE_JEXL.equalsIgnoreCase(config.getExecutionLanguage())) { try { new Precondition(config.getPreconditionJEXL()); } catch (PreconditionException e) { @@ -356,7 +359,7 @@ public void initialize(TransformContext context) throws Exception { // Check if jexl pre-condition is not null or empty and if so compile expression. if (!config.containsMacro(Config.NAME_PRECONDITION_LANGUAGE)) { - if (PRECONDITION_LANGUAGE_JEXL.equalsIgnoreCase(config.getPreconditionLanguage()) + if (LANGUAGE_JEXL.equalsIgnoreCase(config.getExecutionLanguage()) && checkPreconditionNotEmpty(false)) { try { condition = new Precondition(config.getPreconditionJEXL()); @@ -414,7 +417,7 @@ public void transform(StructuredRecord input, Emitter emitter) } // If pre-condition is set, then evaluate the precondition - if (PRECONDITION_LANGUAGE_JEXL.equalsIgnoreCase(config.getPreconditionLanguage()) + if (LANGUAGE_JEXL.equalsIgnoreCase(config.getExecutionLanguage()) && checkPreconditionNotEmpty(false)) { boolean skip = condition.apply(row); if (skip) { @@ -572,13 +575,14 @@ private RecipeParser getRecipeParser(StageContext context) @Override public Relation transform(RelationalTranformContext relationalTranformContext, Relation relation) { - if (!(PRECONDITION_LANGUAGE_SQL.equalsIgnoreCase(config.getPreconditionLanguage()) - && checkPreconditionNotEmpty(true))) { - return new InvalidRelation("Plugin is not configured for relational transformation"); + if (LANGUAGE_JEXL.equalsIgnoreCase(config.getPreconditionLanguage()) + && LANGUAGE_JEXL.equalsIgnoreCase(config.getExecutionLanguage())) { + return new InvalidRelation("Plugin is not configured for relational 
transformation"); } - if (!Feature.WRANGLER_PRECONDITION_SQL.isEnabled(relationalTranformContext)) { - throw new RuntimeException("SQL Precondition feature is not available"); + if (!(Feature.WRANGLER_PRECONDITION_SQL.isEnabled(relationalTranformContext) + || Feature.WRANGLER_EXECUTION_SQL.isEnabled(relationalTranformContext))) { + throw new RuntimeException("SQL execution feature is not available"); } Optional> expressionFactory = getExpressionFactory(relationalTranformContext); @@ -675,6 +679,7 @@ public static class Config extends PluginConfig { static final String NAME_PRECONDITION = "precondition"; static final String NAME_PRECONDITION_SQL = "preconditionSQL"; static final String NAME_PRECONDITION_LANGUAGE = "expressionLanguage"; + static final String NAME_EXECUTION_LANGUAGE = "executionLanguage"; static final String NAME_FIELD = "field"; static final String NAME_DIRECTIVES = "directives"; static final String NAME_RELATIONAL_DIRECTIVES = "relationalDirectives"; @@ -688,6 +693,12 @@ public static class Config extends PluginConfig { @Nullable private String preconditionLanguage; + @Name(NAME_EXECUTION_LANGUAGE) + @Description("Toggle to configure execution language between JEXL and SQL") + @Macro + @Nullable + private String executionLanguage; + @Name(NAME_PRECONDITION) @Description("JEXL Precondition expression specifying filtering before applying directives (true to filter)") @Macro @@ -730,9 +741,10 @@ public static class Config extends PluginConfig { @Nullable private final String onError; - public Config(String preconditionLanguage, String precondition, String directives, String udds, - String field, String schema, String onError, String relationalDirectives) { + public Config(String preconditionLanguage, String executionLanguage, String precondition, String directives, + String udds, String field, String schema, String onError, String relationalDirectives) { this.preconditionLanguage = preconditionLanguage; + this.executionLanguage = executionLanguage; 
this.precondition = precondition; this.directives = directives; this.udds = udds; @@ -752,10 +764,17 @@ public String getOnError() { public String getPreconditionLanguage() { if (Strings.isNullOrEmpty(preconditionLanguage)) { // due to backward compatibility... - return PRECONDITION_LANGUAGE_JEXL; + return LANGUAGE_JEXL; } return preconditionLanguage; } + public String getExecutionLanguage() { + if (Strings.isNullOrEmpty(executionLanguage)) { + // due to backward compatibility... + return LANGUAGE_JEXL; + } + return executionLanguage; + } public String getPreconditionJEXL() { return precondition; @@ -778,4 +797,3 @@ public String getUDDs() { } } } - diff --git a/wrangler-transform/widgets/Wrangler-transform.json b/wrangler-transform/widgets/Wrangler-transform.json index 51ef2b9ef..b3291289a 100644 --- a/wrangler-transform/widgets/Wrangler-transform.json +++ b/wrangler-transform/widgets/Wrangler-transform.json @@ -37,6 +37,25 @@ ] } }, + { + "widget-type": "radio-group", + "name": "executionLanguage", + "label": "Execution Language", + "widget-attributes": { + "layout": "inline", + "default": "jexl", + "options": [ + { + "id": "jexl", + "label": "JEXL" + }, + { + "id": "sql", + "label": "SQL" + } + ] + } + }, { "widget-type": "textbox", "label": "Precondition (JEXL)", @@ -50,7 +69,7 @@ "label": "Precondition (SQL)", "name": "preconditionSQL", "widget-attributes" : { - "default" : "false" + "default" : "true" } } ] @@ -126,7 +145,7 @@ { "name": "PreconditionValueNotSQL", "condition": { - "expression": "expressionLanguage != 'sql'" + "expression": "expressionLanguage == 'jexl' && executionLanguage == 'jexl'" }, "show": [ { @@ -138,7 +157,7 @@ { "name": "preconditionValueSQL", "condition": { - "expression": "expressionLanguage == 'sql'" + "expression": "expressionLanguage == 'sql' || executionLanguage == 'sql'" }, "show": [ { @@ -150,9 +169,27 @@ { "name": "preconditionSQLEnabled", "condition": { - "expression": "featureFlags['wrangler.precondition.sql.enabled'] == 
true" + "expression": "featureFlags['wrangler.precondition.sql.enabled'] == true && featureFlags['wrangler.execution.sql.enabled'] == false" + }, + "show": [ + { + "type": "properties", + "name": "expressionLanguage" + } + ] + }, + { + "name": "preconditionSQLEnabled", + "condition": { + "expression": "featureFlags['wrangler.execution.sql.enabled'] == true" }, "show": [ + { + "type": "properties", + "name": "executionLanguage" + } + ], + "hide": [ { "type": "properties", "name": "expressionLanguage" From 1adbb2df10eeb3a7f6d25ba81fab16cbe5d7e451 Mon Sep 17 00:00:00 2001 From: shrverma Date: Tue, 11 Jul 2023 12:33:55 +0000 Subject: [PATCH 25/50] Fix checkstyle error --- wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java index 748ed244a..994812080 100644 --- a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java +++ b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java @@ -577,7 +577,7 @@ private RecipeParser getRecipeParser(StageContext context) public Relation transform(RelationalTranformContext relationalTranformContext, Relation relation) { if (LANGUAGE_JEXL.equalsIgnoreCase(config.getPreconditionLanguage()) && LANGUAGE_JEXL.equalsIgnoreCase(config.getExecutionLanguage())) { - return new InvalidRelation("Plugin is not configured for relational transformation"); + return new InvalidRelation("Plugin is not configured for relational transformation"); } if (!(Feature.WRANGLER_PRECONDITION_SQL.isEnabled(relationalTranformContext) From 2a1c2b79c7bfb1be60c43737fb8124bcaae75396 Mon Sep 17 00:00:00 2001 From: shrverma Date: Wed, 12 Jul 2023 12:34:59 +0000 Subject: [PATCH 26/50] Implement swap directive --- .../java/io/cdap/directives/column/Swap.java | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git 
a/wrangler-core/src/main/java/io/cdap/directives/column/Swap.java b/wrangler-core/src/main/java/io/cdap/directives/column/Swap.java index b35da3283..55ea1b27f 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/Swap.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/Swap.java @@ -19,6 +19,10 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -32,8 +36,10 @@ import io.cdap.wrangler.api.parser.ColumnName; import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; +import io.cdap.wrangler.utils.SqlExpressionGenerator; import java.util.List; +import java.util.Optional; /** * A directive for swapping the column names. 
@@ -93,4 +99,16 @@ public Mutation lineage() { .relation(Many.of(left, right), Many.of(right, left)) .build(); } + + @Override + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + Optional> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + Relation tempRel = relation.setColumn(left, expressionFactory.get().compile(right)); + return tempRel.setColumn(right, expressionFactory.get().compile(left)); + } } From 19518e65f69dbb4fac968797e233b85b57df7aaf Mon Sep 17 00:00:00 2001 From: shrverma Date: Fri, 14 Jul 2023 05:27:40 +0000 Subject: [PATCH 27/50] Refactor execution logic --- .../main/java/io/cdap/wrangler/Wrangler.java | 69 ++++++++++--------- .../widgets/Wrangler-transform.json | 2 +- 2 files changed, 37 insertions(+), 34 deletions(-) diff --git a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java index 994812080..61596f9ee 100644 --- a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java +++ b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java @@ -149,6 +149,8 @@ public class Wrangler extends Transform impl // on error strategy private String onErrorStrategy; + private boolean isSqlExecutionEnabled; + // This is used only for tests, otherwise this is being injected by the ingestion framework. 
public Wrangler(Config config) { this.config = config; @@ -185,17 +187,15 @@ public void configurePipeline(PipelineConfigurer configurer) { directives = String.format("#pragma load-directives %s;%s", config.getUDDs(), config.getDirectives()); } } - if (LANGUAGE_SQL.equalsIgnoreCase(config.getExecutionLanguage()) - && Strings.isNullOrEmpty(config.preconditionLanguage)) { - config.preconditionLanguage = LANGUAGE_SQL; - } + + isSqlExecutionEnabled = checkSQLExecution(config); + if (!config.containsMacro(Config.NAME_PRECONDITION_LANGUAGE)) { - if (LANGUAGE_SQL.equalsIgnoreCase(config.getPreconditionLanguage()) - || LANGUAGE_SQL.equalsIgnoreCase(config.getExecutionLanguage())) { + if (isSqlExecutionEnabled) { if (!config.containsMacro(Config.NAME_PRECONDITION_SQL)) { validatePrecondition(config.getPreconditionSQL(), true, collector); } - //validateSQLModeDirectives(collector); + validateSQLModeDirectives(collector); } else { if (!config.containsMacro(Config.NAME_PRECONDITION)) { validatePrecondition(config.getPreconditionJEXL(), false, collector); @@ -265,9 +265,7 @@ public void configurePipeline(PipelineConfigurer configurer) { // Check if jexl pre-condition is not null or empty and if so compile expression. if (!config.containsMacro(Config.NAME_PRECONDITION) && !config.containsMacro(Config.NAME_PRECONDITION_LANGUAGE)) { - if (LANGUAGE_JEXL.equalsIgnoreCase(config.getPreconditionLanguage()) - && checkPreconditionNotEmpty(false) - && LANGUAGE_JEXL.equalsIgnoreCase(config.getExecutionLanguage())) { + if (!isSqlExecutionEnabled && checkPreconditionNotEmpty(false)) { try { new Precondition(config.getPreconditionJEXL()); } catch (PreconditionException e) { @@ -359,8 +357,7 @@ public void initialize(TransformContext context) throws Exception { // Check if jexl pre-condition is not null or empty and if so compile expression. 
if (!config.containsMacro(Config.NAME_PRECONDITION_LANGUAGE)) { - if (LANGUAGE_JEXL.equalsIgnoreCase(config.getExecutionLanguage()) - && checkPreconditionNotEmpty(false)) { + if (!isSqlExecutionEnabled && checkPreconditionNotEmpty(false)) { try { condition = new Precondition(config.getPreconditionJEXL()); } catch (PreconditionException e) { @@ -417,8 +414,7 @@ public void transform(StructuredRecord input, Emitter<StructuredRecord> emitter) } // If pre-condition is set, then evaluate the precondition - if (LANGUAGE_JEXL.equalsIgnoreCase(config.getExecutionLanguage()) - && checkPreconditionNotEmpty(false)) { + if (!isSqlExecutionEnabled && checkPreconditionNotEmpty(false)) { boolean skip = condition.apply(row); if (skip) { getContext().getMetrics().count("precondition.filtered", 1); @@ -527,11 +523,6 @@ private void validatePrecondition(String precondition, Boolean isConditionSQL, F } private void validateSQLModeDirectives(FailureCollector collector) { - if (!Strings.isNullOrEmpty(config.getDirectives())) { - collector.addFailure("Directives are not supported for precondition of type SQL", null) - .withConfigProperty(Config.NAME_DIRECTIVES); - } - if (!Strings.isNullOrEmpty(config.getUDDs())) { collector.addFailure("UDDs are not supported for precondition of type SQL", null) .withConfigProperty(Config.NAME_UDD); @@ -550,6 +541,27 @@ private boolean checkPreconditionNotEmpty(Boolean isConditionSQL) { return false; } + private boolean checkSQLExecution(Config config) { + if (!(Feature.WRANGLER_PRECONDITION_SQL.isEnabled(getContext()) + || Feature.WRANGLER_EXECUTION_SQL.isEnabled(getContext()))) { + // disabling SQL execution for precondition and directives + return false; + } + + if (!Strings.isNullOrEmpty(config.getPreconditionLanguage())) { + if (LANGUAGE_SQL.equalsIgnoreCase(config.getPreconditionLanguage())) { + return true; + } + } + + if (!Strings.isNullOrEmpty(config.getExecutionLanguage())) { + return LANGUAGE_SQL.equalsIgnoreCase(config.getExecutionLanguage()) ?
true : false; + } + + // for backwards compatibility + return false; + } + /** * This method creates a {@link CompositeDirectiveRegistry} and initializes the {@link RecipeParser} * with {@link NoOpDirectiveContext} @@ -575,15 +587,14 @@ private RecipeParser getRecipeParser(StageContext context) @Override public Relation transform(RelationalTranformContext relationalTranformContext, Relation relation) { - if (LANGUAGE_JEXL.equalsIgnoreCase(config.getPreconditionLanguage()) - && LANGUAGE_JEXL.equalsIgnoreCase(config.getExecutionLanguage())) { + if (!isSqlExecutionEnabled) { return new InvalidRelation("Plugin is not configured for relational transformation"); } - if (!(Feature.WRANGLER_PRECONDITION_SQL.isEnabled(relationalTranformContext) - || Feature.WRANGLER_EXECUTION_SQL.isEnabled(relationalTranformContext))) { - throw new RuntimeException("SQL execution feature is not available"); - } +// if (!(Feature.WRANGLER_PRECONDITION_SQL.isEnabled(relationalTranformContext) +// || Feature.WRANGLER_EXECUTION_SQL.isEnabled(relationalTranformContext))) { +// throw new RuntimeException("SQL execution feature is not available"); +// } Optional<ExpressionFactory<String>> expressionFactory = getExpressionFactory(relationalTranformContext); if (!expressionFactory.isPresent()) { @@ -762,17 +773,9 @@ public String getOnError() { } public String getPreconditionLanguage() { - if (Strings.isNullOrEmpty(preconditionLanguage)) { - // due to backward compatibility... - return LANGUAGE_JEXL; - } return preconditionLanguage; } public String getExecutionLanguage() { - if (Strings.isNullOrEmpty(executionLanguage)) { - // due to backward compatibility...
- return LANGUAGE_JEXL; - } return executionLanguage; } diff --git a/wrangler-transform/widgets/Wrangler-transform.json b/wrangler-transform/widgets/Wrangler-transform.json index b3291289a..1c53ac52a 100644 --- a/wrangler-transform/widgets/Wrangler-transform.json +++ b/wrangler-transform/widgets/Wrangler-transform.json @@ -179,7 +179,7 @@ ] }, { - "name": "preconditionSQLEnabled", + "name": "executionSQLEnabled", "condition": { "expression": "featureFlags['wrangler.execution.sql.enabled'] == true" }, From 35fbf9349106af5272c65b0bbc412fcbf8b22f2d Mon Sep 17 00:00:00 2001 From: shrverma Date: Fri, 14 Jul 2023 07:00:57 +0000 Subject: [PATCH 28/50] Implement filter directives --- .../directives/row/RecordConditionFilter.java | 16 ++++++++++++++++ .../row/RecordMissingOrNullFilter.java | 17 +++++++++++++++++ .../cdap/directives/row/RecordRegexFilter.java | 17 +++++++++++++++++ 3 files changed, 50 insertions(+) diff --git a/wrangler-core/src/main/java/io/cdap/directives/row/RecordConditionFilter.java b/wrangler-core/src/main/java/io/cdap/directives/row/RecordConditionFilter.java index b3eb4adb2..c327e55f9 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/row/RecordConditionFilter.java +++ b/wrangler-core/src/main/java/io/cdap/directives/row/RecordConditionFilter.java @@ -20,6 +20,10 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -38,6 +42,7 @@ import io.cdap.wrangler.expression.EL; import io.cdap.wrangler.expression.ELContext; import io.cdap.wrangler.expression.ELException; +import 
io.cdap.wrangler.utils.SqlExpressionGenerator; import java.util.ArrayList; import java.util.List; @@ -129,4 +134,15 @@ public List<EntityCountMetric> getCountMetrics() { EntityCountMetric jexlCategoryMetric = getJexlCategoryMetric(el.getScriptParsedText()); return (jexlCategoryMetric == null) ? null : ImmutableList.of(jexlCategoryMetric); } + + @Override + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + java.util.Optional<ExpressionFactory<String>> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + return relation.filter(expressionFactory.get().compile(el.getScriptParsedText())); + } } diff --git a/wrangler-core/src/main/java/io/cdap/directives/row/RecordMissingOrNullFilter.java b/wrangler-core/src/main/java/io/cdap/directives/row/RecordMissingOrNullFilter.java index e6a6c8d8d..c4bb509ea 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/row/RecordMissingOrNullFilter.java +++ b/wrangler-core/src/main/java/io/cdap/directives/row/RecordMissingOrNullFilter.java @@ -19,6 +19,10 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -31,10 +35,12 @@ import io.cdap.wrangler.api.parser.ColumnNameList; import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; +import io.cdap.wrangler.utils.SqlExpressionGenerator; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Optional;
/** * Filters records if they don't have all the columns specified or they have null values or combination. @@ -97,4 +103,15 @@ public Mutation lineage() { cols.forEach(column -> builder.relation(column, column)); return builder.build(); } + + @Override + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + Optional<ExpressionFactory<String>> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + return relation.filter(expressionFactory.get().compile("nvl(" + columns[0] + ", false)")); + } } diff --git a/wrangler-core/src/main/java/io/cdap/directives/row/RecordRegexFilter.java b/wrangler-core/src/main/java/io/cdap/directives/row/RecordRegexFilter.java index 5e7a6d7de..916575a1a 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/row/RecordRegexFilter.java +++ b/wrangler-core/src/main/java/io/cdap/directives/row/RecordRegexFilter.java @@ -19,6 +19,10 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -33,10 +37,12 @@ import io.cdap.wrangler.api.parser.Text; import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; +import io.cdap.wrangler.utils.SqlExpressionGenerator; import org.json.JSONObject; import java.util.ArrayList; import java.util.List; +import java.util.Optional; import java.util.regex.Pattern; /** @@ -147,5 +153,16 @@ private boolean matchPattern(String value) { } return matches; } + +
@Override + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + Optional<ExpressionFactory<String>> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + return relation.filter(expressionFactory.get().compile("rlike(" + column + ", '" + pattern + "')")); + } } From 0158b38774e9d74a20280409ef52cee8d07119be Mon Sep 17 00:00:00 2001 From: shrverma Date: Fri, 14 Jul 2023 15:08:12 +0000 Subject: [PATCH 29/50] Change UI toggle --- .../main/java/io/cdap/wrangler/Wrangler.java | 43 +++++++++---------- .../widgets/Wrangler-transform.json | 26 +++++------ 2 files changed, 30 insertions(+), 39 deletions(-) diff --git a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java index 61596f9ee..009d5aa69 100644 --- a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java +++ b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java @@ -122,8 +122,11 @@ public class Wrangler extends Transform<StructuredRecord, StructuredRecord> impl public static final String DIRECTIVE_ENTITY_TYPE = "directive"; // Precondition languages - private static final String LANGUAGE_JEXL = "jexl"; - private static final String LANGUAGE_SQL = "sql"; + private static final String JEXL = "jexl"; + private static final String SQL = "sql"; + + // Sql execution value + private static final String SQL_ENABLED = "yes"; // Plugin configuration. private final Config config; @@ -149,8 +152,6 @@ public class Wrangler extends Transform<StructuredRecord, StructuredRecord> impl // on error strategy private String onErrorStrategy; - private boolean isSqlExecutionEnabled; - // This is used only for tests, otherwise this is being injected by the ingestion framework.
public Wrangler(Config config) { this.config = config; @@ -188,10 +189,8 @@ public void configurePipeline(PipelineConfigurer configurer) { } } - isSqlExecutionEnabled = checkSQLExecution(config); - if (!config.containsMacro(Config.NAME_PRECONDITION_LANGUAGE)) { - if (isSqlExecutionEnabled) { + if (checkSQLExecution(config)) { if (!config.containsMacro(Config.NAME_PRECONDITION_SQL)) { validatePrecondition(config.getPreconditionSQL(), true, collector); } @@ -265,7 +264,7 @@ public void configurePipeline(PipelineConfigurer configurer) { // Check if jexl pre-condition is not null or empty and if so compile expression. if (!config.containsMacro(Config.NAME_PRECONDITION) && !config.containsMacro(Config.NAME_PRECONDITION_LANGUAGE)) { - if (!isSqlExecutionEnabled && checkPreconditionNotEmpty(false)) { + if (!checkSQLExecution(config) && checkPreconditionNotEmpty(false)) { try { new Precondition(config.getPreconditionJEXL()); } catch (PreconditionException e) { @@ -357,7 +356,7 @@ public void initialize(TransformContext context) throws Exception { // Check if jexl pre-condition is not null or empty and if so compile expression. 
if (!config.containsMacro(Config.NAME_PRECONDITION_LANGUAGE)) { - if (!isSqlExecutionEnabled && checkPreconditionNotEmpty(false)) { + if (!checkSQLExecution(config) && checkPreconditionNotEmpty(false)) { try { condition = new Precondition(config.getPreconditionJEXL()); } catch (PreconditionException e) { @@ -414,7 +413,7 @@ public void transform(StructuredRecord input, Emitter<StructuredRecord> emitter) } // If pre-condition is set, then evaluate the precondition - if (!isSqlExecutionEnabled && checkPreconditionNotEmpty(false)) { + if (!checkSQLExecution(config) && checkPreconditionNotEmpty(false)) { boolean skip = condition.apply(row); if (skip) { getContext().getMetrics().count("precondition.filtered", 1); @@ -549,13 +548,11 @@ private boolean checkSQLExecution(Config config) { } if (!Strings.isNullOrEmpty(config.getPreconditionLanguage())) { - if (LANGUAGE_SQL.equalsIgnoreCase(config.getPreconditionLanguage())) { - return true; - } + return SQL.equalsIgnoreCase(config.getPreconditionLanguage()); } - if (!Strings.isNullOrEmpty(config.getExecutionLanguage())) { - return LANGUAGE_SQL.equalsIgnoreCase(config.getExecutionLanguage()) ?
true : false; + if (!Strings.isNullOrEmpty(config.getSqlExecution())) { + return SQL_ENABLED.equalsIgnoreCase(config.getSqlExecution()); } // for backwards compatibility @@ -587,7 +584,7 @@ private RecipeParser getRecipeParser(StageContext context) @Override public Relation transform(RelationalTranformContext relationalTranformContext, Relation relation) { - if (!isSqlExecutionEnabled) { + if (!checkSQLExecution(config)) { return new InvalidRelation("Plugin is not configured for relational transformation"); } @@ -690,7 +687,7 @@ public static class Config extends PluginConfig { static final String NAME_PRECONDITION = "precondition"; static final String NAME_PRECONDITION_SQL = "preconditionSQL"; static final String NAME_PRECONDITION_LANGUAGE = "expressionLanguage"; - static final String NAME_EXECUTION_LANGUAGE = "executionLanguage"; + static final String NAME_SQL_EXECUTION = "sqlExecution"; static final String NAME_FIELD = "field"; static final String NAME_DIRECTIVES = "directives"; static final String NAME_RELATIONAL_DIRECTIVES = "relationalDirectives"; @@ -704,11 +701,11 @@ public static class Config extends PluginConfig { @Nullable private String preconditionLanguage; - @Name(NAME_EXECUTION_LANGUAGE) + @Name(NAME_SQL_EXECUTION) @Description("Toggle to configure execution language between JEXL and SQL") @Macro @Nullable - private String executionLanguage; + private String sqlExecution; @Name(NAME_PRECONDITION) @Description("JEXL Precondition expression specifying filtering before applying directives (true to filter)") @@ -752,10 +749,10 @@ public static class Config extends PluginConfig { @Nullable private final String onError; - public Config(String preconditionLanguage, String executionLanguage, String precondition, String directives, + public Config(String preconditionLanguage, String sqlExecution, String precondition, String directives, String udds, String field, String schema, String onError, String relationalDirectives) { this.preconditionLanguage = 
preconditionLanguage; - this.executionLanguage = executionLanguage; + this.sqlExecution = sqlExecution; this.precondition = precondition; this.directives = directives; this.udds = udds; @@ -775,8 +772,8 @@ public String getOnError() { public String getPreconditionLanguage() { return preconditionLanguage; } - public String getExecutionLanguage() { - return executionLanguage; + public String getSqlExecution() { + return sqlExecution; } public String getPreconditionJEXL() { diff --git a/wrangler-transform/widgets/Wrangler-transform.json b/wrangler-transform/widgets/Wrangler-transform.json index 1c53ac52a..6a7738215 100644 --- a/wrangler-transform/widgets/Wrangler-transform.json +++ b/wrangler-transform/widgets/Wrangler-transform.json @@ -39,19 +39,19 @@ }, { "widget-type": "radio-group", - "name": "executionLanguage", - "label": "Execution Language", + "name": "sqlExecution", + "label": "Enable SQL Execution", "widget-attributes": { "layout": "inline", - "default": "jexl", + "default": "no", "options": [ { - "id": "jexl", - "label": "JEXL" + "id": "yes", + "label": "Yes" }, { - "id": "sql", - "label": "SQL" + "id": "no", + "label": "No" } ] } @@ -145,7 +145,7 @@ { "name": "PreconditionValueNotSQL", "condition": { - "expression": "expressionLanguage == 'jexl' && executionLanguage == 'jexl'" + "expression": "expressionLanguage == 'jexl' && sqlExecution == 'no'" }, "show": [ { @@ -157,7 +157,7 @@ { "name": "preconditionValueSQL", "condition": { - "expression": "expressionLanguage == 'sql' || executionLanguage == 'sql'" + "expression": "expressionLanguage == 'sql' || sqlExecution == 'yes'" }, "show": [ { @@ -186,13 +186,7 @@ "show": [ { "type": "properties", - "name": "executionLanguage" - } - ], - "hide": [ - { - "type": "properties", - "name": "expressionLanguage" + "name": "sqlExecution" } ] } From cb5eb7f36c7cf98d1d50d9acb2b2872f5a721bda Mon Sep 17 00:00:00 2001 From: shrverma Date: Fri, 14 Jul 2023 15:11:52 +0000 Subject: [PATCH 30/50] Move feature flag checks to 
separate function --- .../src/main/java/io/cdap/wrangler/Wrangler.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java index 009d5aa69..e186c2bad 100644 --- a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java +++ b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java @@ -588,11 +588,6 @@ public Relation transform(RelationalTranformContext relationalTranformContext, R return new InvalidRelation("Plugin is not configured for relational transformation"); } -// if (!(Feature.WRANGLER_PRECONDITION_SQL.isEnabled(relationalTranformContext) -// || Feature.WRANGLER_EXECUTION_SQL.isEnabled(relationalTranformContext))) { -// throw new RuntimeException("SQL execution feature is not available"); -// } - Optional<ExpressionFactory<String>> expressionFactory = getExpressionFactory(relationalTranformContext); if (!expressionFactory.isPresent()) { return new InvalidRelation("Cannot find an Expression Factory"); From 8f926a59baa874cd9e5d96547cf99ce55a3f0064 Mon Sep 17 00:00:00 2001 From: shrverma Date: Sat, 15 Jul 2023 09:49:40 +0000 Subject: [PATCH 31/50] Implement SetRecordDelimiter directive --- .../directives/row/SetRecordDelimiter.java | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/wrangler-core/src/main/java/io/cdap/directives/row/SetRecordDelimiter.java b/wrangler-core/src/main/java/io/cdap/directives/row/SetRecordDelimiter.java index 5b4f57f58..cbdf290b2 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/row/SetRecordDelimiter.java +++ b/wrangler-core/src/main/java/io/cdap/directives/row/SetRecordDelimiter.java @@ -19,6 +19,11 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.relational.Expression; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import
io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -35,9 +40,13 @@ import io.cdap.wrangler.api.parser.Text; import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; +import io.cdap.wrangler.utils.SqlExpressionGenerator; import java.util.ArrayList; +import java.util.Collection; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; /** * A directive for parsing a string into record using the record delimiter. @@ -112,4 +121,18 @@ public Mutation lineage() { .relation(column, column) .build(); } + + @Override + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + java.util.Optional<ExpressionFactory<String>> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + Map<String, Expression> columnExpMap = new LinkedHashMap<>(); + columnExpMap.put(column, expressionFactory.get().compile( + String.format("explode(split(%s, \"%s\", %d))", column, delimiter, limit))); + return relation.select(columnExpMap); + } } From 68785b5daa37497f2a7a3615edb03af93f1870df Mon Sep 17 00:00:00 2001 From: shrverma Date: Sat, 15 Jul 2023 12:15:14 +0000 Subject: [PATCH 32/50] Implement split-email directive --- .../directives/transformation/SplitEmail.java | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/SplitEmail.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/SplitEmail.java index 60a359f05..d7976cc42 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/SplitEmail.java +++
b/wrangler-core/src/main/java/io/cdap/directives/transformation/SplitEmail.java @@ -19,6 +19,11 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.relational.Expression; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -33,8 +38,10 @@ import io.cdap.wrangler.api.parser.ColumnName; import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; +import io.cdap.wrangler.utils.SqlExpressionGenerator; import java.util.List; +import java.util.Optional; /** * A directive to split email address into account and domain. @@ -128,4 +135,24 @@ private Pair<String, String> extractDomainAndAccount(String emailId) { return new Pair<>(emailId.substring(0, lastidx), emailId.substring(lastidx + 1)); } } + + @Override + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + Optional<ExpressionFactory<String>> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); + + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + + Relation accountRelation = relation.setColumn(generatedAccountCol, getExpression(expressionFactory)); + return accountRelation.setColumn(generatedDomainCol, + expressionFactory.get().compile(String.format("substring_index(%s, '@', -1)", column))); + } + + Expression getExpression(Optional<ExpressionFactory<String>> expfactory) { + return expfactory.get().compile(String + .format("substring(%s, 1, char_length(%s) - locate('@', reverse(%s)))", column, column, column)); + } } From 6404c8d7f91ef45d0b948bbacf9d92992021d360 Mon Sep 17
00:00:00 2001 From: shrverma Date: Mon, 17 Jul 2023 11:17:32 +0000 Subject: [PATCH 33/50] Implement transformation directives --- .../cdap/directives/column/CreateRecord.java | 21 +++++++++++++++++++ .../directives/column/SplitToColumns.java | 20 ++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/CreateRecord.java b/wrangler-core/src/main/java/io/cdap/directives/column/CreateRecord.java index 2654f8730..5d0e6dd40 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/CreateRecord.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/CreateRecord.java @@ -19,6 +19,10 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -32,6 +36,7 @@ import io.cdap.wrangler.api.parser.ColumnNameList; import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; +import io.cdap.wrangler.utils.SqlExpressionGenerator; import java.util.ArrayList; import java.util.Arrays; @@ -101,4 +106,20 @@ public Mutation lineage() { .relation(Many.columns(columns), targetColumn) .build(); } + + @Override + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + java.util.Optional<ExpressionFactory<String>> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + return relation.setColumn(targetColumn, expressionFactory.get().compile(String +
.format("struct(%s)", getColumnString(columns)))); + } + + public String getColumnString(String[] columns) { + return String.join(",", columns); + } } diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/SplitToColumns.java b/wrangler-core/src/main/java/io/cdap/directives/column/SplitToColumns.java index 6ade24fdb..fa8bf695d 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/SplitToColumns.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/SplitToColumns.java @@ -19,6 +19,10 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -33,9 +37,11 @@ import io.cdap.wrangler.api.parser.Text; import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; +import io.cdap.wrangler.utils.SqlExpressionGenerator; import java.util.ArrayList; import java.util.List; +import java.util.Optional; /** * A directive for splitting the columns into multiple columns. 
@@ -123,5 +129,19 @@ public Mutation lineage() { String.format("%s_%d", column, 10))) .build(); } + + @Override + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + + Optional<ExpressionFactory<String>> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + + return relation.setColumn(column, expressionFactory.get() + .compile(String.format("explode(split(%s, '%s'))", column, regex))); + } } From ca890344adcc3b766fea562b64af40ed4dc5e292 Mon Sep 17 00:00:00 2001 From: shrverma Date: Sat, 22 Jul 2023 08:34:06 +0000 Subject: [PATCH 34/50] Refactor code --- .../directives/column/SplitToColumns.java | 20 ------------------- .../java/io/cdap/directives/column/Swap.java | 7 +++++-- 2 files changed, 5 insertions(+), 22 deletions(-) diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/SplitToColumns.java b/wrangler-core/src/main/java/io/cdap/directives/column/SplitToColumns.java index fa8bf695d..6ade24fdb 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/SplitToColumns.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/SplitToColumns.java @@ -19,10 +19,6 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; -import io.cdap.cdap.etl.api.relational.ExpressionFactory; -import io.cdap.cdap.etl.api.relational.InvalidRelation; -import io.cdap.cdap.etl.api.relational.Relation; -import io.cdap.cdap.etl.api.relational.RelationalTranformContext; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -37,11 +33,9 @@ import io.cdap.wrangler.api.parser.Text; import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; -import
io.cdap.wrangler.utils.SqlExpressionGenerator; import java.util.ArrayList; import java.util.List; -import java.util.Optional; /** * A directive for splitting the columns into multiple columns. @@ -129,19 +123,5 @@ public Mutation lineage() { String.format("%s_%d", column, 10))) .build(); } - - @Override - public Relation transform(RelationalTranformContext relationalTranformContext, - Relation relation) { - - Optional<ExpressionFactory<String>> expressionFactory = SqlExpressionGenerator - .getExpressionFactory(relationalTranformContext); - if (!expressionFactory.isPresent()) { - return new InvalidRelation("Cannot find an Expression Factory"); - } - - return relation.setColumn(column, expressionFactory.get() - .compile(String.format("explode(split(%s, '%s'))", column, regex))); - } } diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/Swap.java b/wrangler-core/src/main/java/io/cdap/directives/column/Swap.java index 55ea1b27f..62a90119c 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/Swap.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/Swap.java @@ -103,12 +103,15 @@ public Mutation lineage() { @Override public Relation transform(RelationalTranformContext relationalTranformContext, Relation relation) { + Optional<ExpressionFactory<String>> expressionFactory = SqlExpressionGenerator .getExpressionFactory(relationalTranformContext); if (!expressionFactory.isPresent()) { return new InvalidRelation("Cannot find an Expression Factory"); } - Relation tempRel = relation.setColumn(left, expressionFactory.get().compile(right)); - return tempRel.setColumn(right, expressionFactory.get().compile(left)); + + Relation tempRel = relation.setColumn("tempColumn", expressionFactory.get().compile(right)); + tempRel = tempRel.setColumn(right, expressionFactory.get().compile(left)); + return tempRel.setColumn(left, expressionFactory.get().compile("tempColumn")); } } From 09aa10e0325150e22a7c26ff925a7d4a21c7415b Mon Sep 17 00:00:00 2001 From: shrverma Date: Sat, 22 Jul 2023 08:44:59 +0000
Subject: [PATCH 35/50] Implement UUID, split-rows and JSON-object directives --- .../io/cdap/directives/row/SplitToRows.java | 20 ++++++++++++++++++ .../transformation/GenerateUUID.java | 20 ++++++++++++++++++ .../directives/writer/WriteAsJsonObject.java | 21 +++++++++++++++++++ 3 files changed, 61 insertions(+) diff --git a/wrangler-core/src/main/java/io/cdap/directives/row/SplitToRows.java b/wrangler-core/src/main/java/io/cdap/directives/row/SplitToRows.java index 8848ed2a9..75645dc4a 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/row/SplitToRows.java +++ b/wrangler-core/src/main/java/io/cdap/directives/row/SplitToRows.java @@ -19,6 +19,10 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -33,9 +37,11 @@ import io.cdap.wrangler.api.parser.Text; import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; +import io.cdap.wrangler.utils.SqlExpressionGenerator; import java.util.ArrayList; import java.util.List; +import java.util.Optional; /** * A directive for splitting the string into multiple {@link Row}s. 
@@ -109,5 +115,19 @@ public Mutation lineage() { .relation(Many.columns(column), Many.columns(column)) .build(); } + + @Override + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + + Optional> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + + return relation.setColumn(column, expressionFactory.get() + .compile(String.format("explode(split(%s, '%s'))", column, regex))); + } } diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/GenerateUUID.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/GenerateUUID.java index 46656f5a5..808b6f300 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/GenerateUUID.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/GenerateUUID.java @@ -19,6 +19,10 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -31,8 +35,10 @@ import io.cdap.wrangler.api.parser.ColumnName; import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; +import io.cdap.wrangler.utils.SqlExpressionGenerator; import java.util.List; +import java.util.Optional; import java.util.Random; import java.util.UUID; @@ -87,4 +93,18 @@ public Mutation lineage() { .relation(column, column) .build(); } + + @Override + public Relation transform(RelationalTranformContext relationalTranformContext, + 
Relation relation) { + + Optional> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + + return relation.setColumn(column, expressionFactory.get() + .compile(String.format("uuid()"))); + } } diff --git a/wrangler-core/src/main/java/io/cdap/directives/writer/WriteAsJsonObject.java b/wrangler-core/src/main/java/io/cdap/directives/writer/WriteAsJsonObject.java index b25cd8a65..3d69fd55e 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/writer/WriteAsJsonObject.java +++ b/wrangler-core/src/main/java/io/cdap/directives/writer/WriteAsJsonObject.java @@ -23,6 +23,10 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -38,6 +42,7 @@ import io.cdap.wrangler.api.parser.ColumnNameList; import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; +import io.cdap.wrangler.utils.SqlExpressionGenerator; import java.util.List; @@ -115,4 +120,20 @@ public Mutation lineage() { columns.forEach(column -> builder.relation(column, column)); return builder.build(); } + + @Override + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + java.util.Optional> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + return relation.setColumn( + 
column, expressionFactory.get().compile(String.format("struct(%s)", getJSONColumns(columns)))); + } + + String getJSONColumns(List columnList) { + return String.join(",", columnList); + } } From 278a30eb654b8b3d194a373ab9db12ef40c45539 Mon Sep 17 00:00:00 2001 From: shrverma Date: Sat, 22 Jul 2023 09:00:40 +0000 Subject: [PATCH 36/50] Remove row filter directive implementation --- .../directives/row/RecordConditionFilter.java | 16 ---------------- .../row/RecordMissingOrNullFilter.java | 17 ----------------- 2 files changed, 33 deletions(-) diff --git a/wrangler-core/src/main/java/io/cdap/directives/row/RecordConditionFilter.java b/wrangler-core/src/main/java/io/cdap/directives/row/RecordConditionFilter.java index c327e55f9..b3eb4adb2 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/row/RecordConditionFilter.java +++ b/wrangler-core/src/main/java/io/cdap/directives/row/RecordConditionFilter.java @@ -20,10 +20,6 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; -import io.cdap.cdap.etl.api.relational.ExpressionFactory; -import io.cdap.cdap.etl.api.relational.InvalidRelation; -import io.cdap.cdap.etl.api.relational.Relation; -import io.cdap.cdap.etl.api.relational.RelationalTranformContext; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -42,7 +38,6 @@ import io.cdap.wrangler.expression.EL; import io.cdap.wrangler.expression.ELContext; import io.cdap.wrangler.expression.ELException; -import io.cdap.wrangler.utils.SqlExpressionGenerator; import java.util.ArrayList; import java.util.List; @@ -134,15 +129,4 @@ public List getCountMetrics() { EntityCountMetric jexlCategoryMetric = getJexlCategoryMetric(el.getScriptParsedText()); return (jexlCategoryMetric == null) ? 
null : ImmutableList.of(jexlCategoryMetric); } - - @Override - public Relation transform(RelationalTranformContext relationalTranformContext, - Relation relation) { - java.util.Optional> expressionFactory = SqlExpressionGenerator - .getExpressionFactory(relationalTranformContext); - if (!expressionFactory.isPresent()) { - return new InvalidRelation("Cannot find an Expression Factory"); - } - return relation.filter(expressionFactory.get().compile(el.getScriptParsedText())); - } } diff --git a/wrangler-core/src/main/java/io/cdap/directives/row/RecordMissingOrNullFilter.java b/wrangler-core/src/main/java/io/cdap/directives/row/RecordMissingOrNullFilter.java index c4bb509ea..e6a6c8d8d 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/row/RecordMissingOrNullFilter.java +++ b/wrangler-core/src/main/java/io/cdap/directives/row/RecordMissingOrNullFilter.java @@ -19,10 +19,6 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; -import io.cdap.cdap.etl.api.relational.ExpressionFactory; -import io.cdap.cdap.etl.api.relational.InvalidRelation; -import io.cdap.cdap.etl.api.relational.Relation; -import io.cdap.cdap.etl.api.relational.RelationalTranformContext; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -35,12 +31,10 @@ import io.cdap.wrangler.api.parser.ColumnNameList; import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; -import io.cdap.wrangler.utils.SqlExpressionGenerator; import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.Optional; /** * Filters records if they don't have all the columns specified or they have null values or combination. 
@@ -103,15 +97,4 @@ public Mutation lineage() { cols.forEach(column -> builder.relation(column, column)); return builder.build(); } - - @Override - public Relation transform(RelationalTranformContext relationalTranformContext, - Relation relation) { - Optional> expressionFactory = SqlExpressionGenerator - .getExpressionFactory(relationalTranformContext); - if (!expressionFactory.isPresent()) { - return new InvalidRelation("Cannot find an Expression Factory"); - } - return relation.filter(expressionFactory.get().compile("nvl(" + columns[0] + ", false)")); - } } From 480f2aa047505622c431c83b273845abb0e7ebbe Mon Sep 17 00:00:00 2001 From: shrverma Date: Sat, 22 Jul 2023 09:07:04 +0000 Subject: [PATCH 37/50] Implement fill-null-or-empty --- .../transformation/FillNullOrEmpty.java | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/FillNullOrEmpty.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/FillNullOrEmpty.java index 45542b254..5532620ba 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/FillNullOrEmpty.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/FillNullOrEmpty.java @@ -19,6 +19,10 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -32,9 +36,11 @@ import io.cdap.wrangler.api.parser.Text; import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; +import io.cdap.wrangler.utils.SqlExpressionGenerator; import org.json.JSONObject; 
import java.util.List; +import java.util.Optional; /** * A directive to fill null or empty column values with a fixed value. @@ -104,4 +110,17 @@ public Mutation lineage() { .relation(column, column) .build(); } + + @Override + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + Optional> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + return relation.setColumn(column, expressionFactory.get().compile(String + .format("nvl2(%s, if(length(%s) == 0, \"%s\", %s), \"%s\")", + column, column, value, column, value))); + } } From 7f3f3cb14fa92b4f19e1c86a9c5ef8306fcaae0a Mon Sep 17 00:00:00 2001 From: shrverma Date: Sat, 22 Jul 2023 09:20:33 +0000 Subject: [PATCH 38/50] Implement URL encoding and decoding directives --- .../directives/transformation/UrlDecode.java | 19 +++++++++++++++++++ .../directives/transformation/UrlEncode.java | 18 ++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/UrlDecode.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/UrlDecode.java index 0ae4df352..d7ef6003e 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/UrlDecode.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/UrlDecode.java @@ -19,6 +19,10 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ 
-31,10 +35,12 @@ import io.cdap.wrangler.api.parser.ColumnName; import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; +import io.cdap.wrangler.utils.SqlExpressionGenerator; import java.io.UnsupportedEncodingException; import java.net.URLDecoder; import java.util.List; +import java.util.Optional; /** * A Executor to decodes a column with url encoding. @@ -101,4 +107,17 @@ public Mutation lineage() { .relation(column, column) .build(); } + + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + Optional> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + return relation.setColumn( + column, expressionFactory.get().compile( + String.format("reflect('java.net.url.Decoder', 'decode', %s, 'utf-8')", column))); + } + } diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/UrlEncode.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/UrlEncode.java index 204e380c7..dcdd27e49 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/UrlEncode.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/UrlEncode.java @@ -19,6 +19,10 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -31,10 +35,12 @@ import io.cdap.wrangler.api.parser.ColumnName; import io.cdap.wrangler.api.parser.TokenType; import 
io.cdap.wrangler.api.parser.UsageDefinition; +import io.cdap.wrangler.utils.SqlExpressionGenerator; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import java.util.List; +import java.util.Optional; /** * A Executor to encode a column with url encoding. @@ -101,4 +107,16 @@ public List execute(List rows, ExecutorContext context) throws Directi } return rows; } + + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + Optional> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + return relation.setColumn( + column, expressionFactory.get().compile( + String.format("reflect('java.net.url.Encoder', 'encode', %s, 'utf-8')", column))); + } } From 63a593200316b8f98febeed1cea2872bee777096 Mon Sep 17 00:00:00 2001 From: shrverma Date: Mon, 24 Jul 2023 05:07:02 +0000 Subject: [PATCH 39/50] Move partially supported directives --- .../directives/column/ChangeColCaseNames.java | 47 ------------------- .../io/cdap/directives/column/SetType.java | 17 ------- 2 files changed, 64 deletions(-) diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/ChangeColCaseNames.java b/wrangler-core/src/main/java/io/cdap/directives/column/ChangeColCaseNames.java index e1c0056dd..1129d7bf2 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/ChangeColCaseNames.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/ChangeColCaseNames.java @@ -19,12 +19,6 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; -import io.cdap.cdap.api.data.schema.Schema; -import io.cdap.cdap.etl.api.relational.Expression; -import io.cdap.cdap.etl.api.relational.ExpressionFactory; -import io.cdap.cdap.etl.api.relational.InvalidRelation; -import 
io.cdap.cdap.etl.api.relational.Relation; -import io.cdap.cdap.etl.api.relational.RelationalTranformContext; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -39,13 +33,8 @@ import io.cdap.wrangler.api.parser.Identifier; import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; -import io.cdap.wrangler.utils.SqlExpressionGenerator; -import java.util.ArrayList; -import java.util.LinkedHashMap; import java.util.List; -import java.util.Map; -import java.util.Set; /** * This class ChangeColCaseNames converts the case of the columns @@ -105,40 +94,4 @@ public Mutation lineage() { .all(Many.of()) .build(); } - @Override - public Relation transform(RelationalTranformContext relationalTranformContext, - Relation relation) { - java.util.Optional> expressionFactory = SqlExpressionGenerator - .getExpressionFactory(relationalTranformContext); - if (!expressionFactory.isPresent()) { - return new InvalidRelation("Cannot find an Expression Factory"); - } - List columnNames = generateListCols(relationalTranformContext); - Map colmap = generateColumnCaseMap(columnNames, expressionFactory.get()); - return relation.select(colmap); - } - - private List generateListCols(RelationalTranformContext relationalTranformContext) { - List colnames = new ArrayList(); - Set inputRelationNames = relationalTranformContext.getInputRelationNames(); - for (String inputRelationName : inputRelationNames) { - Schema schema = relationalTranformContext.getInputSchema(inputRelationName); - List fields = schema.getFields(); - for (Schema.Field field: fields) { - colnames.add(field.getName()); - } - } - return colnames; - } - - private Map generateColumnCaseMap(List columns, ExpressionFactory factory) { - Map columnExpMap = new LinkedHashMap<>(); - if (toLower) { - columns.forEach((colName) -> columnExpMap.put(colName.toLowerCase(), factory.compile(colName))); - } else { - 
columns.forEach((colName) -> columnExpMap.put(colName.toUpperCase(), factory.compile(colName))); - } - return columnExpMap; - } - } diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java b/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java index 9c4042b6d..f143493c0 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/SetType.java @@ -19,10 +19,6 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; -import io.cdap.cdap.etl.api.relational.ExpressionFactory; -import io.cdap.cdap.etl.api.relational.InvalidRelation; -import io.cdap.cdap.etl.api.relational.Relation; -import io.cdap.cdap.etl.api.relational.RelationalTranformContext; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -40,7 +36,6 @@ import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; import io.cdap.wrangler.utils.ColumnConverter; -import io.cdap.wrangler.utils.SqlExpressionGenerator; import java.math.RoundingMode; import java.util.List; @@ -113,16 +108,4 @@ public Mutation lineage() { .relation(col, col) .build(); } - @Override - public Relation transform(RelationalTranformContext relationalTranformContext, - Relation relation) { - java.util.Optional> expressionFactory = SqlExpressionGenerator - .getExpressionFactory(relationalTranformContext); - if (!expressionFactory.isPresent()) { - return new InvalidRelation("Cannot find an Expression Factory"); - } - String expression = SqlExpressionGenerator.getColumnTypeExp(type, col, scale); - return relation.setColumn(col, expressionFactory.get().compile(expression)); - } - } From 69e852737c9b5f36c9e24c206ccd3822462c5a74 Mon Sep 17 00:00:00 2001 From: shrverma Date: Mon, 24 Jul 2023 09:24:23 +0000 Subject: [PATCH 40/50] 
Implement fixed-length-parser --- .../directives/parser/FixedLengthParser.java | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/wrangler-core/src/main/java/io/cdap/directives/parser/FixedLengthParser.java b/wrangler-core/src/main/java/io/cdap/directives/parser/FixedLengthParser.java index 525f7ba7a..f9cdf569d 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/parser/FixedLengthParser.java +++ b/wrangler-core/src/main/java/io/cdap/directives/parser/FixedLengthParser.java @@ -19,6 +19,10 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.relational.ExpressionFactory; +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; import io.cdap.wrangler.api.Arguments; import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveExecutionException; @@ -37,6 +41,7 @@ import io.cdap.wrangler.api.parser.Text; import io.cdap.wrangler.api.parser.TokenType; import io.cdap.wrangler.api.parser.UsageDefinition; +import io.cdap.wrangler.utils.SqlExpressionGenerator; import java.util.ArrayList; import java.util.List; @@ -148,4 +153,32 @@ public Mutation lineage() { .all(Many.of(col), Many.of(col)) .build(); } + + @Override + public Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + java.util.Optional> expressionFactory = SqlExpressionGenerator + .getExpressionFactory(relationalTranformContext); + if (!expressionFactory.isPresent()) { + return new InvalidRelation("Cannot find an Expression Factory"); + } + + int currentpos = 1; + int columncounter = 1; + + for (int width : widths) { + relation = relation.setColumn(String.format("%s_%d", col, columncounter), + expressionFactory.get().compile(String + .format("replace(substr(%s, %d, %d), '%s', \"\")" + , col, currentpos, 
width, padding))) + .filter(expressionFactory.get().compile(String.format("%d <= (length(%s) - %d + 1)", + width, col, currentpos))); + + currentpos += width; + columncounter++; + } + + return relation; + } + } From 3ee71d7034d4c3cceb292abfc7ea9bbd05421199 Mon Sep 17 00:00:00 2001 From: shrverma <134497369+shrverma@users.noreply.github.com> Date: Tue, 25 Jul 2023 11:30:09 +0530 Subject: [PATCH 41/50] Update Directive.java --- wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java index 6585ddf13..3dddd19a0 100644 --- a/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java +++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java @@ -55,7 +55,7 @@ * */ public interface Directive extends Executor, List>, EntityMetrics, - LinearRelationalTransform { + LinearRelationalTransform { /** * This defines a interface variable that is static and final for specify * the {@code type} of the plugin this interface would provide. 
@@ -137,5 +137,4 @@ default Relation transform(RelationalTranformContext relationalTranformContext, // no-op return relation; } - } From 016c989f9cff5f566d941b1e4bef48cb539657f4 Mon Sep 17 00:00:00 2001 From: shrverma <134497369+shrverma@users.noreply.github.com> Date: Tue, 25 Jul 2023 11:31:21 +0530 Subject: [PATCH 42/50] Update ChangeColCaseNames.java --- .../main/java/io/cdap/directives/column/ChangeColCaseNames.java | 1 + 1 file changed, 1 insertion(+) diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/ChangeColCaseNames.java b/wrangler-core/src/main/java/io/cdap/directives/column/ChangeColCaseNames.java index 1129d7bf2..4f6cedb89 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/ChangeColCaseNames.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/ChangeColCaseNames.java @@ -95,3 +95,4 @@ public Mutation lineage() { .build(); } } + From 7ebc4c304fc6f89cbb47ed525154ea90fb019046 Mon Sep 17 00:00:00 2001 From: shrverma Date: Tue, 25 Jul 2023 10:36:41 +0000 Subject: [PATCH 43/50] Add directiverelationaltransform interface --- .../java/io/cdap/wrangler/api/Directive.java | 8 +-- .../api/DirectiveRelationalTransform.java | 54 +++++++++++++++++++ .../java/io/cdap/directives/column/Copy.java | 5 ++ .../java/io/cdap/directives/column/Drop.java | 5 ++ .../java/io/cdap/directives/column/Keep.java | 5 ++ .../java/io/cdap/directives/column/Merge.java | 5 ++ .../io/cdap/directives/column/Rename.java | 5 ++ .../directives/transformation/LeftTrim.java | 6 +++ .../directives/transformation/RightTrim.java | 5 ++ .../directives/transformation/TitleCase.java | 5 ++ .../cdap/directives/transformation/Trim.java | 5 ++ .../cdap/directives/transformation/Upper.java | 5 ++ .../main/java/io/cdap/wrangler/Wrangler.java | 31 ++++++++++- 13 files changed, 136 insertions(+), 8 deletions(-) create mode 100644 wrangler-api/src/main/java/io/cdap/wrangler/api/DirectiveRelationalTransform.java diff --git 
a/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java index 3dddd19a0..b06d94dd0 100644 --- a/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java +++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/Directive.java @@ -55,7 +55,7 @@ * */ public interface Directive extends Executor, List>, EntityMetrics, - LinearRelationalTransform { + DirectiveRelationalTransform { /** * This defines a interface variable that is static and final for specify * the {@code type} of the plugin this interface would provide. @@ -131,10 +131,4 @@ default List getCountMetrics() { return null; } - @Override - default Relation transform(RelationalTranformContext relationalTranformContext, - Relation relation) { - // no-op - return relation; - } } diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/DirectiveRelationalTransform.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/DirectiveRelationalTransform.java new file mode 100644 index 000000000..5b1382422 --- /dev/null +++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/DirectiveRelationalTransform.java @@ -0,0 +1,54 @@ +/* + * Copyright © 2017-2019 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package io.cdap.wrangler.api; + +import io.cdap.cdap.etl.api.relational.InvalidRelation; +import io.cdap.cdap.etl.api.relational.LinearRelationalTransform; +import io.cdap.cdap.etl.api.relational.Relation; +import io.cdap.cdap.etl.api.relational.RelationalTranformContext; +import io.cdap.cdap.etl.api.relational.RelationalTransform; + +/** + * {@link DirectiveRelationalTransform} provides relational transform support for + * wrangler directives. + */ +public interface DirectiveRelationalTransform extends LinearRelationalTransform { + + /** + * Implementation of linear relational transform for each supported directive. + * + * @param relationalTranformContext transformation context with engine, input and output parameters + * @param relation input relation upon which the transformation is applied. + * @return transformed relation as the output relation. By default, returns an Invalid relation + * for unsupported directives. + */ + default Relation transform(RelationalTranformContext relationalTranformContext, + Relation relation) { + return new InvalidRelation("SQL execution for the directive is currently not supported."); + } + + /** + * Indicates whether the directive is supported by relational transformation or not. + * + * @return boolean value for the directive SQL support. + * By default, returns false, indicating that the directive is currently not supported. 
+ */ + default boolean isSQLSupported() { + return false; + } + +} diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/Copy.java b/wrangler-core/src/main/java/io/cdap/directives/column/Copy.java index 9f52f32ce..8dfa91f4c 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/Copy.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/Copy.java @@ -125,4 +125,9 @@ public Relation transform(RelationalTranformContext relationalTranformContext, return relation.setColumn(destination.value(), expressionFactory.get().compile(source.value())); } + @Override + public boolean isSQLSupported() { + return true; + } + } diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/Drop.java b/wrangler-core/src/main/java/io/cdap/directives/column/Drop.java index 7d21b9109..3d415ca8b 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/Drop.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/Drop.java @@ -100,4 +100,9 @@ public Relation transform(RelationalTranformContext relationalTranformContext, } return relation; } + + @Override + public boolean isSQLSupported() { + return true; + } } diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/Keep.java b/wrangler-core/src/main/java/io/cdap/directives/column/Keep.java index de2bbb123..72cc1d1c4 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/Keep.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/Keep.java @@ -114,4 +114,9 @@ public Relation transform(RelationalTranformContext relationalTranformContext, return relation.select(keepCol); } + @Override + public boolean isSQLSupported() { + return true; + } + } diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/Merge.java b/wrangler-core/src/main/java/io/cdap/directives/column/Merge.java index ce6257b5f..24e9883ec 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/Merge.java +++ 
b/wrangler-core/src/main/java/io/cdap/directives/column/Merge.java @@ -125,4 +125,9 @@ public Relation transform(RelationalTranformContext relationalTranformContext, .compile(String.format("CONCAT(%s,'%s',%s)", col1, delimiter, col2))); } + @Override + public boolean isSQLSupported() { + return true; + } + } diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/Rename.java b/wrangler-core/src/main/java/io/cdap/directives/column/Rename.java index cc52be488..dbb5a23d7 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/Rename.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/Rename.java @@ -100,4 +100,9 @@ public Relation transform(RelationalTranformContext relationalTranformContext, return relation.dropColumn(source.value()); } + @Override + public boolean isSQLSupported() { + return true; + } + } diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/LeftTrim.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/LeftTrim.java index fc772d9d0..2e7dee5e1 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/LeftTrim.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/LeftTrim.java @@ -103,4 +103,10 @@ public Relation transform(RelationalTranformContext relationalTranformContext, } return relation.setColumn(col, expressionFactory.get().compile("LTRIM(" + col + ")")); } + + @Override + public boolean isSQLSupported() { + return true; + } + } diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/RightTrim.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/RightTrim.java index 2f1b4f7f7..2635b4e04 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/RightTrim.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/RightTrim.java @@ -105,4 +105,9 @@ public Relation transform(RelationalTranformContext relationalTranformContext, return relation.setColumn(column, 
expressionFactory.get().compile("RTRIM(" + column + ")")); } + @Override + public boolean isSQLSupported() { + return true; + } + } diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/TitleCase.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/TitleCase.java index bfb4437f8..0d0850c4a 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/TitleCase.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/TitleCase.java @@ -105,4 +105,9 @@ public Relation transform(RelationalTranformContext relationalTranformContext, return relation.setColumn(column, expressionFactory.get().compile("initcap(" + column + ")")); } + @Override + public boolean isSQLSupported() { + return true; + } + } diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/Trim.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/Trim.java index dc0585407..3c20bfcd2 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/Trim.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/Trim.java @@ -104,4 +104,9 @@ public Relation transform(RelationalTranformContext relationalTranformContext, return relation.setColumn(column, expressionFactory.get().compile("TRIM(" + column + ")")); } + @Override + public boolean isSQLSupported() { + return true; + } + } diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/Upper.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/Upper.java index 9bb68da1e..d664f265f 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/Upper.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/Upper.java @@ -104,4 +104,9 @@ public Relation transform(RelationalTranformContext relationalTranformContext, return relation.setColumn(column, expressionFactory.get().compile("UPPER(" + column + ")")); } + @Override + public boolean isSQLSupported() { + return true; + } + } 
diff --git a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java index 7c3251c8c..30b814f50 100644 --- a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java +++ b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java @@ -50,6 +50,7 @@ import io.cdap.wrangler.api.Directive; import io.cdap.wrangler.api.DirectiveLoadException; import io.cdap.wrangler.api.DirectiveParseException; +import io.cdap.wrangler.api.DirectiveRelationalTransform; import io.cdap.wrangler.api.EntityCountMetric; import io.cdap.wrangler.api.ErrorRecord; import io.cdap.wrangler.api.ExecutorContext; @@ -106,7 +107,7 @@ @Plugin(type = "transform") @Name("Wrangler") @Description("Wrangler - A interactive tool for data cleansing and transformation.") -public class Wrangler extends Transform implements LinearRelationalTransform { +public class Wrangler extends Transform implements DirectiveRelationalTransform { private static final Logger LOG = LoggerFactory.getLogger(Wrangler.class); // Configuration specifying the dataprep application and service name. 
@@ -524,6 +525,34 @@ private void validatePrecondition(String precondition, Boolean isConditionSQL, F } private void validateSQLModeDirectives(FailureCollector collector) { + + String recipe = config.getDirectives(); + + registry = SystemDirectiveRegistry.INSTANCE; + try { + registry.reload("default"); + } catch (DirectiveLoadException e) { + throw new RuntimeException(e); + } + + List directives = null; + try { + GrammarBasedParser parser = new GrammarBasedParser("default", + new MigrateToV2(recipe).migrate(), registry); + directives = parser.parse(); + } catch (DirectiveParseException e) { + throw new RuntimeException(e); + } catch (RecipeException e) { + throw new RuntimeException(e); + } + + for (Directive directive :directives) { + if (!directive.isSQLSupported()) { + collector.addFailure(String.format("%s directive is not supported by SQL execution.", + directive.define().getDirectiveName()), null) + .withConfigProperty(Config.NAME_DIRECTIVES); + } + } if (!Strings.isNullOrEmpty(config.getUDDs())) { collector.addFailure("UDDs are not supported for precondition of type SQL", null) .withConfigProperty(Config.NAME_UDD); From 6f7d14d20c400483026f52d6503054e80ad006b8 Mon Sep 17 00:00:00 2001 From: shrverma Date: Tue, 25 Jul 2023 11:06:38 +0000 Subject: [PATCH 44/50] Refactor execution logic --- .../src/main/java/io/cdap/wrangler/Wrangler.java | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java index 30b814f50..729d2c291 100644 --- a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java +++ b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java @@ -131,6 +131,10 @@ public class Wrangler extends Transform impl // Sql execution value private static final String SQL_ENABLED = "yes"; + // wrangler sql execution mode enabled or not + + public boolean isSqlenabled = false; + // Plugin 
configuration. private final Config config; @@ -356,10 +360,12 @@ public void initialize(TransformContext context) throws Exception { context.getStageName()), e ); } - + // initialize the wrangler sql mode + isSqlenabled = checkSQLExecution(config); + // Check if jexl pre-condition is not null or empty and if so compile expression. if (!config.containsMacro(Config.NAME_PRECONDITION_LANGUAGE)) { - if (!checkSQLExecution(config) && checkPreconditionNotEmpty(false)) { + if (!isSqlenabled && checkPreconditionNotEmpty(false)) { try { condition = new Precondition(config.getPreconditionJEXL()); } catch (PreconditionException e) { @@ -416,7 +422,7 @@ public void transform(StructuredRecord input, Emitter emitter) } // If pre-condition is set, then evaluate the precondition - if (!checkSQLExecution(config) && checkPreconditionNotEmpty(false)) { + if (!isSqlenabled && checkPreconditionNotEmpty(false)) { boolean skip = condition.apply(row); if (skip) { getContext().getMetrics().count("precondition.filtered", 1); From 29eda87d40f449539a2d516fb6e623d232719d59 Mon Sep 17 00:00:00 2001 From: shrverma Date: Tue, 25 Jul 2023 18:06:26 +0000 Subject: [PATCH 45/50] Refactor code --- .../api/DirectiveRelationalTransform.java | 20 +++++------ .../main/java/io/cdap/wrangler/Wrangler.java | 36 ++++++------------- 2 files changed, 21 insertions(+), 35 deletions(-) diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/DirectiveRelationalTransform.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/DirectiveRelationalTransform.java index 5b1382422..80f92dd05 100644 --- a/wrangler-api/src/main/java/io/cdap/wrangler/api/DirectiveRelationalTransform.java +++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/DirectiveRelationalTransform.java @@ -1,17 +1,17 @@ /* - * Copyright © 2017-2019 Cask Data, Inc. + * Copyright © 2023 Cask Data, Inc. * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. 
You may obtain a copy of - * the License at + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. */ package io.cdap.wrangler.api; diff --git a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java index 729d2c291..e46f8064d 100644 --- a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java +++ b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java @@ -38,10 +38,8 @@ import io.cdap.cdap.etl.api.relational.Expression; import io.cdap.cdap.etl.api.relational.ExpressionFactory; import io.cdap.cdap.etl.api.relational.InvalidRelation; -import io.cdap.cdap.etl.api.relational.LinearRelationalTransform; import io.cdap.cdap.etl.api.relational.Relation; import io.cdap.cdap.etl.api.relational.RelationalTranformContext; -import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; import io.cdap.cdap.features.Feature; import io.cdap.directives.aggregates.DefaultTransientStore; import io.cdap.wrangler.api.CompileException; @@ -201,7 +199,7 @@ public void configurePipeline(PipelineConfigurer configurer) { if 
(!config.containsMacro(Config.NAME_PRECONDITION_SQL)) { validatePrecondition(config.getPreconditionSQL(), true, collector); } - validateSQLModeDirectives(collector); + validateSQLModeDirectives(collector, getDirectivesList(config)); } else { if (!config.containsMacro(Config.NAME_PRECONDITION)) { validatePrecondition(config.getPreconditionJEXL(), false, collector); @@ -530,28 +528,7 @@ private void validatePrecondition(String precondition, Boolean isConditionSQL, F } } - private void validateSQLModeDirectives(FailureCollector collector) { - - String recipe = config.getDirectives(); - - registry = SystemDirectiveRegistry.INSTANCE; - try { - registry.reload("default"); - } catch (DirectiveLoadException e) { - throw new RuntimeException(e); - } - - List directives = null; - try { - GrammarBasedParser parser = new GrammarBasedParser("default", - new MigrateToV2(recipe).migrate(), registry); - directives = parser.parse(); - } catch (DirectiveParseException e) { - throw new RuntimeException(e); - } catch (RecipeException e) { - throw new RuntimeException(e); - } - + private void validateSQLModeDirectives(FailureCollector collector, List directives) { for (Directive directive :directives) { if (!directive.isSQLSupported()) { collector.addFailure(String.format("%s directive is not supported by SQL execution.", @@ -596,6 +573,15 @@ private boolean checkSQLExecution(Config config) { return false; } + List getDirectivesList(Config config) throws Exception{ + String recipe = config.getDirectives(); + List directives = null; + GrammarBasedParser parser = new GrammarBasedParser("default", + new MigrateToV2(recipe).migrate(), registry); + directives = parser.parse(); + return directives; + } + /** * This method creates a {@link CompositeDirectiveRegistry} and initializes the {@link RecipeParser} * with {@link NoOpDirectiveContext} From a656f47f20576c3a899539d6e1582a414a67d82c Mon Sep 17 00:00:00 2001 From: shrverma Date: Wed, 26 Jul 2023 05:31:20 +0000 Subject: [PATCH 46/50] Add 
sql directive validation --- .../main/java/io/cdap/wrangler/Wrangler.java | 28 +++++++++++-------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java index e46f8064d..713c7b579 100644 --- a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java +++ b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java @@ -46,6 +46,7 @@ import io.cdap.wrangler.api.CompileStatus; import io.cdap.wrangler.api.Compiler; import io.cdap.wrangler.api.Directive; +import io.cdap.wrangler.api.DirectiveExecutionException; import io.cdap.wrangler.api.DirectiveLoadException; import io.cdap.wrangler.api.DirectiveParseException; import io.cdap.wrangler.api.DirectiveRelationalTransform; @@ -199,7 +200,7 @@ public void configurePipeline(PipelineConfigurer configurer) { if (!config.containsMacro(Config.NAME_PRECONDITION_SQL)) { validatePrecondition(config.getPreconditionSQL(), true, collector); } - validateSQLModeDirectives(collector, getDirectivesList(config)); + validateSQLUDDs(collector); } else { if (!config.containsMacro(Config.NAME_PRECONDITION)) { validatePrecondition(config.getPreconditionJEXL(), false, collector); @@ -251,6 +252,14 @@ public void configurePipeline(PipelineConfigurer configurer) { } } } + + // check if the directive is supported by SQL + if (checkSQLExecution(config)) { + List sqlDirectives = null; + sqlDirectives = getDirectivesList(config); + validateSQLModeDirectives(collector, sqlDirectives); + } + } catch (CompileException e) { collector.addFailure("Compilation error occurred : " + e.getMessage(), null); } catch (DirectiveParseException e) { @@ -536,9 +545,12 @@ private void validateSQLModeDirectives(FailureCollector collector, List getDirectivesList(Config config) throws Exception{ + List getDirectivesList(Config config) throws DirectiveParseException, RecipeException { String recipe = config.getDirectives(); 
List directives = null; GrammarBasedParser parser = new GrammarBasedParser("default", @@ -620,8 +632,6 @@ public Relation transform(RelationalTranformContext relationalTranformContext, R Expression filterExpression = expressionFactory.get().compile(config.getPreconditionSQL()); Relation filteredRelation = relation.filter(filterExpression); - String recipe = config.getDirectives(); - registry = SystemDirectiveRegistry.INSTANCE; try { registry.reload("default"); @@ -631,12 +641,8 @@ public Relation transform(RelationalTranformContext relationalTranformContext, R List directives = null; try { - GrammarBasedParser parser = new GrammarBasedParser("default", - new MigrateToV2(recipe).migrate(), registry); - directives = parser.parse(); - } catch (DirectiveParseException e) { - throw new RuntimeException(e); - } catch (RecipeException e) { + directives = getDirectivesList(config); + } catch (DirectiveParseException | RecipeException e) { throw new RuntimeException(e); } From 5ce676d0b621990dd456d8f279a41728560b0469 Mon Sep 17 00:00:00 2001 From: shrverma Date: Wed, 26 Jul 2023 11:11:30 +0000 Subject: [PATCH 47/50] Refactor code --- .../src/main/java/io/cdap/wrangler/Wrangler.java | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java index 713c7b579..7d0bec24d 100644 --- a/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java +++ b/wrangler-transform/src/main/java/io/cdap/wrangler/Wrangler.java @@ -131,7 +131,6 @@ public class Wrangler extends Transform impl private static final String SQL_ENABLED = "yes"; // wrangler sql execution mode enabled or not - public boolean isSqlenabled = false; // Plugin configuration. 
@@ -255,9 +254,7 @@ public void configurePipeline(PipelineConfigurer configurer) { // check if the directive is supported by SQL if (checkSQLExecution(config)) { - List sqlDirectives = null; - sqlDirectives = getDirectivesList(config); - validateSQLModeDirectives(collector, sqlDirectives); + validateSQLModeDirectives(collector, getDirectivesList(config)); } } catch (CompileException e) { @@ -538,7 +535,7 @@ private void validatePrecondition(String precondition, Boolean isConditionSQL, F } private void validateSQLModeDirectives(FailureCollector collector, List directives) { - for (Directive directive :directives) { + for (Directive directive : directives) { if (!directive.isSQLSupported()) { collector.addFailure(String.format("%s directive is not supported by SQL execution.", directive.define().getDirectiveName()), null) @@ -550,7 +547,7 @@ private void validateSQLModeDirectives(FailureCollector collector, List getDirectivesList(Config config) throws DirectiveParseException, RecipeException { String recipe = config.getDirectives(); - List directives = null; GrammarBasedParser parser = new GrammarBasedParser("default", new MigrateToV2(recipe).migrate(), registry); - directives = parser.parse(); + List directives = parser.parse(); return directives; } From b5ce52967092752faa6e5a4c34a00d395868b772 Mon Sep 17 00:00:00 2001 From: shrverma Date: Wed, 26 Jul 2023 11:25:34 +0000 Subject: [PATCH 48/50] Refactor code --- .../cdap/directives/transformation/SplitEmail.java | 12 ++++++------ .../io/cdap/directives/writer/WriteAsJsonObject.java | 6 ++---- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/SplitEmail.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/SplitEmail.java index d7976cc42..a0438dabf 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/SplitEmail.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/SplitEmail.java @@ 
-146,13 +146,13 @@ public Relation transform(RelationalTranformContext relationalTranformContext, return new InvalidRelation("Cannot find an Expression Factory"); } - Relation accountRelation = relation.setColumn(generatedAccountCol, getExpression(expressionFactory)); + String accountExpression = String + .format("substring(%s, 1, char_length(%s) - locate('@', reverse(%s)))", column, column, column); + String domainExpression = String.format("substring_index(%s, '@', -1)", column); + Relation accountRelation = relation + .setColumn(generatedAccountCol, expressionFactory.get().compile(accountExpression)); return accountRelation.setColumn(generatedDomainCol, - expressionFactory.get().compile(String.format("substring_index(%s, '@', -1)", column))); + expressionFactory.get().compile(domainExpression)); } - Expression getExpression(Optional> expfactory) { - return expfactory.get().compile(String - .format("substring(%s, 1, char_length(%s) - locate('@', reverse(%s)))", column, column, column)); - } } diff --git a/wrangler-core/src/main/java/io/cdap/directives/writer/WriteAsJsonObject.java b/wrangler-core/src/main/java/io/cdap/directives/writer/WriteAsJsonObject.java index 3d69fd55e..e0f3d0fd8 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/writer/WriteAsJsonObject.java +++ b/wrangler-core/src/main/java/io/cdap/directives/writer/WriteAsJsonObject.java @@ -130,10 +130,8 @@ public Relation transform(RelationalTranformContext relationalTranformContext, return new InvalidRelation("Cannot find an Expression Factory"); } return relation.setColumn( - column, expressionFactory.get().compile(String.format("struct(%s)", getJSONColumns(columns)))); + column, expressionFactory.get() + .compile(String.format("struct(%s)", String.join(",", columns)))); } - String getJSONColumns(List columnList) { - return String.join(",", columnList); - } } From ed0e9a439d8ffd1f5ac3407c273ecd34df513c54 Mon Sep 17 00:00:00 2001 From: shrverma Date: Wed, 26 Jul 2023 13:08:53 +0000 Subject: 
[PATCH 49/50] Fix class not found error --- .../main/java/io/cdap/directives/transformation/UrlDecode.java | 2 +- .../main/java/io/cdap/directives/transformation/UrlEncode.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/UrlDecode.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/UrlDecode.java index d7ef6003e..98ca135cd 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/UrlDecode.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/UrlDecode.java @@ -117,7 +117,7 @@ public Relation transform(RelationalTranformContext relationalTranformContext, } return relation.setColumn( column, expressionFactory.get().compile( - String.format("reflect('java.net.url.Decoder', 'decode', %s, 'utf-8')", column))); + String.format("reflect('java.net.URLDecoder', 'decode', %s, 'utf-8')", column))); } } diff --git a/wrangler-core/src/main/java/io/cdap/directives/transformation/UrlEncode.java b/wrangler-core/src/main/java/io/cdap/directives/transformation/UrlEncode.java index dcdd27e49..b693672f4 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/transformation/UrlEncode.java +++ b/wrangler-core/src/main/java/io/cdap/directives/transformation/UrlEncode.java @@ -117,6 +117,6 @@ public Relation transform(RelationalTranformContext relationalTranformContext, } return relation.setColumn( column, expressionFactory.get().compile( - String.format("reflect('java.net.url.Encoder', 'encode', %s, 'utf-8')", column))); + String.format("reflect('java.net.URLEncoder', 'encode', %s, 'utf-8')", column))); } } From bec96796463cccccef69861affb74d9b65c5fbd2 Mon Sep 17 00:00:00 2001 From: shrverma Date: Tue, 1 Aug 2023 05:17:39 +0000 Subject: [PATCH 50/50] Remove extra function --- .../java/io/cdap/directives/column/CreateRecord.java | 6 ++---- .../io/cdap/directives/parser/FixedLengthParser.java | 11 ++++++----- 
.../io/cdap/directives/writer/WriteAsJsonObject.java | 6 +++--- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/wrangler-core/src/main/java/io/cdap/directives/column/CreateRecord.java b/wrangler-core/src/main/java/io/cdap/directives/column/CreateRecord.java index 5d0e6dd40..d49d88625 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/column/CreateRecord.java +++ b/wrangler-core/src/main/java/io/cdap/directives/column/CreateRecord.java @@ -115,11 +115,9 @@ public Relation transform(RelationalTranformContext relationalTranformContext, if (!expressionFactory.isPresent()) { return new InvalidRelation("Cannot find an Expression Factory"); } + String getColumnString = String.join(",", columns); return relation.setColumn(targetColumn, expressionFactory.get().compile(String - .format("struct(%s)", getColumnString(columns)))); + .format("struct(%s)", getColumnString))); } - public String getColumnString(String[] columns) { - return String.join(",", columns); - } } diff --git a/wrangler-core/src/main/java/io/cdap/directives/parser/FixedLengthParser.java b/wrangler-core/src/main/java/io/cdap/directives/parser/FixedLengthParser.java index f9cdf569d..65a8255c8 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/parser/FixedLengthParser.java +++ b/wrangler-core/src/main/java/io/cdap/directives/parser/FixedLengthParser.java @@ -167,12 +167,13 @@ public Relation transform(RelationalTranformContext relationalTranformContext, int columncounter = 1; for (int width : widths) { + String fixedLengthParseExpression = String.format("replace(substr(%s, %d, %d), '%s', \"\")", + col, currentpos, width, padding); + String filterExcessLengthExpression = String.format("%d <= (length(%s) - %d + 1)", width, col, currentpos); + relation = relation.setColumn(String.format("%s_%d", col, columncounter), - expressionFactory.get().compile(String - .format("replace(substr(%s, %d, %d), '%s', \"\")" - , col, currentpos, width, padding))) - 
.filter(expressionFactory.get().compile(String.format("%d <= (length(%s) - %d + 1)", - width, col, currentpos))); + expressionFactory.get().compile(fixedLengthParseExpression)) + .filter(expressionFactory.get().compile(filterExcessLengthExpression)); currentpos += width; columncounter++; diff --git a/wrangler-core/src/main/java/io/cdap/directives/writer/WriteAsJsonObject.java b/wrangler-core/src/main/java/io/cdap/directives/writer/WriteAsJsonObject.java index e0f3d0fd8..5a7ef7139 100644 --- a/wrangler-core/src/main/java/io/cdap/directives/writer/WriteAsJsonObject.java +++ b/wrangler-core/src/main/java/io/cdap/directives/writer/WriteAsJsonObject.java @@ -129,9 +129,9 @@ public Relation transform(RelationalTranformContext relationalTranformContext, if (!expressionFactory.isPresent()) { return new InvalidRelation("Cannot find an Expression Factory"); } - return relation.setColumn( - column, expressionFactory.get() - .compile(String.format("struct(%s)", String.join(",", columns)))); + String getColumnString = String.join(",", columns); + return relation.setColumn(column, expressionFactory.get() + .compile(String.format("struct(%s)", getColumnString))); } }