-
Notifications
You must be signed in to change notification settings - Fork 56
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Relational directives #641
base: develop
Are you sure you want to change the base?
Changes from 5 commits
a161c7c
e1fbdd5
0efb2d3
e6603da
400892a
b4dd875
aa99ccd
6a736c9
9e492e3
ddf5f7a
1e0eda0
2cdc2b3
cd4c680
b25accf
2bb3fe2
f7c5bd3
6d4093b
376e313
c3002ef
51d3d05
31e6f84
e217358
c5165f1
67755cf
7cb8aa3
1adbb2d
2a1c2b7
19518e6
35fbf93
0158b38
cb5eb7f
8f926a5
68785b5
6404c8d
ca89034
09aa10e
278a30e
480f2aa
7f3f3cb
63a5932
69e8527
3ee71d7
016c989
5899112
ff57c83
1e1d898
7ebc4c3
6f7d14d
29eda87
a656f47
5ce676d
b5ce529
ed0e9a4
36326f4
bec9679
39e360f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
/* | ||
* Copyright © 2017-2019 Cask Data, Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not | ||
* use this file except in compliance with the License. You may obtain a copy of | ||
* the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT | ||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the | ||
* License for the specific language governing permissions and limitations under | ||
* the License. | ||
*/ | ||
|
||
package io.cdap.wrangler.api; | ||
|
||
/** | ||
* Directive interface which supports Relational transformations | ||
*/ | ||
public interface RelationalDirective extends Directive { | ||
|
||
/** | ||
* returns sql expression | ||
* @return expression | ||
*/ | ||
String getSQL(); | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,6 +44,7 @@ | |
import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType; | ||
import io.cdap.cdap.features.Feature; | ||
import io.cdap.directives.aggregates.DefaultTransientStore; | ||
import io.cdap.directives.column.Drop; | ||
import io.cdap.wrangler.api.CompileException; | ||
import io.cdap.wrangler.api.CompileStatus; | ||
import io.cdap.wrangler.api.Compiler; | ||
|
@@ -53,9 +54,11 @@ | |
import io.cdap.wrangler.api.EntityCountMetric; | ||
import io.cdap.wrangler.api.ErrorRecord; | ||
import io.cdap.wrangler.api.ExecutorContext; | ||
import io.cdap.wrangler.api.RecipeException; | ||
import io.cdap.wrangler.api.RecipeParser; | ||
import io.cdap.wrangler.api.RecipePipeline; | ||
import io.cdap.wrangler.api.RecipeSymbol; | ||
import io.cdap.wrangler.api.RelationalDirective; | ||
import io.cdap.wrangler.api.Row; | ||
import io.cdap.wrangler.api.TokenGroup; | ||
import io.cdap.wrangler.api.TransientStore; | ||
|
@@ -190,7 +193,7 @@ public void configurePipeline(PipelineConfigurer configurer) { | |
if (!config.containsMacro(Config.NAME_PRECONDITION_SQL)) { | ||
validatePrecondition(config.getPreconditionSQL(), true, collector); | ||
} | ||
validateSQLModeDirectives(collector); | ||
//validateSQLModeDirectives(collector); | ||
} else { | ||
if (!config.containsMacro(Config.NAME_PRECONDITION)) { | ||
validatePrecondition(config.getPreconditionJEXL(), false, collector); | ||
|
@@ -569,23 +572,66 @@ private RecipeParser getRecipeParser(StageContext context) | |
|
||
@Override | ||
public Relation transform(RelationalTranformContext relationalTranformContext, Relation relation) { | ||
if (PRECONDITION_LANGUAGE_SQL.equalsIgnoreCase(config.getPreconditionLanguage()) | ||
&& checkPreconditionNotEmpty(true)) { | ||
if (!(PRECONDITION_LANGUAGE_SQL.equalsIgnoreCase(config.getPreconditionLanguage()) | ||
&& checkPreconditionNotEmpty(true))) { | ||
return new InvalidRelation("Plugin is not configured for relational transformation"); | ||
} | ||
|
||
if (!Feature.WRANGLER_PRECONDITION_SQL.isEnabled(relationalTranformContext)) { | ||
throw new RuntimeException("SQL Precondition feature is not available"); | ||
} | ||
if (!Feature.WRANGLER_PRECONDITION_SQL.isEnabled(relationalTranformContext)) { | ||
throw new RuntimeException("SQL Precondition feature is not available"); | ||
} | ||
|
||
Optional<ExpressionFactory<String>> expressionFactory = getExpressionFactory(relationalTranformContext); | ||
if (!expressionFactory.isPresent()) { | ||
return new InvalidRelation("Cannot find an Expression Factory"); | ||
} | ||
Optional<ExpressionFactory<String>> expressionFactory = getExpressionFactory(relationalTranformContext); | ||
if (!expressionFactory.isPresent()) { | ||
return new InvalidRelation("Cannot find an Expression Factory"); | ||
} | ||
|
||
Expression filterExpression = expressionFactory.get().compile(config.getPreconditionSQL()); | ||
Relation filteredRelation = relation.filter(filterExpression); | ||
|
||
ExpressionFactory<String> expFactory = expressionFactory.get(); | ||
|
||
String recipe = config.getDirectives(); | ||
|
||
registry = SystemDirectiveRegistry.INSTANCE; | ||
try { | ||
registry.reload("default"); | ||
} catch (DirectiveLoadException e) { | ||
throw new RuntimeException(e); | ||
} | ||
|
||
Expression filterExpression = expressionFactory.get().compile(config.getPreconditionSQL()); | ||
return relation.filter(filterExpression); | ||
List<Directive> directives = null; | ||
try { | ||
GrammarBasedParser parser = new GrammarBasedParser("default", new MigrateToV2(recipe).migrate(), registry); | ||
directives = parser.parse(); | ||
} catch (DirectiveParseException e) { | ||
throw new RuntimeException(e); | ||
} catch (RecipeException e) { | ||
throw new RuntimeException(e); | ||
} | ||
|
||
for (Directive directive : directives) { | ||
// Expression exp = expFactory.compile(sql); | ||
if (!(directive instanceof RelationalDirective)) { | ||
throw new RuntimeException("Directive is not relational Directive"); | ||
} | ||
// currently supporting only drop column | ||
// SQL will be returned as "DROP COLUMN col1, col2" | ||
String sql = ((RelationalDirective) directive).getSQL(); | ||
List<String> cols = getColumnsOfDropSQL(sql); | ||
for (String col : cols) { | ||
filteredRelation = filteredRelation.dropColumn(col); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should move this logic into the directive. A SparkSQLDataset should be returned from each directive and passed on to the next one. SparkSQLEngine.transform should be invoked in each directive for execution. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Agreed. These changes are for a POC; we will move the logic into the directives. |
||
} | ||
} | ||
return filteredRelation; | ||
} | ||
|
||
return new InvalidRelation("Plugin is not configured for relational transformation"); | ||
private List<String> getColumnsOfDropSQL(String sql) { | ||
List<String> cols = new ArrayList<>(); | ||
for (String col : sql.split(" ")[2].split(",")) { | ||
cols.add(col.trim()); | ||
} | ||
return cols; | ||
} | ||
|
||
private Optional<ExpressionFactory<String>> getExpressionFactory(RelationalTranformContext ctx) { | ||
|
@@ -642,6 +688,7 @@ public static class Config extends PluginConfig { | |
static final String NAME_PRECONDITION_LANGUAGE = "expressionLanguage"; | ||
static final String NAME_FIELD = "field"; | ||
static final String NAME_DIRECTIVES = "directives"; | ||
static final String NAME_RELATIONAL_DIRECTIVES = "relationalDirectives"; | ||
static final String NAME_UDD = "udd"; | ||
static final String NAME_SCHEMA = "schema"; | ||
static final String NAME_ON_ERROR = "on-error"; | ||
|
@@ -672,6 +719,7 @@ public static class Config extends PluginConfig { | |
@Nullable | ||
private String directives; | ||
|
||
|
||
@Name(NAME_UDD) | ||
@Description("List of User Defined Directives (UDD) that have to be loaded.") | ||
@Nullable | ||
|
@@ -694,7 +742,7 @@ public static class Config extends PluginConfig { | |
private final String onError; | ||
|
||
public Config(String preconditionLanguage, String precondition, String directives, String udds, | ||
String field, String schema, String onError) { | ||
String field, String schema, String onError, String relationalDirectives) { | ||
this.preconditionLanguage = preconditionLanguage; | ||
this.precondition = precondition; | ||
this.directives = directives; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -73,6 +73,24 @@ | |
} | ||
] | ||
}, | ||
{ | ||
"label": "RelationalDirectives", | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do you want to keep these changes behind a feature flag? Otherwise we have to move this code to a branch and keep syncing that branch with develop and other future changes. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We will add a feature flag. Until then we won't merge to develop. |
||
"properties": [ | ||
{ | ||
"widget-type": "wrangler-relational-directives", | ||
"label": "RelationalRecipe", | ||
"name": "RelationalDirectives", | ||
"widget-attributes" : { | ||
"placeholder" : "#pragma load-directives my-directive; my-directive :body;" | ||
} | ||
}, | ||
{ | ||
"widget-type": "csv", | ||
"label": "User Defined SQL(UDS)", | ||
"name": "uds" | ||
} | ||
] | ||
}, | ||
{ | ||
"label" : "Error Handling", | ||
"properties" : [ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we not invoke execute on directives and offload directive execution to RecipePipelineExecutor? We can introduce a new function for executing a RelationalDirective.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's a good point. Will look into offloading directive execution to RecipePipelineExecutor.