Skip to content

Commit

Permalink
debug
Browse files Browse the repository at this point in the history
  • Loading branch information
AnkitCLI committed Sep 15, 2023
1 parent 26c1162 commit fc300d3
Show file tree
Hide file tree
Showing 7 changed files with 197 additions and 435 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,7 @@
<execution>
<goals>
<goal>integration-test</goal>
<goal>verify</goal>
<!-- <goal>verify</goal>-->
</goals>
</execution>
</executions>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ Feature: Wrangler - Run time scenarios
Given Open Datafusion Project to configure pipeline
Then Click on the Plus Green Button to import the pipelines
Then Select the file for importing the pipeline for the plugin "Directive_parse_excel"
Then Navigate to the properties page of plugin: "File"
Then Navigate to the properties page of plugin: "GCSFile"
Then Replace input plugin property: "project" with value: "projectId"
Then Replace input plugin property: "path" with value: "gcsSourceBucket"
Then Click on the Get Schema button
Then Click on the Validate button
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,6 @@
public class TestSetupHooks {
public static String gcsSourceBucketName = StringUtils.EMPTY;

/**
 * Hook registered with order 1 for scenarios tagged {@code @BQ_SOURCE_CSV_TEST}.
 *
 * <p>Looks up the plugin properties {@code CreateBQTableQueryFileCsv} and
 * {@code InsertBQDataQueryFileCsv} and passes both values to
 * {@code createSourceBQTableWithQueries}.
 *
 * @throws IOException propagated from {@code createSourceBQTableWithQueries}
 * @throws InterruptedException propagated from {@code createSourceBQTableWithQueries}
 */
@Before(order = 1, value = "@BQ_SOURCE_CSV_TEST")
public static void createTempSourceBQTable() throws IOException, InterruptedException {
  // Resolve both properties up front, in the same order the arguments were evaluated before.
  String createTableQueryFile = PluginPropertyUtils.pluginProp("CreateBQTableQueryFileCsv");
  String insertDataQueryFile = PluginPropertyUtils.pluginProp("InsertBQDataQueryFileCsv");
  createSourceBQTableWithQueries(createTableQueryFile, insertDataQueryFile);
}
@Before(order = 1, value = "@BQ_SINK_TEST")
public static void setTempTargetBQTableName() {
String bqTargetTableName = "E2E_TARGET_" + UUID.randomUUID().toString().replaceAll("-", "_");
Expand All @@ -73,6 +68,11 @@ public static void deleteTempTargetBQTable() throws IOException, InterruptedExce
/**
* Create BigQuery table.
*/
@Before(order = 1, value = "@BQ_SOURCE_CSV_TEST")
public static void createTempSourceBQTable() throws IOException, InterruptedException {
  // Look up the CSV create/insert properties first, then delegate to the shared helper.
  String createTableQueryFile = PluginPropertyUtils.pluginProp("CreateBQTableQueryFileCsv");
  String insertDataQueryFile = PluginPropertyUtils.pluginProp("InsertBQDataQueryFileCsv");
  createSourceBQTableWithQueries(createTableQueryFile, insertDataQueryFile);
}
@Before(order = 1, value = "@BQ_SOURCE_JSON_TEST")
public static void createTempSourceBQTableJson() throws IOException, InterruptedException {
createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQTableQueryFileJson"),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
{"copiedname":"very","id":0,"name":"very","phone":"8838.0","uniquenum":"very,0"}
{"copiedname":"hello","id":2,"name":"hell","phone":"12345.0","uniquenum":"hello,2"}
{"copiedname":"very","id":0,"name":"very","phone":"8838.0","rollno":"3.0","uniquenum":"very,0"}
{"copiedname":"hello","id":2,"name":"hell","phone":"12345.0","rollno":"1.0","uniquenum":"hello,2"}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# JSON pipeline file paths
Directive_parse_json=testData/Wrangler/parse_json_Wrangle-cdap-data-pipeline (1).json
Directive_parse_xml=testData/Wrangler/parse_xmltojson_wrangle-cdap-data-pipeline.json
Directive_parse_excel=testData/Wrangler/parse_excel_Wrangle-cdap-data-pipeline (1).json
Directive_parse_excel=testData/Wrangler/parse_excel_wrangler_copy-cdap-data-pipeline.json
Directive_parse_csv=testData/Wrangler\
/parse_csv_wrangle-cdap-data-pipeline.json
bqSourceTable=dummy
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
{
"name": "parse_excel_wrangler_copy",
"description": "Data Pipeline Application",
"artifact": {
"name": "cdap-data-pipeline",
"version": "6.10.0-SNAPSHOT",
"scope": "SYSTEM"
},
"config": {
"resources": {
"memoryMB": 2048,
"virtualCores": 1
},
"driverResources": {
"memoryMB": 2048,
"virtualCores": 1
},
"connections": [
{
"from": "GCSFile",
"to": "Wrangler"
},
{
"from": "Wrangler",
"to": "BigQuery"
}
],
"postActions": [],
"properties": {},
"processTimingEnabled": true,
"stageLoggingEnabled": true,
"stages": [
{
"name": "GCSFile",
"plugin": {
"name": "GCSFile",
"type": "batchsource",
"label": "GCSFile",
"artifact": {
"name": "google-cloud",
"version": "0.23.0-SNAPSHOT",
"scope": "SYSTEM"
},
"properties": {
"format": "blob",
"path": "gs://00000000-e2e-0014a44f-81be-4501-8360-0ddca1c39789/test1.xlsx",
"fileEncoding": "UTF-8",
"useConnection": "false",
"referenceName": "test",
"schema": "{\"type\":\"record\",\"name\":\"blob\",\"fields\":[{\"name\":\"body\",\"type\":\"bytes\"}]}",
"project": "auto-detect",
"serviceAccountType": "filePath",
"serviceFilePath": "auto-detect",
"sampleSize": "1000",
"filenameOnly": "false",
"recursive": "false",
"ignoreNonExistingFolders": "false",
"encrypted": "false"
}
},
"outputSchema": [
{
"name": "etlSchemaBody",
"schema": "{\"type\":\"record\",\"name\":\"blob\",\"fields\":[{\"name\":\"body\",\"type\":\"bytes\"}]}"
}
],
"id": "GCSFile",
"type": "batchsource",
"label": "GCSFile",
"icon": "fa-plug",
"$$hashKey": "object:475",
"isPluginAvailable": true,
"_uiPosition": {
"left": "496px",
"top": "343px"
}
},
{
"name": "Wrangler",
"plugin": {
"name": "Wrangler",
"type": "transform",
"label": "Wrangler",
"artifact": {
"name": "wrangler-transform",
"version": "4.10.0-SNAPSHOT",
"scope": "SYSTEM"
},
"properties": {
"directives": "parse-as-excel :body '0' true\ncopy name copiedname\nmerge name bkd uniquenum ','\nrename bkd rollno\ndrop fwd\nswap id rollno\nsplit-to-rows :name 'o'\nfilter-rows-on condition-false rollno !~ '2.0'",
"field": "*",
"precondition": "false",
"workspaceId": "667f9e85-6c36-4d38-ad48-ef85db7a04a2",
"schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"int\",\"null\"]},{\"name\":\"rollno\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"phone\",\"type\":[\"string\",\"null\"]},{\"name\":\"copiedname\",\"type\":[\"string\",\"null\"]},{\"name\":\"uniquenum\",\"type\":[\"string\",\"null\"]}]}",
"on-error": "fail-pipeline"
}
},
"outputSchema": [
{
"name": "etlSchemaBody",
"schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"int\",\"null\"]},{\"name\":\"rollno\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"phone\",\"type\":[\"string\",\"null\"]},{\"name\":\"copiedname\",\"type\":[\"string\",\"null\"]},{\"name\":\"uniquenum\",\"type\":[\"string\",\"null\"]}]}"
}
],
"inputSchema": [
{
"name": "GCSFile",
"schema": "{\"type\":\"record\",\"name\":\"blob\",\"fields\":[{\"name\":\"body\",\"type\":\"bytes\"}]}"
}
],
"id": "Wrangler",
"type": "transform",
"label": "Wrangler",
"icon": "icon-DataPreparation",
"$$hashKey": "object:476",
"isPluginAvailable": true,
"_uiPosition": {
"left": "796px",
"top": "343px"
}
},
{
"name": "BigQuery",
"plugin": {
"name": "BigQueryTable",
"type": "batchsink",
"label": "BigQuery",
"artifact": {
"name": "google-cloud",
"version": "0.23.0-SNAPSHOT",
"scope": "SYSTEM"
},
"properties": {
"useConnection": "false",
"project": "auto-detect",
"serviceAccountType": "filePath",
"serviceFilePath": "auto-detect",
"dataset": "Wrangler",
"table": "extab34",
"operation": "insert",
"truncateTable": "false",
"allowSchemaRelaxation": "false",
"location": "US",
"createPartitionedTable": "false",
"partitioningType": "TIME",
"partitionFilterRequired": "false",
"schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"int\",\"null\"]},{\"name\":\"rollno\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"phone\",\"type\":[\"string\",\"null\"]},{\"name\":\"copiedname\",\"type\":[\"string\",\"null\"]},{\"name\":\"uniquenum\",\"type\":[\"string\",\"null\"]}]}"
}
},
"outputSchema": [
{
"name": "etlSchemaBody",
"schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"int\",\"null\"]},{\"name\":\"rollno\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"phone\",\"type\":[\"string\",\"null\"]},{\"name\":\"copiedname\",\"type\":[\"string\",\"null\"]},{\"name\":\"uniquenum\",\"type\":[\"string\",\"null\"]}]}"
}
],
"inputSchema": [
{
"name": "Wrangler",
"schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"int\",\"null\"]},{\"name\":\"rollno\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"phone\",\"type\":[\"string\",\"null\"]},{\"name\":\"copiedname\",\"type\":[\"string\",\"null\"]},{\"name\":\"uniquenum\",\"type\":[\"string\",\"null\"]}]}"
}
],
"id": "BigQuery",
"type": "batchsink",
"label": "BigQuery",
"icon": "fa-plug",
"$$hashKey": "object:477",
"isPluginAvailable": true,
"_uiPosition": {
"left": "1096px",
"top": "343px"
}
}
],
"schedule": "0 1 */1 * *",
"engine": "spark",
"numOfRecordsPreview": 100,
"rangeRecordsPreview": {
"min": 1,
"max": "5000"
},
"description": "Data Pipeline Application",
"maxConcurrentRuns": 1,
"pushdownEnabled": false,
"transformationPushdown": {}
},
"version": "2dd12daa-5395-11ee-9dac-000000d0cf32"
}

0 comments on commit fc300d3

Please sign in to comment.