From fc300d303a3c9969d8fa1923844f8685b014fb44 Mon Sep 17 00:00:00 2001 From: AnkitCLI Date: Thu, 14 Sep 2023 13:11:28 +0530 Subject: [PATCH] debug --- pom.xml | 2 +- .../features/Wrangler/ParseAsExcel.feature | 3 +- .../common/stepsdesign/TestSetupHooks.java | 10 +- .../Directive_parse_excel | 4 +- .../resources/pluginParameters.properties | 2 +- ..._excel_Wrangle-cdap-data-pipeline (1).json | 425 ------------------ ...xcel_wrangler_copy-cdap-data-pipeline.json | 186 ++++++++ 7 files changed, 197 insertions(+), 435 deletions(-) delete mode 100644 wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_excel_Wrangle-cdap-data-pipeline (1).json create mode 100644 wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_excel_wrangler_copy-cdap-data-pipeline.json diff --git a/pom.xml b/pom.xml index b43b5c99d..172d11f52 100644 --- a/pom.xml +++ b/pom.xml @@ -492,7 +492,7 @@ integration-test - verify + diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsExcel.feature b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsExcel.feature index 3f5a1fe2b..625ee8245 100644 --- a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsExcel.feature +++ b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsExcel.feature @@ -20,7 +20,8 @@ Feature: Wrangler - Run time scenarios Given Open Datafusion Project to configure pipeline Then Click on the Plus Green Button to import the pipelines Then Select the file for importing the pipeline for the plugin "Directive_parse_excel" - Then Navigate to the properties page of plugin: "File" + Then Navigate to the properties page of plugin: "GCSFile" + Then Replace input plugin property: "project" with value: "projectId" Then Replace input plugin property: "path" with value: "gcsSourceBucket" Then Click on the Get Schema button Then Click on the Validate button diff --git a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java index 97bb2d24c..3871b43f5 100644 --- a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java +++ b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java @@ -42,11 +42,6 @@ public class TestSetupHooks { public static String gcsSourceBucketName = StringUtils.EMPTY; - @Before(order = 1, value = "@BQ_SOURCE_CSV_TEST") - public static void createTempSourceBQTable() throws IOException, InterruptedException { - createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQTableQueryFileCsv"), - PluginPropertyUtils.pluginProp("InsertBQDataQueryFileCsv")); - } @Before(order = 1, value = "@BQ_SINK_TEST") public static void setTempTargetBQTableName() { String bqTargetTableName = "E2E_TARGET_" + UUID.randomUUID().toString().replaceAll("-", "_"); @@ -73,6 +68,11 @@ public static void deleteTempTargetBQTable() throws IOException, InterruptedExce /** * Create BigQuery table. */ + @Before(order = 1, value = "@BQ_SOURCE_CSV_TEST") + public static void createTempSourceBQTable() throws IOException, InterruptedException { + createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQTableQueryFileCsv"), + PluginPropertyUtils.pluginProp("InsertBQDataQueryFileCsv")); + } @Before(order = 1, value = "@BQ_SOURCE_JSON_TEST") public static void createTempSourceBQTableJson() throws IOException, InterruptedException { createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQTableQueryFileJson"), diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_excel b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_excel index 82eb3967f..3c3ae5154 100644 --- a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_excel +++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_excel @@ -1,2 +1,2 @@ -{"copiedname":"very","id":0,"name":"very","phone":"8838.0","uniquenum":"very,0"} -{"copiedname":"hello","id":2,"name":"hell","phone":"12345.0","uniquenum":"hello,2"} \ No newline at end of file +{"copiedname":"very","id":0,"name":"very","phone":"8838.0","rollno":"3.0","uniquenum":"very,0"} +{"copiedname":"hello","id":2,"name":"hell","phone":"12345.0","rollno":"1.0","uniquenum":"hello,2"} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/pluginParameters.properties b/wrangler-transform/src/e2e-test/resources/pluginParameters.properties index db14e54d2..e2672ad68 100644 --- a/wrangler-transform/src/e2e-test/resources/pluginParameters.properties +++ b/wrangler-transform/src/e2e-test/resources/pluginParameters.properties @@ -1,7 +1,7 @@ #json file path Directive_parse_json=testData/Wrangler/parse_json_Wrangle-cdap-data-pipeline (1).json Directive_parse_xml=testData/Wrangler/parse_xmltojson_wrangle-cdap-data-pipeline.json -Directive_parse_excel=testData/Wrangler/parse_excel_Wrangle-cdap-data-pipeline (1).json +Directive_parse_excel=testData/Wrangler/parse_excel_wrangler_copy-cdap-data-pipeline.json Directive_parse_csv=testData/Wrangler\ /parse_csv_wrangle-cdap-data-pipeline.json bqSourceTable=dummy diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_excel_Wrangle-cdap-data-pipeline (1).json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_excel_Wrangle-cdap-data-pipeline (1).json deleted file mode 100644 index 109a591d8..000000000 --- a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_excel_Wrangle-cdap-data-pipeline (1).json +++ /dev/null @@ -1,425 +0,0 @@ -{ - "name": "parse_excel_Wrangle", - "description": "Data Pipeline Application", - "artifact": { - "name": "cdap-data-pipeline", - "version": "6.10.0-SNAPSHOT", - "scope": "SYSTEM" - }, - "config": { - "resources": { - "memoryMB": 2048, - "virtualCores": 1 - }, - "driverResources": { - "memoryMB": 2048, - "virtualCores": 1 - }, - "connections": [ - { - "from": "File", - "to": "Wrangler" - }, - { - "from": "Wrangler", - "to": "BigQuery" - } - ], - "comments": [], - "postActions": [], - "properties": {}, - "processTimingEnabled": true, - "stageLoggingEnabled": false, - "stages": [ - { - "name": "File", - "plugin": { - "name": "File", - "type": "batchsource", - "label": "File", - "artifact": { - "name": "core-plugins", - "version": "2.12.0-SNAPSHOT", - "scope": "SYSTEM" - }, - "properties": { - "referenceName": "sfdsf", - "path": "gs://00000000-e2e-0014a44f-81be-4501-8360-0ddca1c39789/test1.xlsx", - "format": "blob", - "sampleSize": "1000", - "filenameOnly": "false", - "recursive": "false", - "ignoreNonExistingFolders": "false", - "fileEncoding": "UTF-8", - "schema": "{\"type\":\"record\",\"name\":\"blob\",\"fields\":[{\"name\":\"body\",\"type\":\"bytes\"}]}" - } - }, - "outputSchema": [ - { - "name": "etlSchemaBody", - "schema": "{\"type\":\"record\",\"name\":\"blob\",\"fields\":[{\"name\":\"body\",\"type\":\"bytes\"}]}" - } - ], - "id": "File", - "type": "batchsource", - "label": "File", - "icon": "icon-file", - "$$hashKey": "object:417", - "isPluginAvailable": true, - "_uiPosition": { - "left": "496px", - "top": "343px" - }, - "_backendProperties": { - "schema": { - "name": "schema", - "description": "Output schema for the source. Formats like 'avro' and 'parquet' require a schema in order to read the data.", - "type": "string", - "required": false, - "macroSupported": true, - "macroEscapingEnabled": false, - "children": [] - }, - "copyHeader": { - "name": "copyHeader", - "description": "", - "type": "boolean", - "required": false, - "macroSupported": false, - "macroEscapingEnabled": false, - "children": [] - }, - "fileEncoding": { - "name": "fileEncoding", - "description": "File encoding for the source files. The default encoding is 'UTF-8'", - "type": "string", - "required": false, - "macroSupported": true, - "macroEscapingEnabled": false, - "children": [] - }, - "fileRegex": { - "name": "fileRegex", - "description": "Regular expression that file paths must match in order to be included in the input. The full file path is compared, not just the file name.If no value is given, no file filtering will be done. See https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html for more information about the regular expression syntax.", - "type": "string", - "required": false, - "macroSupported": true, - "macroEscapingEnabled": false, - "children": [] - }, - "format": { - "name": "format", - "description": "Format of the data to read. Supported formats are 'avro', 'blob', 'csv', 'delimited', 'json', 'parquet', 'text', or 'tsv'. ", - "type": "string", - "required": true, - "macroSupported": true, - "macroEscapingEnabled": false, - "children": [] - }, - "ignoreNonExistingFolders": { - "name": "ignoreNonExistingFolders", - "description": "Whether to allow an input that does not exist. When false, the source will fail the run if the input does not exist. When true, the run will not fail and the source will not generate any output. The default value is false.", - "type": "boolean", - "required": false, - "macroSupported": true, - "macroEscapingEnabled": false, - "children": [] - }, - "skipHeader": { - "name": "skipHeader", - "description": "Whether to use first row as header. Supported formats are 'text', 'csv', 'tsv', 'delimited'. Default value is false.", - "type": "boolean", - "required": false, - "macroSupported": true, - "macroEscapingEnabled": false, - "children": [] - }, - "sampleSize": { - "name": "sampleSize", - "description": "The maximum number of rows that will get investigated for automatic data type detection.", - "type": "long", - "required": false, - "macroSupported": true, - "macroEscapingEnabled": false, - "children": [] - }, - "pathField": { - "name": "pathField", - "description": "Output field to place the path of the file that the record was read from. If not specified, the file path will not be included in output records. If specified, the field must exist in the output schema as a string.", - "type": "string", - "required": false, - "macroSupported": true, - "macroEscapingEnabled": false, - "children": [] - }, - "recursive": { - "name": "recursive", - "description": "Whether to recursively read directories within the input directory. The default is false.", - "type": "boolean", - "required": false, - "macroSupported": true, - "macroEscapingEnabled": false, - "children": [] - }, - "filenameOnly": { - "name": "filenameOnly", - "description": "Whether to only use the filename instead of the URI of the file path when a path field is given. The default value is false.", - "type": "boolean", - "required": false, - "macroSupported": true, - "macroEscapingEnabled": false, - "children": [] - }, - "path": { - "name": "path", - "description": "Path to file(s) to be read. If a directory is specified, terminate the path name with a '/'. For distributed file system such as HDFS, file system name should comefrom 'fs.DefaultFS' property in the 'core-site.xml'. For example, 'hdfs://mycluster.net:8020/input', where value of the property 'fs.DefaultFS' in the 'core-site.xml' is 'hdfs://mycluster.net:8020'.", - "type": "string", - "required": true, - "macroSupported": true, - "macroEscapingEnabled": false, - "children": [] - }, - "maxSplitSize": { - "name": "maxSplitSize", - "description": "Maximum size of each partition used to read data. Smaller partitions will increase the level of parallelism, but will require more resources and overhead.", - "type": "long", - "required": false, - "macroSupported": true, - "macroEscapingEnabled": false, - "children": [] - }, - "delimiter": { - "name": "delimiter", - "description": "The delimiter to use if the format is 'delimited'. The delimiter will be ignored if the format is anything other than 'delimited'.", - "type": "string", - "required": false, - "macroSupported": true, - "macroEscapingEnabled": false, - "children": [] - }, - "enableQuotedValues": { - "name": "enableQuotedValues", - "description": "Whether to treat content between quotes as a value. This value will only be used if the format is 'csv', 'tsv' or 'delimited'. The default value is false.", - "type": "boolean", - "required": false, - "macroSupported": true, - "macroEscapingEnabled": false, - "children": [] - }, - "override": { - "name": "override", - "description": "A list of columns with the corresponding data types for whom the automatic data type detection gets skipped.", - "type": "string", - "required": false, - "macroSupported": true, - "macroEscapingEnabled": false, - "children": [] - }, - "fileSystemProperties": { - "name": "fileSystemProperties", - "description": "Any additional properties to use when reading from the filesystem. This is an advanced feature that requires knowledge of the properties supported by the underlying filesystem.", - "type": "string", - "required": false, - "macroSupported": true, - "macroEscapingEnabled": false, - "children": [] - }, - "referenceName": { - "name": "referenceName", - "description": "Name be used to uniquely identify this source for lineage, annotating metadata, etc.", - "type": "string", - "required": true, - "macroSupported": false, - "macroEscapingEnabled": false, - "children": [] - } - }, - "description": "Batch source for File Systems", - "selected": false - }, - { - "name": "Wrangler", - "plugin": { - "name": "Wrangler", - "type": "transform", - "label": "Wrangler", - "artifact": { - "name": "wrangler-transform", - "version": "4.10.0-SNAPSHOT", - "scope": "SYSTEM" - }, - "properties": { - "field": "*", - "precondition": "false", - "directives": "parse-as-excel :body '0' true\ncopy name copiedname\nmerge name bkd uniquenum ','\nrename bkd rollno\ndrop fwd\nswap id rollno\nsplit-to-rows :name 'o'\nfilter-rows-on condition-false rollno !~ '2.0'", - "on-error": "fail-pipeline", - "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"int\",\"null\"]},{\"name\":\"rollno\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"phone\",\"type\":[\"string\",\"null\"]},{\"name\":\"copiedname\",\"type\":[\"string\",\"null\"]},{\"name\":\"uniquenum\",\"type\":[\"string\",\"null\"]}]}", - "workspaceId": "0cf0176a-5f84-41ef-9411-2b4f9c7dcfc8" - } - }, - "outputSchema": [ - { - "name": "etlSchemaBody", - "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"int\",\"null\"]},{\"name\":\"rollno\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"phone\",\"type\":[\"string\",\"null\"]},{\"name\":\"copiedname\",\"type\":[\"string\",\"null\"]},{\"name\":\"uniquenum\",\"type\":[\"string\",\"null\"]}]}" - } - ], - "inputSchema": [ - { - "name": "File", - "schema": "{\"type\":\"record\",\"name\":\"blob\",\"fields\":[{\"name\":\"body\",\"type\":\"bytes\"}]}" - } - ], - "id": "Wrangler", - "type": "transform", - "label": "Wrangler", - "icon": "icon-DataPreparation", - "$$hashKey": "object:418", - "isPluginAvailable": true, - "_uiPosition": { - "left": "796px", - "top": "343px" - }, - "selected": false, - "_backendProperties": { - "schema": { - "name": "schema", - "description": "Specifies the schema that has to be output.", - "type": "string", - "required": true, - "macroSupported": true, - "macroEscapingEnabled": false, - "children": [] - }, - "preconditionSQL": { - "name": "preconditionSQL", - "description": "SQL Precondition expression specifying filtering before applying directives (false to filter)", - "type": "string", - "required": false, - "macroSupported": true, - "macroEscapingEnabled": false, - "children": [] - }, - "udd": { - "name": "udd", - "description": "List of User Defined Directives (UDD) that have to be loaded.", - "type": "string", - "required": false, - "macroSupported": false, - "macroEscapingEnabled": false, - "children": [] - }, - "field": { - "name": "field", - "description": "Name of the input field to be wrangled or '*' to wrangle all the fields.", - "type": "string", - "required": true, - "macroSupported": true, - "macroEscapingEnabled": false, - "children": [] - }, - "on-error": { - "name": "on-error", - "description": "How to handle error in record processing", - "type": "string", - "required": false, - "macroSupported": true, - "macroEscapingEnabled": false, - "children": [] - }, - "directives": { - "name": "directives", - "description": "Recipe for wrangling the input records", - "type": "string", - "required": false, - "macroSupported": true, - "macroEscapingEnabled": false, - "children": [] - }, - "expressionLanguage": { - "name": "expressionLanguage", - "description": "Toggle to configure precondition language between JEXL and SQL", - "type": "string", - "required": false, - "macroSupported": true, - "macroEscapingEnabled": false, - "children": [] - }, - "precondition": { - "name": "precondition", - "description": "JEXL Precondition expression specifying filtering before applying directives (true to filter)", - "type": "string", - "required": false, - "macroSupported": true, - "macroEscapingEnabled": false, - "children": [] - } - }, - "description": "Wrangler - A interactive tool for data cleansing and transformation." - }, - { - "name": "BigQuery", - "plugin": { - "name": "BigQueryTable", - "type": "batchsink", - "label": "BigQuery", - "artifact": { - "name": "google-cloud", - "version": "0.23.0-SNAPSHOT", - "scope": "SYSTEM" - }, - "properties": { - "useConnection": "false", - "project": "auto-detect", - "serviceAccountType": "filePath", - "serviceFilePath": "auto-detect", - "dataset": "Wrangler", - "table": "excelupds", - "operation": "insert", - "truncateTable": "false", - "allowSchemaRelaxation": "false", - "location": "US", - "createPartitionedTable": "false", - "partitioningType": "TIME", - "partitionFilterRequired": "false", - "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"int\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"phone\",\"type\":[\"string\",\"null\"]},{\"name\":\"copiedname\",\"type\":[\"string\",\"null\"]},{\"name\":\"uniquenum\",\"type\":[\"string\",\"null\"]}]}" - } - }, - "outputSchema": [ - { - "name": "etlSchemaBody", - "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"int\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"phone\",\"type\":[\"string\",\"null\"]},{\"name\":\"copiedname\",\"type\":[\"string\",\"null\"]},{\"name\":\"uniquenum\",\"type\":[\"string\",\"null\"]}]}" - } - ], - "inputSchema": [ - { - "name": "Wrangler", - "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"int\",\"null\"]},{\"name\":\"rollno\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"phone\",\"type\":[\"string\",\"null\"]},{\"name\":\"copiedname\",\"type\":[\"string\",\"null\"]},{\"name\":\"uniquenum\",\"type\":[\"string\",\"null\"]}]}" - } - ], - "id": "BigQuery", - "type": "batchsink", - "label": "BigQuery", - "icon": "fa-plug", - "$$hashKey": "object:419", - "isPluginAvailable": true, - "_uiPosition": { - "left": "1096px", - "top": "343px" - }, - "selected": false - } - ], - "schedule": "0 1 */1 * *", - "engine": "spark", - "numOfRecordsPreview": 100, - "rangeRecordsPreview": { - "min": 1, - "max": "5000" - }, - "description": "Data Pipeline Application", - "maxConcurrentRuns": 1, - "pushdownEnabled": false, - "transformationPushdown": {} - }, - "version": "5b46b464-4f37-11ee-9dbc-000000d45dd0" -} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_excel_wrangler_copy-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_excel_wrangler_copy-cdap-data-pipeline.json new file mode 100644 index 000000000..614f18fdf --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_excel_wrangler_copy-cdap-data-pipeline.json @@ -0,0 +1,186 @@ +{ + "name": "parse_excel_wrangler_copy", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.10.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCSFile", + "to": "Wrangler" + }, + { + "from": "Wrangler", + "to": "BigQuery" + } + ], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": true, + "stages": [ + { + "name": "GCSFile", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCSFile", + "artifact": { + "name": "google-cloud", + "version": "0.23.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "format": "blob", + "path": "gs://00000000-e2e-0014a44f-81be-4501-8360-0ddca1c39789/test1.xlsx", + "fileEncoding": "UTF-8", + "useConnection": "false", + "referenceName": "test", + "schema": "{\"type\":\"record\",\"name\":\"blob\",\"fields\":[{\"name\":\"body\",\"type\":\"bytes\"}]}", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "sampleSize": "1000", + "filenameOnly": "false", + "recursive": "false", + "ignoreNonExistingFolders": "false", + "encrypted": "false" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"blob\",\"fields\":[{\"name\":\"body\",\"type\":\"bytes\"}]}" + } + ], + "id": "GCSFile", + "type": "batchsource", + "label": "GCSFile", + "icon": "fa-plug", + "$$hashKey": "object:475", + "isPluginAvailable": true, + "_uiPosition": { + "left": "496px", + "top": "343px" + } + }, + { + "name": "Wrangler", + "plugin": { + "name": "Wrangler", + "type": "transform", + "label": "Wrangler", + "artifact": { + "name": "wrangler-transform", + "version": "4.10.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "directives": "parse-as-excel :body '0' true\ncopy name copiedname\nmerge name bkd uniquenum ','\nrename bkd rollno\ndrop fwd\nswap id rollno\nsplit-to-rows :name 'o'\nfilter-rows-on condition-false rollno !~ '2.0'", + "field": "*", + "precondition": "false", + "workspaceId": "667f9e85-6c36-4d38-ad48-ef85db7a04a2", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"int\",\"null\"]},{\"name\":\"rollno\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"phone\",\"type\":[\"string\",\"null\"]},{\"name\":\"copiedname\",\"type\":[\"string\",\"null\"]},{\"name\":\"uniquenum\",\"type\":[\"string\",\"null\"]}]}", + "on-error": "fail-pipeline" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"int\",\"null\"]},{\"name\":\"rollno\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"phone\",\"type\":[\"string\",\"null\"]},{\"name\":\"copiedname\",\"type\":[\"string\",\"null\"]},{\"name\":\"uniquenum\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "GCSFile", + "schema": "{\"type\":\"record\",\"name\":\"blob\",\"fields\":[{\"name\":\"body\",\"type\":\"bytes\"}]}" + } + ], + "id": "Wrangler", + "type": "transform", + "label": "Wrangler", + "icon": "icon-DataPreparation", + "$$hashKey": "object:476", + "isPluginAvailable": true, + "_uiPosition": { + "left": "796px", + "top": "343px" + } + }, + { + "name": "BigQuery", + "plugin": { + "name": "BigQueryTable", + "type": "batchsink", + "label": "BigQuery", + "artifact": { + "name": "google-cloud", + "version": "0.23.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "dataset": "Wrangler", + "table": "extab34", + "operation": "insert", + "truncateTable": "false", + "allowSchemaRelaxation": "false", + "location": "US", + "createPartitionedTable": "false", + "partitioningType": "TIME", + "partitionFilterRequired": "false", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"int\",\"null\"]},{\"name\":\"rollno\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"phone\",\"type\":[\"string\",\"null\"]},{\"name\":\"copiedname\",\"type\":[\"string\",\"null\"]},{\"name\":\"uniquenum\",\"type\":[\"string\",\"null\"]}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"int\",\"null\"]},{\"name\":\"rollno\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"phone\",\"type\":[\"string\",\"null\"]},{\"name\":\"copiedname\",\"type\":[\"string\",\"null\"]},{\"name\":\"uniquenum\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "Wrangler", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"int\",\"null\"]},{\"name\":\"rollno\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"phone\",\"type\":[\"string\",\"null\"]},{\"name\":\"copiedname\",\"type\":[\"string\",\"null\"]},{\"name\":\"uniquenum\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "BigQuery", + "type": "batchsink", + "label": "BigQuery", + "icon": "fa-plug", + "$$hashKey": "object:477", + "isPluginAvailable": true, + "_uiPosition": { + "left": "1096px", + "top": "343px" + } + } + ], + "schedule": "0 1 */1 * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "rangeRecordsPreview": { + "min": 1, + "max": "5000" + }, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1, + "pushdownEnabled": false, + "transformationPushdown": {} + }, + "version": "2dd12daa-5395-11ee-9dac-000000d0cf32" +} \ No newline at end of file