diff --git a/wrangler-transform/src/e2e-test/features/WranglerUI/DataTypeParsers.feature b/wrangler-transform/src/e2e-test/features/WranglerUI/DataTypeParsers.feature new file mode 100644 index 000000000..e361ec590 --- /dev/null +++ b/wrangler-transform/src/e2e-test/features/WranglerUI/DataTypeParsers.feature @@ -0,0 +1,121 @@ +# Copyright © 2024 Cask Data, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +@Wrangler +Feature: Runtime Scenarios for datatype parsers + + @BQ_SOURCE_TS_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST + Scenario: To verify User is able to run a pipeline using parse timestamp directive + Given Open Wrangler connections page + Then Click plugin property: "addConnection" button + Then Click plugin property: "bqConnectionRow" + Then Enter input plugin property: "name" with value: "bqConnectionName" + Then Replace input plugin property: "projectId" with value: "projectId" + Then Enter input plugin property: "datasetProjectId" with value: "projectId" + Then Override Service account details in Wrangler connection page if set in environment variables + Then Click plugin property: "testConnection" button + Then Verify the test connection is successful + Then Click plugin property: "connectionCreate" button + Then Verify the connection with name: "bqConnectionName" is created successfully + Then Select connection data row with name: "dataset" + Then Select connection data row with name: "bqSourceTable" + Then Verify connection datatable is displayed for the 
data: "bqSourceTable" + Then Expand dropdown column: "update_date" and apply directive: "Parse" as "SIMPLEDATE" with: "yyyy-MM-dd" option + Then Expand dropdown column: "create_date" and apply directive: "Parse" as "SIMPLEDATE" with: "yyyy-MM-dd" option + Then Enter directive from CLI "parse-timestamp :time" + Then Enter directive from CLI "parse-as-currency :price :newprice" + Then Enter directive from CLI "format-as-currency :newprice :format_price" + Then Enter directive from CLI "diff-date :create_date :update_date :diff_date" + Then Enter directive from CLI "timestamp-to-datetime :update_date" + Then Enter directive from CLI "rename :newprice :id" + Then Click Create Pipeline button and choose the type of pipeline as: "Batch pipeline" + Then Verify plugin: "BigQueryTable" node is displayed on the canvas with a timeout of 120 seconds + Then Expand Plugin group in the LHS plugins list: "Sink" + Then Select plugin: "BigQuery" from the plugins list as: "Sink" + Then Navigate to the properties page of plugin: "BigQuery2" + Then Click plugin property: "useConnection" + Then Click on the Browse Connections button + Then Select connection: "bqConnectionName" + Then Enter input plugin property: "referenceName" with value: "BQSinkReferenceName" + Then Enter input plugin property: "dataset" with value: "dataset" + Then Enter input plugin property: "table" with value: "bqTargetTable" + Then Validate "BigQuery" plugin properties + Then Close the Plugin Properties page + Then Connect plugins: "Wrangler" and "BigQuery2" to establish connection + Then Save the pipeline + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Close the pipeline logs + Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_Timestamp" + Given Open Wrangler connections page + Then Expand connections of type: "BigQuery" + 
Then Open action menu for connection: "bqConnectionName" of type: "BigQuery" + Then Select action: "Delete" for connection: "bqConnectionName" of type: "BigQuery" + Then Click plugin property: "Delete" button + Then Verify connection: "bqConnectionName" of type: "BigQuery" is deleted successfully + + + @BQ_SOURCE_DATETIME_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST + Scenario: To verify User is able to run a pipeline using parse datetime directive + Given Open Wrangler connections page + Then Click plugin property: "addConnection" button + Then Click plugin property: "bqConnectionRow" + Then Enter input plugin property: "name" with value: "bqConnectionName" + Then Replace input plugin property: "projectId" with value: "projectId" + Then Enter input plugin property: "datasetProjectId" with value: "projectId" + Then Override Service account details in Wrangler connection page if set in environment variables + Then Click plugin property: "testConnection" button + Then Verify the test connection is successful + Then Click plugin property: "connectionCreate" button + Then Verify the connection with name: "bqConnectionName" is created successfully + Then Select connection data row with name: "dataset" + Then Select connection data row with name: "bqSourceTable" + Then Verify connection datatable is displayed for the data: "bqSourceTable" + Then Expand dropdown column: "timestamp" and apply directive: "Parse" with directive type: "DATETIME" and select: "Custom_Format" and enter: "yyyy-MM-dd'T'HH:mm:ssX'['z']'" + Then Enter directive from CLI "current-datetime :create_date" + Then Enter directive from CLI "datetime-to-timestamp :timestamp" + Then Enter directive from CLI "format-datetime :create_date 'y'" + Then Enter directive from CLI "format-date :timestamp yyyy-mm-dd" + Then Enter directive from CLI "rename timestamp timecolumn" + Then Click Create Pipeline button and choose the type of pipeline as: "Batch pipeline" + Then Verify plugin: "BigQueryTable" node is displayed on the 
canvas with a timeout of 120 seconds + Then Expand Plugin group in the LHS plugins list: "Sink" + Then Select plugin: "BigQuery" from the plugins list as: "Sink" + Then Navigate to the properties page of plugin: "BigQuery2" + Then Click plugin property: "useConnection" + Then Click on the Browse Connections button + Then Select connection: "bqConnectionName" + Then Enter input plugin property: "referenceName" with value: "BQSinkReferenceName" + Then Enter input plugin property: "dataset" with value: "dataset" + Then Enter input plugin property: "table" with value: "bqTargetTable" + Then Validate "BigQuery" plugin properties + Then Close the Plugin Properties page + Then Connect plugins: "Wrangler" and "BigQuery2" to establish connection + Then Save the pipeline + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Close the pipeline logs + Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_DatetimeNew" + Given Open Wrangler connections page + Then Expand connections of type: "BigQuery" + Then Open action menu for connection: "bqConnectionName" of type: "BigQuery" + Then Select action: "Delete" for connection: "bqConnectionName" of type: "BigQuery" + Then Click plugin property: "Delete" button + Then Verify connection: "bqConnectionName" of type: "BigQuery" is deleted successfully diff --git a/wrangler-transform/src/e2e-test/features/WranglerUI/ParseAsCsv.feature b/wrangler-transform/src/e2e-test/features/WranglerUI/ParseAsCsv.feature new file mode 100644 index 000000000..43e661071 --- /dev/null +++ b/wrangler-transform/src/e2e-test/features/WranglerUI/ParseAsCsv.feature @@ -0,0 +1,71 @@ +# Copyright © 2024 Cask Data, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. 
You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +@Wrangler +Feature: Wrangler - Run time scenarios for parse csv using UI + + @BQ_SOURCE_CSV_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST + Scenario: To verify User is able to run a pipeline using parse csv directive + Given Open Wrangler connections page + Then Click plugin property: "addConnection" button + Then Click plugin property: "bqConnectionRow" + Then Enter input plugin property: "name" with value: "bqConnectionName" + Then Replace input plugin property: "projectId" with value: "projectId" + Then Enter input plugin property: "datasetProjectId" with value: "projectId" + Then Override Service account details in Wrangler connection page if set in environment variables + Then Click plugin property: "testConnection" button + Then Verify the test connection is successful + Then Click plugin property: "connectionCreate" button + Then Verify the connection with name: "bqConnectionName" is created successfully + Then Select connection data row with name: "dataset" + Then Select connection data row with name: "bqSourceTable" + Then Verify connection datatable is displayed for the data: "bqSourceTable" + Then Expand dropdown column: "body" and apply directive: "Parse" as "CSV" with: "Comma" option + Then Expand dropdown column: "body_3" and apply directive: "FillNullOrEmptyCells" as "shubh" + Then Enter directive from CLI "rename body_1 new_id" + Then Enter directive from CLI "quantize body_4 body_q 1:2=20,3:4=40" + Then Expand dropdown column: "body_4" and apply directive: "ChangeDataType" as "Integer" + Then Enter directive from CLI "columns-replace s/^new_//g" + 
Then Enter directive from CLI "set-headers :abc" + Then Enter directive from CLI "change-column-case uppercase" + Then Enter directive from CLI "cleanse-column-names " + Then Enter directive from CLI "split-to-rows :id '#'" + Then Click Create Pipeline button and choose the type of pipeline as: "Batch pipeline" + Then Verify plugin: "BigQueryTable" node is displayed on the canvas with a timeout of 120 seconds + Then Expand Plugin group in the LHS plugins list: "Sink" + Then Select plugin: "BigQuery" from the plugins list as: "Sink" + Then Navigate to the properties page of plugin: "BigQuery2" + Then Click plugin property: "useConnection" + Then Click on the Browse Connections button + Then Select connection: "bqConnectionName" + Then Enter input plugin property: "referenceName" with value: "BQSinkReferenceName" + Then Enter input plugin property: "dataset" with value: "dataset" + Then Enter input plugin property: "table" with value: "bqTargetTable" + Then Validate "BigQuery" plugin properties + Then Close the Plugin Properties page + Then Connect plugins: "Wrangler" and "BigQuery2" to establish connection + Then Save the pipeline + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Close the pipeline logs + Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_csv" + Given Open Wrangler connections page + Then Expand connections of type: "BigQuery" + Then Open action menu for connection: "bqConnectionName" of type: "BigQuery" + Then Select action: "Delete" for connection: "bqConnectionName" of type: "BigQuery" + Then Click plugin property: "Delete" button + Then Verify connection: "bqConnectionName" of type: "BigQuery" is deleted successfully diff --git a/wrangler-transform/src/e2e-test/features/WranglerUI/ParseAsExcel.feature 
b/wrangler-transform/src/e2e-test/features/WranglerUI/ParseAsExcel.feature new file mode 100644 index 000000000..2e6a11ba8 --- /dev/null +++ b/wrangler-transform/src/e2e-test/features/WranglerUI/ParseAsExcel.feature @@ -0,0 +1,70 @@ +# Copyright © 2024 Cask Data, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +@Wrangler +Feature: Parse as excel + + @BQ_SINK_TEST + Scenario: To verify User is able to run a pipeline using parse Excel directive + Given Open Wrangler connections page + Then Click plugin property: "addConnection" button + Then Click plugin property: "bqConnectionRow" + Then Enter input plugin property: "name" with value: "bqConnectionName" + Then Replace input plugin property: "projectId" with value: "projectId" + Then Enter input plugin property: "datasetProjectId" with value: "projectId" + Then Override Service account details in Wrangler connection page if set in environment variables + Then Click plugin property: "testConnection" button + Then Verify the test connection is successful + Then Click plugin property: "connectionCreate" button + Then Verify the connection with name: "bqConnectionName" is created successfully + Then Select connection data row with name: "dataset" + Then Select connection data row with name: "bqSourceTableExcel" + Then Verify connection datatable is displayed for the data: "bqSourceTableExcel" + Then Enter directive from CLI "parse-as-excel :body '0' true" + Then Expand dropdown column: "name" and apply directive: 
"CopyColumn" as "copiedname" + Then Enter directive from CLI "merge name bkd uniquenum ','" + Then Enter directive from CLI "rename bkd rollno" + Then Expand dropdown column: "fwd" and apply directive: "DeleteColumn" + Then Select checkbox on two columns: "id" and "rollno" + Then Expand dropdown column: "id" and apply directive: "SwapTwoColumnNames" + Then Enter directive from CLI "split-to-rows :name 'o'" + Then Enter directive from CLI "filter-rows-on condition-false rollno !~ '2.0'" + Then Click Create Pipeline button and choose the type of pipeline as: "Batch pipeline" + Then Verify plugin: "BigQueryTable" node is displayed on the canvas with a timeout of 120 seconds + Then Expand Plugin group in the LHS plugins list: "Sink" + Then Select plugin: "BigQuery" from the plugins list as: "Sink" + Then Navigate to the properties page of plugin: "BigQuery2" + Then Click plugin property: "useConnection" + Then Click on the Browse Connections button + Then Select connection: "bqConnectionName" + Then Enter input plugin property: "referenceName" with value: "BQSinkReferenceName" + Then Enter input plugin property: "dataset" with value: "dataset" + Then Enter input plugin property: "table" with value: "bqTargetTable" + Then Validate "BigQuery" plugin properties + Then Close the Plugin Properties page + Then Connect plugins: "Wrangler" and "BigQuery2" to establish connection + Then Save the pipeline + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Close the pipeline logs + Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_excel" + Given Open Wrangler connections page + Then Expand connections of type: "BigQuery" + Then Open action menu for connection: "bqConnectionName" of type: "BigQuery" + Then Select action: "Delete" for connection: "bqConnectionName" of type: "BigQuery" + Then 
Click plugin property: "Delete" button + Then Verify connection: "bqConnectionName" of type: "BigQuery" is deleted successfully diff --git a/wrangler-transform/src/e2e-test/features/WranglerUI/ParseAsFixedLength.feature b/wrangler-transform/src/e2e-test/features/WranglerUI/ParseAsFixedLength.feature new file mode 100644 index 000000000..90a9cd17b --- /dev/null +++ b/wrangler-transform/src/e2e-test/features/WranglerUI/ParseAsFixedLength.feature @@ -0,0 +1,71 @@ +# Copyright © 2024 Cask Data, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. 
+ +@Wrangler +Feature: parse as fixed length + + @BQ_SOURCE_FXDLEN_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST + Scenario: To verify User is able to run a pipeline using parse fixedlength directive + Given Open Wrangler connections page + Then Click plugin property: "addConnection" button + Then Click plugin property: "bqConnectionRow" + Then Enter input plugin property: "name" with value: "bqConnectionName" + Then Replace input plugin property: "projectId" with value: "projectId" + Then Enter input plugin property: "datasetProjectId" with value: "projectId" + Then Override Service account details in Wrangler connection page if set in environment variables + Then Click plugin property: "testConnection" button + Then Verify the test connection is successful + Then Click plugin property: "connectionCreate" button + Then Verify the connection with name: "bqConnectionName" is created successfully + Then Select connection data row with name: "dataset" + Then Select connection data row with name: "bqSourceTable" + Then Verify connection datatable is displayed for the data: "bqSourceTable" + Then Expand dropdown column: "fixedlength" and apply directive: "Parse" as "FIXEDLENGTH" with: "2,4,5,3" option + Then Enter directive from CLI "split-url url" + Then Enter directive from CLI "write-as-csv :url_protocol" + Then Enter directive from CLI "url-encode :url" + Then Enter directive from CLI "url-decode :url" + Then Expand dropdown column: "fixedlength" and apply directive: "Encode" as "Base32" + Then Expand dropdown column: "fixedlength_encode_base32" and apply directive: "Decode" as "Base32" + Then Enter directive from CLI "split-to-columns :url_query '='" + Then Enter directive from CLI "rename fixedlength_2 id" + Then Enter directive from CLI "filter-rows-on condition-true fixedlength_4 !~ 'XYZ'" + Then Click Create Pipeline button and choose the type of pipeline as: "Batch pipeline" + Then Verify plugin: "BigQueryTable" node is displayed on the canvas with a timeout of 120 
seconds + Then Expand Plugin group in the LHS plugins list: "Sink" + Then Select plugin: "BigQuery" from the plugins list as: "Sink" + Then Navigate to the properties page of plugin: "BigQuery2" + Then Click plugin property: "useConnection" + Then Click on the Browse Connections button + Then Select connection: "bqConnectionName" + Then Enter input plugin property: "referenceName" with value: "BQSinkReferenceName" + Then Enter input plugin property: "dataset" with value: "dataset" + Then Enter input plugin property: "table" with value: "bqTargetTable" + Then Validate "BigQuery" plugin properties + Then Close the Plugin Properties page + Then Connect plugins: "Wrangler" and "BigQuery2" to establish connection + Then Save the pipeline + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Close the pipeline logs + Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_FixedLengthnew" + Given Open Wrangler connections page + Then Expand connections of type: "BigQuery" + Then Open action menu for connection: "bqConnectionName" of type: "BigQuery" + Then Select action: "Delete" for connection: "bqConnectionName" of type: "BigQuery" + Then Click plugin property: "Delete" button + Then Verify connection: "bqConnectionName" of type: "BigQuery" is deleted successfully diff --git a/wrangler-transform/src/e2e-test/features/WranglerUI/ParseAsHl7.feature b/wrangler-transform/src/e2e-test/features/WranglerUI/ParseAsHl7.feature new file mode 100644 index 000000000..337978e83 --- /dev/null +++ b/wrangler-transform/src/e2e-test/features/WranglerUI/ParseAsHl7.feature @@ -0,0 +1,69 @@ +# Copyright © 2024 Cask Data, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. 
You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +@Wrangler +Feature: parse as HL7 + + @BQ_SOURCE_HL7_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST + Scenario: To verify User is able to run a pipeline using parse hl7 directive + Given Open Wrangler connections page + Then Click plugin property: "addConnection" button + Then Click plugin property: "bqConnectionRow" + Then Enter input plugin property: "name" with value: "bqConnectionName" + Then Replace input plugin property: "projectId" with value: "projectId" + Then Enter input plugin property: "datasetProjectId" with value: "projectId" + Then Override Service account details in Wrangler connection page if set in environment variables + Then Click plugin property: "testConnection" button + Then Verify the test connection is successful + Then Click plugin property: "connectionCreate" button + Then Verify the connection with name: "bqConnectionName" is created successfully + Then Select connection data row with name: "dataset" + Then Select connection data row with name: "bqSourceTable" + Then Verify connection datatable is displayed for the data: "bqSourceTable" + Then Expand dropdown column: "Body" and apply directive: "Parse" as "HL7" + Then Expand dropdown column: "Body" and apply directive: "Hash" as "MD5" + Then Enter directive from CLI "set-type :Body string" + Then Enter directive from CLI "keep address,Body,Body_hl7_MSH_12,Body_hl7_MSH_9_1" + Then Expand dropdown column: "address" and apply directive: "FindAndReplace" and select: "address1" and enter: "test" + Then Expand dropdown column: "Body_hl7_MSH_9_1" and apply directive: "MaskData" as "By_shuffling" 
+ Then Expand dropdown column: "address" and apply directive: "SendToError" as "value_is_empty" + Then Enter directive from CLI "rename :Body_hl7_MSH_12 :id " + Then Click Create Pipeline button and choose the type of pipeline as: "Batch pipeline" + Then Verify plugin: "BigQueryTable" node is displayed on the canvas with a timeout of 120 seconds + Then Expand Plugin group in the LHS plugins list: "Sink" + Then Select plugin: "BigQuery" from the plugins list as: "Sink" + Then Navigate to the properties page of plugin: "BigQuery2" + Then Click plugin property: "useConnection" + Then Click on the Browse Connections button + Then Select connection: "bqConnectionName" + Then Enter input plugin property: "referenceName" with value: "BQSinkReferenceName" + Then Enter input plugin property: "dataset" with value: "dataset" + Then Enter input plugin property: "table" with value: "bqTargetTable" + Then Validate "BigQuery" plugin properties + Then Close the Plugin Properties page + Then Connect plugins: "Wrangler" and "BigQuery2" to establish connection + Then Save the pipeline + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Close the pipeline logs + Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_hl7_new" + Given Open Wrangler connections page + Then Expand connections of type: "BigQuery" + Then Open action menu for connection: "bqConnectionName" of type: "BigQuery" + Then Select action: "Delete" for connection: "bqConnectionName" of type: "BigQuery" + Then Click plugin property: "Delete" button + Then Verify connection: "bqConnectionName" of type: "BigQuery" is deleted successfully diff --git a/wrangler-transform/src/e2e-test/features/WranglerUI/ParseAsJson.feature b/wrangler-transform/src/e2e-test/features/WranglerUI/ParseAsJson.feature new file mode 
100644 index 000000000..d4a395d5a --- /dev/null +++ b/wrangler-transform/src/e2e-test/features/WranglerUI/ParseAsJson.feature @@ -0,0 +1,72 @@ +# Copyright © 2024 Cask Data, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +@Wrangler +Feature: parse as Json + + @BQ_SOURCE_JSON_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST + Scenario: To verify User is able to run a pipeline using parse Json directive + Given Open Wrangler connections page + Then Click plugin property: "addConnection" button + Then Click plugin property: "bqConnectionRow" + Then Enter input plugin property: "name" with value: "bqConnectionName" + Then Replace input plugin property: "projectId" with value: "projectId" + Then Enter input plugin property: "datasetProjectId" with value: "projectId" + Then Override Service account details in Wrangler connection page if set in environment variables + Then Click plugin property: "testConnection" button + Then Verify the test connection is successful + Then Click plugin property: "connectionCreate" button + Then Verify the connection with name: "bqConnectionName" is created successfully + Then Select connection data row with name: "dataset" + Then Select connection data row with name: "bqSourceTable" + Then Verify connection datatable is displayed for the data: "bqSourceTable" + Then Expand dropdown column: "json" and apply directive: "Parse" as "JSON" with: "1" option + Then Expand dropdown column: "body" and apply directive: "Format" as "Trim_leading_whitespace" + 
Then Enter directive from CLI "set-column :desc concat(json_pet,body)" + Then Expand dropdown column: "json_name" and apply directive: "CopyColumn" as "copied" + Then Select checkbox on two columns: "json_id" and "json_age" + Then Expand dropdown column: "json_id" and apply directive: "SwapTwoColumnNames" + Then Enter directive from CLI "merge :json_id :json_name :json_id_json_name ," + Then Enter directive from CLI "mask-number :json_pet 'testing'" + Then Expand dropdown column: "json_height" and apply directive: "DeleteColumn" + Then Enter directive from CLI "write-as-json-map :json_age" + Then Enter directive from CLI "rename json_id id" + Then Click Create Pipeline button and choose the type of pipeline as: "Batch pipeline" + Then Verify plugin: "BigQueryTable" node is displayed on the canvas with a timeout of 120 seconds + Then Expand Plugin group in the LHS plugins list: "Sink" + Then Select plugin: "BigQuery" from the plugins list as: "Sink" + Then Navigate to the properties page of plugin: "BigQuery2" + Then Click plugin property: "useConnection" + Then Click on the Browse Connections button + Then Select connection: "bqConnectionName" + Then Enter input plugin property: "referenceName" with value: "BQSinkReferenceName" + Then Enter input plugin property: "dataset" with value: "dataset" + Then Enter input plugin property: "table" with value: "bqTargetTable" + Then Validate "BigQuery" plugin properties + Then Close the Plugin Properties page + Then Connect plugins: "Wrangler" and "BigQuery2" to establish connection + Then Save the pipeline + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Close the pipeline logs + Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_json" + Given Open Wrangler connections page + Then Expand connections of type: "BigQuery" + Then Open action 
menu for connection: "bqConnectionName" of type: "BigQuery" + Then Select action: "Delete" for connection: "bqConnectionName" of type: "BigQuery" + Then Click plugin property: "Delete" button + Then Verify connection: "bqConnectionName" of type: "BigQuery" is deleted successfully diff --git a/wrangler-transform/src/e2e-test/features/WranglerUI/ParseAsLog.feature b/wrangler-transform/src/e2e-test/features/WranglerUI/ParseAsLog.feature new file mode 100644 index 000000000..911f1f833 --- /dev/null +++ b/wrangler-transform/src/e2e-test/features/WranglerUI/ParseAsLog.feature @@ -0,0 +1,74 @@ +# Copyright © 2024 Cask Data, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. 
+ +@Wrangler +Feature: Wrangler - Run time scenarios for Parse Log + + @BQ_SOURCE_LOG_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST + Scenario: To verify User is able to run a pipeline using parse log directive + Given Open Wrangler connections page + Then Click plugin property: "addConnection" button + Then Click plugin property: "bqConnectionRow" + Then Enter input plugin property: "name" with value: "bqConnectionName" + Then Replace input plugin property: "projectId" with value: "projectId" + Then Enter input plugin property: "datasetProjectId" with value: "projectId" + Then Override Service account details in Wrangler connection page if set in environment variables + Then Click plugin property: "testConnection" button + Then Verify the test connection is successful + Then Click plugin property: "connectionCreate" button + Then Verify the connection with name: "bqConnectionName" is created successfully + Then Select connection data row with name: "dataset" + Then Select connection data row with name: "bqSourceTable" + Then Verify connection datatable is displayed for the data: "bqSourceTable" + Then Expand dropdown column: "body" and apply directive: "Parse" as "LOG" with: "Common" option + Then Expand dropdown column: "number_connection_client_logname_last" and apply directive: "DeleteColumn" + Then Expand dropdown column: "number_connection_client_logname" and apply directive: "DeleteColumn" + Then Expand dropdown column: "http_querystring_request_firstline_uri_query" and apply directive: "DeleteColumn" + Then Expand dropdown column: "http_ref_request_firstline_uri_ref" and apply directive: "FillNullOrEmptyCells" as "no value" + Then Enter directive from CLI "filter-rows-on condition-false ip_connection_client_host =$ '1'" + Then Enter directive from CLI "filter-rows-on regex-match string_connection_client_user_last ^Tryck$" + Then Enter directive from CLI "extract-regex-groups :http_firstline_request_firstline GET*" + Then Enter directive from CLI "split-to-columns 
:ip_connection_client_host ," + Then Enter directive from CLI "catalog-lookup ICD-9 :Body" + Then Expand dropdown column: "Body_icd_9_description" and apply directive: "FillNullOrEmptyCells" as "Body Post Catalog lookup" + Then Enter directive from CLI "set-variable Pass string_request_status_last == 200 ? string_request_status_last : Pass" + Then Expand dropdown column: "time_day_request_receive_time_last_day" and apply directive: "ChangeDataType" as "Integer" + Then Click Create Pipeline button and choose the type of pipeline as: "Batch pipeline" + Then Verify plugin: "BigQueryTable" node is displayed on the canvas with a timeout of 120 seconds + Then Expand Plugin group in the LHS plugins list: "Sink" + Then Select plugin: "BigQuery" from the plugins list as: "Sink" + Then Navigate to the properties page of plugin: "BigQuery2" + Then Click plugin property: "useConnection" + Then Click on the Browse Connections button + Then Select connection: "bqConnectionName" + Then Enter input plugin property: "referenceName" with value: "BQSinkReferenceName" + Then Enter input plugin property: "dataset" with value: "dataset" + Then Enter input plugin property: "table" with value: "bqTargetTable" + Then Validate "BigQuery" plugin properties + Then Close the Plugin Properties page + Then Connect plugins: "Wrangler" and "BigQuery2" to establish connection + Then Save the pipeline + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Close the pipeline logs + Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_log" + Given Open Wrangler connections page + Then Expand connections of type: "BigQuery" + Then Open action menu for connection: "bqConnectionName" of type: "BigQuery" + Then Select action: "Delete" for connection: "bqConnectionName" of type: "BigQuery" + Then Click plugin property: "Delete" 
button + Then Verify connection: "bqConnectionName" of type: "BigQuery" is deleted successfully diff --git a/wrangler-transform/src/e2e-test/features/WranglerUI/ParseAsXmlToJson.feature b/wrangler-transform/src/e2e-test/features/WranglerUI/ParseAsXmlToJson.feature new file mode 100644 index 000000000..b71bdd519 --- /dev/null +++ b/wrangler-transform/src/e2e-test/features/WranglerUI/ParseAsXmlToJson.feature @@ -0,0 +1,69 @@ +# Copyright © 2024 Cask Data, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. 
+ +@Wrangler +Feature: parse as XmlToJson + + @BQ_SOURCE_XML_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST + Scenario: To verify User is able to run a pipeline using parse XmlToJson directive + Given Open Wrangler connections page + Then Click plugin property: "addConnection" button + Then Click plugin property: "bqConnectionRow" + Then Enter input plugin property: "name" with value: "bqConnectionName" + Then Replace input plugin property: "projectId" with value: "projectId" + Then Enter input plugin property: "datasetProjectId" with value: "projectId" + Then Override Service account details in Wrangler connection page if set in environment variables + Then Click plugin property: "testConnection" button + Then Verify the test connection is successful + Then Click plugin property: "connectionCreate" button + Then Verify the connection with name: "bqConnectionName" is created successfully + Then Select connection data row with name: "dataset" + Then Select connection data row with name: "bqSourceTable" + Then Verify connection datatable is displayed for the data: "bqSourceTable" + Then Expand dropdown column: "xmldata" and apply directive: "Parse" as "XMLTOJSON" with: "1" option + Then Enter directive from CLI "split-email :email" + Then Enter directive from CLI "text-distance block email email_account distance" + Then Enter directive from CLI "text-metric longest-common-subsequence email email_account distance2" + Then Enter directive from CLI "write-as-json-object :email_domain distance,email_account" + Then Enter directive from CLI "stemming :email" + Then Enter directive from CLI "split-to-rows :email_account '0'" + Then Enter directive from CLI "rename :email_account id" + Then Click Create Pipeline button and choose the type of pipeline as: "Batch pipeline" + Then Verify plugin: "BigQueryTable" node is displayed on the canvas with a timeout of 120 seconds + Then Expand Plugin group in the LHS plugins list: "Sink" + Then Select plugin: "BigQuery" from the plugins list as: 
"Sink" + Then Navigate to the properties page of plugin: "BigQuery2" + Then Click plugin property: "useConnection" + Then Click on the Browse Connections button + Then Select connection: "bqConnectionName" + Then Enter input plugin property: "referenceName" with value: "BQSinkReferenceName" + Then Enter input plugin property: "dataset" with value: "dataset" + Then Enter input plugin property: "table" with value: "bqTargetTable" + Then Validate "BigQuery" plugin properties + Then Close the Plugin Properties page + Then Connect plugins: "Wrangler" and "BigQuery2" to establish connection + Then Save the pipeline + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Close the pipeline logs + Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_xml" + Given Open Wrangler connections page + Then Expand connections of type: "BigQuery" + Then Open action menu for connection: "bqConnectionName" of type: "BigQuery" + Then Select action: "Delete" for connection: "bqConnectionName" of type: "BigQuery" + Then Click plugin property: "Delete" button + Then Verify connection: "bqConnectionName" of type: "BigQuery" is deleted successfully diff --git a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/runners/TestRunner.java b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/runners/TestRunner.java index 87b0d1aec..a7cf69a0c 100644 --- a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/runners/TestRunner.java +++ b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/runners/TestRunner.java @@ -26,7 +26,7 @@ @CucumberOptions( features = {"src/e2e-test/features"}, glue = {"stepsdesign", "io.cdap.plugin.common.stepsdesign", "io.cdap.plugin.wrangler.stepsdesign", - "io.cdap.plugin.wrangler.locators"}, + "io.cdap.plugin.wrangler.actions"}, tags = {"@Wrangler"}, 
plugin = {"pretty", "html:target/cucumber-html-report/wrangler-required", "json:target/cucumber-reports/cucumber-wrangler-required.json", diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_datetimenew b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_datetimenew new file mode 100644 index 000000000..10bc48764 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_datetimenew @@ -0,0 +1,3 @@ +{"create_date":"2024","id":1,"timecolumn":"2006-03-18"} +{"create_date":"2024","id":2,"timecolumn":"2007-03-18"} +{"create_date":"2024","id":3,"timecolumn":"2008-04-19"} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_fixedlengthnew b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_fixedlengthnew new file mode 100644 index 000000000..33010a877 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_fixedlengthnew @@ -0,0 +1,2 @@ +{"Url":"http://example.com:80/docs/books/tutorial/index.html?name=networking#DOWNLOADING","fixedlength":"21 10 ABCXYZ","fixedlength_1":"21","fixedlength_3":" ABC","fixedlength_4":"XYZ","fixedlength_encode_base32":"GIYSAIBRGAQCAQKCINMFSWQ=","fixedlength_encode_base32_decode_base32":"21 10 ABCXYZ","id":" 10","url_authority":"example.com:80","url_filename":"/docs/books/tutorial/index.html?name=networking","url_host":"example.com","url_path":"/docs/books/tutorial/index.html","url_port":80,"url_protocol":"http","url_query":"name=networking","url_query_1":"name","url_query_2":"networking"} +{"Url":"http://geeks.com:80/docs/chair/tutorial/index.html?name=networking#DOWNLOADING","fixedlength":"19 13 ABCXYZ","fixedlength_1":"19","fixedlength_3":" ABC","fixedlength_4":"XYZ","fixedlength_encode_base32":"GE4SAIBRGMQCAQKCINMFSWQ=","fixedlength_encode_base32_decode_base32":"19 13 
ABCXYZ","id":" 13","url_authority":"geeks.com:80","url_filename":"/docs/chair/tutorial/index.html?name=networking","url_host":"geeks.com","url_path":"/docs/chair/tutorial/index.html","url_port":80,"url_protocol":"http","url_query":"name=networking","url_query_1":"name","url_query_2":"networking"} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_hl7new b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_hl7new new file mode 100644 index 000000000..17724c0aa --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_hl7new @@ -0,0 +1,2 @@ +{"Body":"000000000000000000000000000000007382871179b358959d06ed48f27fa3e9","Body_hl7_MSH_9_1":"ALM","address":"test","id":"3"} +{"Body":"00000000000000000000000000000000463cb0c31cb787f0d6234ae8d15e983a","Body_hl7_MSH_9_1":"BLM","address":"address2","id":"4"} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/pluginDataCyAttributes.properties b/wrangler-transform/src/e2e-test/resources/pluginDataCyAttributes.properties new file mode 100644 index 000000000..d3890e916 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/pluginDataCyAttributes.properties @@ -0,0 +1,47 @@ +projectId=project +datasetProjectId=datasetProject +referenceName=referenceName +table=table +tableKey=relationTableKey +clusterOrder=clusteringOrder +dataset=dataset +skipHeader=switch-skipHeader +path=path +name=name +truncateTable=switch-truncateTable +truncateTableMacroInput=truncateTable +updateTableSchema=switch-allowSchemaRelaxation +updateTableSchemaMacroInput=allowSchemaRelaxation +format=select-format +formatMacroInput=format +requirePartitionFilter=switch-requirePartitionField +requirePartitionFilterMacroInput=requirePartitionField +partitioningType=partitioningType +partitionStartDate=partitionFrom +partitionEndDate=partitionTo +filter=filter +instanceId=instance 
+databaseName=database +tableName=table +sql=sql +jobLocation=location +storeResultsInBigQueryTable=switch-storeResults +rowAsArguments=switch-rowAsArguments +serviceAccountType=serviceAccountType +serviceAccountFilePath=serviceFilePath +serviceAccountJSON=serviceAccountJSON +outputSchemaMacroInput=Output Schema-macro-input +flexibleSchema=switch-allowFlexibleSchema +updateSchema=allowSchemaRelaxation + +## CONNECTION-MANAGEMENT-START +connection=connection +useConnection=switch-useConnection +addConnection=add-connection-button +gcsConnectionRow=connector-GCS +bqConnectionRow=connector-BigQuery +spannerConnectionRow=connector-Spanner +testConnection=connection-test-button +connectionCreate=connection-submit-button +parsingOptionConfirm=parsing-config-confirm +## CONNECTION-MANAGEMENT-END diff --git a/wrangler-transform/src/e2e-test/resources/pluginDataTestIdAttributes.properties b/wrangler-transform/src/e2e-test/resources/pluginDataTestIdAttributes.properties new file mode 100644 index 000000000..fe2cbde25 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/pluginDataTestIdAttributes.properties @@ -0,0 +1,64 @@ +ChangeDataType=changeDataType +SendToError=markAsError +SwapTwoColumnNames=swapColumns +DeleteColumn=dropColumn +Hash=hash +Parse=parse +MaskData=maskData +FindAndReplace=findAndReplace +Format=format +Calculate=calculate +CustomTransform=customTransform +Filter=filter +FillNullOrEmptyCells=fillNullOrEmpty +CopyColumn=copyColumn +KeepColumn=keepColumn +ExtractFields=extractFields +Explode=explode +DefineVariable=defineVariable +SetCounter=setCounter +Concatenate=CONCATENATE +Always=ALWAYS +Integer=integer +Comma=COMMA +Common=COMMON +Tab=TAB +Space=SPACE +Pipe=PIPE +CONTROL_A=^A +CONTROL_D=^D +CSV=csv +Avro=AVRO +EXCEL=excel +JSON=singleField +XMLTOJSON=xmlToJson +LOG=log +SIMPLEDATE=dateFormats +DATETIME=dateFormats +FIXEDLENGTH=singleField +HL7=HL7 +Decimal=decimal +lowercase=LOWERCASE +Trim_Whitespace=TRIM_WHITESPACE +Character_count=CHARCOUNT 
+Using_patterns=patterns +Using_delimiters=delimiters +Delimited_text=delimited +Array_(by_flattening)=arrayFlattening +Record_(by_flattening)=recordFlattening +Show_last_4_characters_only=last4Chars +Show_last_2_characters_only=last2Chars +Custom_selection=customSelection +By_shuffling=shuffling +value_is_empty=EMPTY +value_is=TEXTEXACTLY +value_contains=TEXTCONTAINS +value_starts_with=TEXTSTARTSWITH +Trim_leading_whitespace=TRIM_LEADING_WHITESPACE +Custom_Format=CUSTOM +yyyy-MM-dd=OPTION5 + + + + + diff --git a/wrangler-transform/src/e2e-test/resources/pluginParameters.properties b/wrangler-transform/src/e2e-test/resources/pluginParameters.properties index 9396489e4..ea5e3efd3 100644 --- a/wrangler-transform/src/e2e-test/resources/pluginParameters.properties +++ b/wrangler-transform/src/e2e-test/resources/pluginParameters.properties @@ -12,6 +12,7 @@ Directive_parse_avro=testData/Wrangler/parseAsAvro-cdap-data-pipeline (1).json Directive_parse_log=testData/Wrangler/parse_log_wrangler_copy-cdap-data-pipeline.json Directive_GroupBy=testData/Wrangler/BQ2BQwithWrnglerNGrpby-cdap-data-pipeline (1).json bqSourceTable=dummy +bqSourceTableExcel=excelTab bqTargetTable=dummy sourcePath=example/hello.csv gcsSourceBucket=dummy @@ -45,6 +46,7 @@ dataset=Wrangler_Test dataset2=Wrangler #expectedBQFiles ExpectedDirective_GroupBy=BQValidationExpectedFiles/Directive_wrangler_GroupBy +filterEmptyProperty=value is empty ExpectedDirective_parse_FixedLength=BQValidationExpectedFiles/Directive_parse_fixedlength ExpectedDirective_parse_hl7=BQValidationExpectedFiles/Directive_parse_hl7 @@ -56,3 +58,6 @@ ExpectedDirective_parse_excel=BQValidationExpectedFiles/Directive_parse_excel ExpectedDirective_parse_csv=BQValidationExpectedFiles/Directive_parse_csv ExpectedDirective_parse_avro=BQValidationExpectedFiles/Directive_parse_avro ExpectedDirective_parse_log=BQValidationExpectedFiles/Directive_parse_log +ExpectedDirective_parse_hl7_new=BQValidationExpectedFiles/Directive_parse_hl7new 
+ExpectedDirective_parse_DatetimeNew=BQValidationExpectedFiles/Directive_parse_datetimenew +ExpectedDirective_parse_FixedLengthnew=BQValidationExpectedFiles/Directive_parse_fixedlengthnew