From 9c57d249bc6e388221af55e6005ccb00d81321fb Mon Sep 17 00:00:00 2001
From: bijay27bit <root@google.com>
Date: Thu, 12 Dec 2024 12:23:30 +0000
Subject: [PATCH] Address all review comments.

---
 .../sink/BigQueryToGCSSink_WithMacro.feature  |  76 +++++++++++
 .../features/gcs/sink/GCSSink.feature         | 119 +++++++++++++++++-
 .../features/gcs/sink/GCSSinkError.feature    |  36 ++++++
 .../resources/errorMessage.properties         |   2 +-
 .../resources/pluginParameters.properties     |   7 ++
 5 files changed, 238 insertions(+), 2 deletions(-)
 create mode 100644 src/e2e-test/features/gcs/sink/BigQueryToGCSSink_WithMacro.feature

diff --git a/src/e2e-test/features/gcs/sink/BigQueryToGCSSink_WithMacro.feature b/src/e2e-test/features/gcs/sink/BigQueryToGCSSink_WithMacro.feature
new file mode 100644
index 0000000000..83d5d2d65b
--- /dev/null
+++ b/src/e2e-test/features/gcs/sink/BigQueryToGCSSink_WithMacro.feature
@@ -0,0 +1,76 @@
+@GCS_Sink
+Feature: GCS sink - Verification of GCS Sink plugin macro scenarios
+
+  @BQ_SOURCE_DATATYPE_TEST @GCS_SINK_TEST
+  Scenario: Validate successful records transfer from BigQuery to GCS sink with macro fields
+    Given Open Datafusion Project to configure pipeline
+    Then Select plugin: "BigQuery" from the plugins list as: "Source"
+    When Expand Plugin group in the LHS plugins list: "Sink"
+    When Select plugin: "GCS" from the plugins list as: "Sink"
+    Then Open BigQuery source properties
+    Then Enter BigQuery property reference name
+    Then Enter BigQuery property projectId "projectId"
+    Then Enter BigQuery property datasetProjectId "projectId"
+    Then Override Service account details if set in environment variables
+    Then Enter BigQuery property dataset "dataset"
+    Then Enter BigQuery source property table name
+    Then Validate output schema with expectedSchema "bqSourceSchemaDatatype"
+    Then Validate "BigQuery" plugin properties
+    Then Close the BigQuery properties
+    Then Open GCS sink properties
+    Then Override Service account details if set in environment variables
+    Then Enter the GCS sink mandatory properties
+    Then Enter GCS property "projectId" as macro argument "gcsProjectId"
+    Then Enter GCS property "serviceAccountType" as macro argument "serviceAccountType"
+    Then Enter GCS property "serviceAccountFilePath" as macro argument "serviceAccount"
+    Then Enter GCS property "path" as macro argument "gcsSinkPath"
+    Then Enter GCS sink property "pathSuffix" as macro argument "gcsPathSuffix"
+    Then Enter GCS property "format" as macro argument "gcsFormat"
+    Then Click on the Macro button of Property: "writeHeader" and set the value to: "WriteHeader"
+    Then Click on the Macro button of Property: "location" and set the value to: "gcsSinkLocation"
+    Then Click on the Macro button of Property: "contentType" and set the value to: "gcsContentType"
+    Then Click on the Macro button of Property: "outputFileNameBase" and set the value to: "OutFileNameBase"
+    Then Click on the Macro button of Property: "fileSystemProperties" and set the value to: "FileSystemPr"
+    Then Validate "GCS" plugin properties
+    Then Close the GCS properties
+    Then Connect source as "BigQuery" and sink as "GCS" to establish connection
+    Then Save the pipeline
+    Then Preview and run the pipeline
+    Then Enter runtime argument value "projectId" for key "gcsProjectId"
+    Then Enter runtime argument value "serviceAccountType" for key "serviceAccountType"
+    Then Enter runtime argument value "serviceAccount" for key "serviceAccount"
+    Then Enter runtime argument value for GCS sink property path key "gcsSinkPath"
+    Then Enter runtime argument value "gcsPathDateSuffix" for key "gcsPathSuffix"
+    Then Enter runtime argument value "jsonFormat" for key "gcsFormat"
+    Then Enter runtime argument value "writeHeader" for key "WriteHeader"
+    Then Enter runtime argument value "contentType" for key "gcsContentType"
+    Then Enter runtime argument value "gcsSinkBucketLocation" for key "gcsSinkLocation"
+    Then Enter runtime argument value "outputFileNameBase" for key "OutFileNameBase"
+    Then Enter runtime argument value "gcsCSVFileSysProperty" for key "FileSystemPr"
+    Then Run the preview of pipeline with runtime arguments
+    Then Wait till pipeline preview is in running state
+    Then Open and capture pipeline preview logs
+    Then Verify the preview run status of pipeline in the logs is "succeeded"
+    Then Close the pipeline logs
+    Then Click on preview data for GCS sink
+    Then Verify preview output schema matches the outputSchema captured in properties
+    Then Close the preview data
+    Then Deploy the pipeline
+    Then Run the Pipeline in Runtime
+    Then Enter runtime argument value "projectId" for key "gcsProjectId"
+    Then Enter runtime argument value "serviceAccountType" for key "serviceAccountType"
+    Then Enter runtime argument value "serviceAccount" for key "serviceAccount"
+    Then Enter runtime argument value for GCS sink property path key "gcsSinkPath"
+    Then Enter runtime argument value "gcsPathDateSuffix" for key "gcsPathSuffix"
+    Then Enter runtime argument value "jsonFormat" for key "gcsFormat"
+    Then Enter runtime argument value "writeHeader" for key "WriteHeader"
+    Then Enter runtime argument value "contentType" for key "gcsContentType"
+    Then Enter runtime argument value "gcsSinkBucketLocation" for key "gcsSinkLocation"
+    Then Enter runtime argument value "outputFileNameBase" for key "OutFileNameBase"
+    Then Enter runtime argument value "gcsCSVFileSysProperty" for key "FileSystemPr"
+    Then Run the Pipeline in Runtime with runtime arguments
+    Then Wait till pipeline is in running state
+    Then Open and capture logs
+    Then Verify the pipeline status is "Succeeded"
+    Then Verify data is transferred to target GCS bucket
+    Then Validate the values of records transferred to GCS bucket is equal to the values from source BigQuery table
diff --git a/src/e2e-test/features/gcs/sink/GCSSink.feature b/src/e2e-test/features/gcs/sink/GCSSink.feature
index 563a896e87..7b76695934 100644
--- a/src/e2e-test/features/gcs/sink/GCSSink.feature
+++ b/src/e2e-test/features/gcs/sink/GCSSink.feature
@@ -95,7 +95,7 @@ Feature: GCS sink - Verification of GCS Sink plugin
       | parquet    | application/octet-stream  |
       | orc        | application/octet-stream  |
 
-  @GCS_SINK_TEST @BQ_SOURCE_TEST
+  @BQ_SOURCE_TEST @GCS_SINK_TEST
   Scenario Outline: To verify data is getting transferred successfully from BigQuery to GCS with combinations of content type
     Given Open Datafusion Project to configure pipeline
     When Source is BigQuery
@@ -265,3 +265,120 @@ Feature: GCS sink - Verification of GCS Sink plugin
     Then Open and capture logs
     Then Verify the pipeline status is "Succeeded"
     Then Verify data is transferred to target GCS bucket
+
+  @BQ_SOURCE_TEST @GCS_SINK_TEST
+  Scenario Outline: To verify data is getting transferred successfully from BigQuery to GCS with content type selection
+    Given Open Datafusion Project to configure pipeline
+    When Select plugin: "BigQuery" from the plugins list as: "Source"
+    When Expand Plugin group in the LHS plugins list: "Sink"
+    When Select plugin: "GCS" from the plugins list as: "Sink"
+    Then Connect source as "BigQuery" and sink as "GCS" to establish connection
+    Then Open BigQuery source properties
+    Then Enter the BigQuery source mandatory properties
+    Then Validate "BigQuery" plugin properties
+    Then Close the BigQuery properties
+    Then Open GCS sink properties
+    Then Enter GCS property projectId and reference name
+    Then Enter GCS sink property path
+    Then Select GCS property format "<FileFormat>"
+    Then Select GCS sink property contentType "<contentType>"
+    Then Validate "GCS" plugin properties
+    Then Close the GCS properties
+    Then Save and Deploy Pipeline
+    Then Run the Pipeline in Runtime
+    Then Wait till pipeline is in running state
+    Then Open and capture logs
+    Then Verify the pipeline status is "Succeeded"
+    Then Verify data is transferred to target GCS bucket
+    Examples:
+      | FileFormat | contentType |
+      | csv        | text/csv    |
+      | tsv        | text/plain  |
+
+  @BQ_SOURCE_DATATYPE_TEST @GCS_SINK_TEST
+  Scenario: Validate successful records transfer from BigQuery to GCS with advanced file system properties field
+    Given Open Datafusion Project to configure pipeline
+    Then Select plugin: "BigQuery" from the plugins list as: "Source"
+    When Expand Plugin group in the LHS plugins list: "Sink"
+    When Select plugin: "GCS" from the plugins list as: "Sink"
+    Then Open BigQuery source properties
+    Then Enter BigQuery property reference name
+    Then Enter BigQuery property projectId "projectId"
+    Then Enter BigQuery property datasetProjectId "projectId"
+    Then Override Service account details if set in environment variables
+    Then Enter BigQuery property dataset "dataset"
+    Then Enter BigQuery source property table name
+    Then Validate output schema with expectedSchema "bqSourceSchemaDatatype"
+    Then Validate "BigQuery" plugin properties
+    Then Close the BigQuery properties
+    Then Open GCS sink properties
+    Then Override Service account details if set in environment variables
+    Then Enter the GCS sink mandatory properties
+    Then Enter GCS File system properties field "gcsCSVFileSysProperty"
+    Then Validate "GCS" plugin properties
+    Then Close the GCS properties
+    Then Connect source as "BigQuery" and sink as "GCS" to establish connection
+    Then Save the pipeline
+    Then Preview and run the pipeline
+    Then Wait till pipeline preview is in running state
+    Then Open and capture pipeline preview logs
+    Then Verify the preview run status of pipeline in the logs is "succeeded"
+    Then Close the pipeline logs
+    Then Click on preview data for GCS sink
+    Then Verify preview output schema matches the outputSchema captured in properties
+    Then Close the preview data
+    Then Deploy the pipeline
+    Then Run the Pipeline in Runtime
+    Then Wait till pipeline is in running state
+    Then Open and capture logs
+    Then Verify the pipeline status is "Succeeded"
+    Then Verify data is transferred to target GCS bucket
+    Then Validate the values of records transferred to GCS bucket is equal to the values from source BigQuery table
+
+  @GCS_AVRO_FILE @GCS_SINK_TEST @GCS_Source_Required
+  Scenario Outline: To verify data is transferred successfully from GCS Source to GCS Sink with write header set to true at Sink
+    Given Open Datafusion Project to configure pipeline
+    When Select plugin: "GCS" from the plugins list as: "Source"
+    When Expand Plugin group in the LHS plugins list: "Sink"
+    When Select plugin: "GCS" from the plugins list as: "Sink"
+    Then Connect plugins: "GCS" and "GCS2" to establish connection
+    Then Navigate to the properties page of plugin: "GCS"
+    Then Replace input plugin property: "project" with value: "projectId"
+    Then Override Service account details if set in environment variables
+    Then Enter input plugin property: "referenceName" with value: "sourceRef"
+    Then Enter GCS source property path "gcsAvroAllDataFile"
+    Then Select GCS property format "avro"
+    Then Click on the Get Schema button
+    Then Verify the Output Schema matches the Expected Schema: "gcsAvroAllTypeDataSchema"
+    Then Validate "GCS" plugin properties
+    Then Close the Plugin Properties page
+    Then Navigate to the properties page of plugin: "GCS2"
+    Then Enter GCS property projectId and reference name
+    Then Enter GCS sink property path
+    Then Select GCS property format "<FileFormat>"
+    Then Click on the Macro button of Property: "writeHeader" and set the value to: "WriteHeader"
+    Then Validate "GCS" plugin properties
+    Then Close the GCS properties
+    Then Save the pipeline
+    Then Preview and run the pipeline
+    Then Enter runtime argument value "writeHeader" for key "WriteHeader"
+    Then Run the preview of pipeline with runtime arguments
+    Then Wait till pipeline preview is in running state
+    Then Open and capture pipeline preview logs
+    Then Verify the preview run status of pipeline in the logs is "succeeded"
+    Then Close the pipeline logs
+    Then Close the preview
+    Then Deploy the pipeline
+    Then Run the Pipeline in Runtime
+    Then Enter runtime argument value "writeHeader" for key "WriteHeader"
+    Then Run the Pipeline in Runtime with runtime arguments
+    Then Wait till pipeline is in running state
+    Then Open and capture logs
+    Then Verify the pipeline status is "Succeeded"
+    Then Verify data is transferred to target GCS bucket
+    Then Validate the data from GCS Source to GCS Sink with expected csv file and target data in GCS bucket
+    Examples:
+      | FileFormat |
+      | csv        |
+      | tsv        |
+      | delimited  |
diff --git a/src/e2e-test/features/gcs/sink/GCSSinkError.feature b/src/e2e-test/features/gcs/sink/GCSSinkError.feature
index 0718136d4a..723e2d7ed7 100644
--- a/src/e2e-test/features/gcs/sink/GCSSinkError.feature
+++ b/src/e2e-test/features/gcs/sink/GCSSinkError.feature
@@ -65,3 +65,39 @@ Feature: GCS sink - Verify GCS Sink plugin error scenarios
     Then Select GCS property format "csv"
     Then Click on the Validate button
     Then Verify that the Plugin Property: "format" is displaying an in-line error message: "errorMessageInvalidFormat"
+
+  @BQ_SOURCE_TEST @GCS_SINK_TEST
+  Scenario: To verify the error message in pipeline logs after deploy with an invalid bucket path
+    Given Open Datafusion Project to configure pipeline
+    When Select plugin: "BigQuery" from the plugins list as: "Source"
+    When Expand Plugin group in the LHS plugins list: "Sink"
+    When Select plugin: "GCS" from the plugins list as: "Sink"
+    Then Connect source as "BigQuery" and sink as "GCS" to establish connection
+    Then Open BigQuery source properties
+    Then Enter the BigQuery source mandatory properties
+    Then Validate "BigQuery" plugin properties
+    Then Close the BigQuery properties
+    Then Open GCS sink properties
+    Then Enter GCS property projectId and reference name
+    Then Enter GCS property "path" as macro argument "gcsSinkPath"
+    Then Select GCS property format "csv"
+    Then Click on the Validate button
+    Then Close the GCS properties
+    Then Save the pipeline
+    Then Preview and run the pipeline
+    Then Enter runtime argument value "gcsInvalidBucketNameSink" for key "gcsSinkPath"
+    Then Run the preview of pipeline with runtime arguments
+    Then Wait till pipeline preview is in running state
+    Then Open and capture pipeline preview logs
+    Then Close the pipeline logs
+    Then Close the preview
+    Then Deploy the pipeline
+    Then Run the Pipeline in Runtime
+    Then Enter runtime argument value "gcsInvalidBucketNameSink" for key "gcsSinkPath"
+    Then Run the Pipeline in Runtime with runtime arguments
+    Then Wait till pipeline is in running state
+    Then Verify the pipeline status is "Failed"
+    Then Open Pipeline logs and verify Log entries having below listed Level and Message:
+      | Level | Message                           |
+      | ERROR | errorMessageInvalidBucketNameSink |
+    Then Close the pipeline logs
\ No newline at end of file
diff --git a/src/e2e-test/resources/errorMessage.properties b/src/e2e-test/resources/errorMessage.properties
index 45ce14b7de..6ecb9cc5e0 100644
--- a/src/e2e-test/resources/errorMessage.properties
+++ b/src/e2e-test/resources/errorMessage.properties
@@ -34,4 +34,4 @@ errorMessageMultipleFileWithoutClearDefaultSchema=Found a row with 4 fields when
 errorMessageInvalidSourcePath=Invalid bucket name in path 'abc@'. Bucket name should
 errorMessageInvalidDestPath=Invalid bucket name in path 'abc@'. Bucket name should
 errorMessageInvalidEncryptionKey=CryptoKeyName.parse: formattedString not in valid format: Parameter "abc@" must be
-
+errorMessageInvalidBucketNameSink=Unable to read or access GCS bucket.
diff --git a/src/e2e-test/resources/pluginParameters.properties b/src/e2e-test/resources/pluginParameters.properties
index b89e3c9515..00532757bb 100644
--- a/src/e2e-test/resources/pluginParameters.properties
+++ b/src/e2e-test/resources/pluginParameters.properties
@@ -175,6 +175,13 @@ encryptedMetadataSuffix=.metadata
 gcsPathFieldOutputSchema={ "type": "record", "name": "text", "fields": [ \
   { "name": "EmployeeDepartment", "type": "string" }, { "name": "Employeename", "type": "string" }, \
   { "name": "Salary", "type": "int" }, { "name": "wotkhours", "type": "int" }, { "name": "pathFieldColumn", "type": "string" } ] }
+gcsInvalidBucketNameSink=ggg
+writeHeader=true
+gcsSinkBucketLocation=US
+contentType=application/octet-stream
+outputFileNameBase=part
+gcsCSVFileSysProperty={"csvinputformat.record.csv": "1"}
+jsonFormat=json
 ## GCS-PLUGIN-PROPERTIES-END
 
 ## BIGQUERY-PLUGIN-PROPERTIES-START