From 8ef163175e90a597a27c349c34f4b63a1ab7c86b Mon Sep 17 00:00:00 2001
From: bijay27bit
Date: Thu, 12 Dec 2024 12:23:30 +0000
Subject: [PATCH] All review comments are incorporated.

---
 .../sink/BigQueryToGCSSink_WithMacro.feature  | 76 +++++++++++++++++++
 .../features/gcs/sink/GCSSink.feature         | 52 ++++++++++++-
 .../features/gcs/sink/GCSSinkError.feature    | 36 +++++++++
 .../resources/errorMessage.properties         |  2 +-
 .../resources/pluginParameters.properties     |  8 +-
 5 files changed, 171 insertions(+), 3 deletions(-)
 create mode 100644 src/e2e-test/features/gcs/sink/BigQueryToGCSSink_WithMacro.feature

diff --git a/src/e2e-test/features/gcs/sink/BigQueryToGCSSink_WithMacro.feature b/src/e2e-test/features/gcs/sink/BigQueryToGCSSink_WithMacro.feature
new file mode 100644
index 0000000000..83d5d2d65b
--- /dev/null
+++ b/src/e2e-test/features/gcs/sink/BigQueryToGCSSink_WithMacro.feature
@@ -0,0 +1,76 @@
+@GCS_Sink
+Feature: GCS sink - Verification of GCS Sink plugin macro scenarios
+
+  @BQ_SOURCE_DATATYPE_TEST @GCS_SINK_TEST
+  Scenario: Validate successful records transfer from BigQuery to GCS sink with macro fields
+    Given Open Datafusion Project to configure pipeline
+    Then Select plugin: "BigQuery" from the plugins list as: "Source"
+    When Expand Plugin group in the LHS plugins list: "Sink"
+    When Select plugin: "GCS" from the plugins list as: "Sink"
+    Then Open BigQuery source properties
+    Then Enter BigQuery property reference name
+    Then Enter BigQuery property projectId "projectId"
+    Then Enter BigQuery property datasetProjectId "projectId"
+    Then Override Service account details if set in environment variables
+    Then Enter BigQuery property dataset "dataset"
+    Then Enter BigQuery source property table name
+    Then Validate output schema with expectedSchema "bqSourceSchemaDatatype"
+    Then Validate "BigQuery" plugin properties
+    Then Close the BigQuery properties
+    Then Open GCS sink properties
+    Then Override Service account details if set in environment variables
+    Then Enter the GCS sink mandatory properties
+    Then Enter GCS property "projectId" as macro argument "gcsProjectId"
+    Then Enter GCS property "serviceAccountType" as macro argument "serviceAccountType"
+    Then Enter GCS property "serviceAccountFilePath" as macro argument "serviceAccount"
+    Then Enter GCS property "path" as macro argument "gcsSinkPath"
+    Then Enter GCS sink property "pathSuffix" as macro argument "gcsPathSuffix"
+    Then Enter GCS property "format" as macro argument "gcsFormat"
+    Then Click on the Macro button of Property: "writeHeader" and set the value to: "WriteHeader"
+    Then Click on the Macro button of Property: "location" and set the value to: "gcsSinkLocation"
+    Then Click on the Macro button of Property: "contentType" and set the value to: "gcsContentType"
+    Then Click on the Macro button of Property: "outputFileNameBase" and set the value to: "OutFileNameBase"
+    Then Click on the Macro button of Property: "fileSystemProperties" and set the value to: "FileSystemPr"
+    Then Validate "GCS" plugin properties
+    Then Close the GCS properties
+    Then Connect source as "BigQuery" and sink as "GCS" to establish connection
+    Then Save the pipeline
+    Then Preview and run the pipeline
+    Then Enter runtime argument value "projectId" for key "gcsProjectId"
+    Then Enter runtime argument value "serviceAccountType" for key "serviceAccountType"
+    Then Enter runtime argument value "serviceAccount" for key "serviceAccount"
+    Then Enter runtime argument value for GCS sink property path key "gcsSinkPath"
+    Then Enter runtime argument value "gcsPathDateSuffix" for key "gcsPathSuffix"
value "gcsPathDateSuffix" for key "gcsPathSuffix" + Then Enter runtime argument value "jsonFormat" for key "gcsFormat" + Then Enter runtime argument value "writeHeader" for key "WriteHeader" + Then Enter runtime argument value "contentType" for key "gcsContentType" + Then Enter runtime argument value "gcsSinkBucketLocation" for key "gcsSinkLocation" + Then Enter runtime argument value "outputFileNameBase" for key "OutFileNameBase" + Then Enter runtime argument value "gcsCSVFileSysProperty" for key "FileSystemPr" + Then Run the preview of pipeline with runtime arguments + Then Wait till pipeline preview is in running state + Then Open and capture pipeline preview logs + Then Verify the preview run status of pipeline in the logs is "succeeded" + Then Close the pipeline logs + Then Click on preview data for GCS sink + Then Verify preview output schema matches the outputSchema captured in properties + Then Close the preview data + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Enter runtime argument value "projectId" for key "gcsProjectId" + Then Enter runtime argument value "serviceAccountType" for key "serviceAccountType" + Then Enter runtime argument value "serviceAccount" for key "serviceAccount" + Then Enter runtime argument value for GCS sink property path key "gcsSinkPath" + Then Enter runtime argument value "gcsPathDateSuffix" for key "gcsPathSuffix" + Then Enter runtime argument value "jsonFormat" for key "gcsFormat" + Then Enter runtime argument value "writeHeader" for key "WriteHeader" + Then Enter runtime argument value "contentType" for key "gcsContentType" + Then Enter runtime argument value "gcsSinkBucketLocation" for key "gcsSinkLocation" + Then Enter runtime argument value "outputFileNameBase" for key "OutFileNameBase" + Then Enter runtime argument value "gcsCSVFileSysProperty" for key "FileSystemPr" + Then Run the Pipeline in Runtime with runtime arguments + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Verify data is transferred to target GCS bucket + Then Validate the values of records transferred to GCS bucket is equal to the values from source BigQuery table diff --git a/src/e2e-test/features/gcs/sink/GCSSink.feature b/src/e2e-test/features/gcs/sink/GCSSink.feature index 563a896e87..068aa8d6fc 100644 --- a/src/e2e-test/features/gcs/sink/GCSSink.feature +++ b/src/e2e-test/features/gcs/sink/GCSSink.feature @@ -95,7 +95,7 @@ Feature: GCS sink - Verification of GCS Sink plugin | parquet | application/octet-stream | | orc | application/octet-stream | - @GCS_SINK_TEST @BQ_SOURCE_TEST + @BQ_SOURCE_TEST @GCS_SINK_TEST Scenario Outline: To verify data is getting transferred successfully from BigQuery to GCS with combinations of contenttype Given Open Datafusion Project to configure pipeline When Source is BigQuery @@ -265,3 +265,53 @@ Feature: GCS sink - Verification of GCS Sink plugin Then Open and capture logs Then Verify the pipeline status is "Succeeded" Then Verify data is transferred to target GCS bucket + + @GCS_AVRO_FILE @GCS_SINK_TEST + Scenario Outline: To verify data transferred successfully from GCS Source to GCS Sink with datatypes and write header true + Given Open Datafusion Project to configure pipeline + When Select plugin: "GCS" from the plugins list as: "Source" + When Expand Plugin group in the LHS plugins list: "Sink" + When Select plugin: "GCS" from the plugins list as: "Sink" + Then Connect plugins: "GCS" and "GCS2" to establish connection + Then Navigate 
+    Then Replace input plugin property: "project" with value: "projectId"
+    Then Override Service account details if set in environment variables
+    Then Enter input plugin property: "referenceName" with value: "sourceRef"
+    Then Enter GCS source property path "gcsAvroAllDataFile"
+    Then Select GCS property format "avro"
+    Then Click on the Get Schema button
+    Then Verify the Output Schema matches the Expected Schema: "gcsAvroAllTypeDataSchema"
+    Then Validate "GCS" plugin properties
+    Then Close the Plugin Properties page
+    Then Navigate to the properties page of plugin: "GCS2"
+    Then Enter GCS property projectId and reference name
+    Then Enter GCS sink property path
+    Then Select GCS property format "<FileFormat>"
+    Then Select GCS sink property contentType "<contentType>"
+    Then Enter GCS File system properties field "gcsCSVFileSysProperty"
+    Then Click on the Macro button of Property: "writeHeader" and set the value to: "WriteHeader"
+    Then Validate "GCS" plugin properties
+    Then Close the GCS properties
+    Then Save the pipeline
+    Then Preview and run the pipeline
+    Then Enter runtime argument value "writeHeader" for key "WriteHeader"
+    Then Run the preview of pipeline with runtime arguments
+    Then Wait till pipeline preview is in running state
+    Then Open and capture pipeline preview logs
+    Then Verify the preview run status of pipeline in the logs is "succeeded"
+    Then Close the pipeline logs
+    Then Close the preview
+    Then Deploy the pipeline
+    Then Run the Pipeline in Runtime
+    Then Enter runtime argument value "writeHeader" for key "WriteHeader"
+    Then Run the Pipeline in Runtime with runtime arguments
+    Then Wait till pipeline is in running state
+    Then Open and capture logs
+    Then Verify the pipeline status is "Succeeded"
+    Then Verify data is transferred to target GCS bucket
+    Then Validate the data from GCS Source to GCS Sink with expected csv file and target data in GCS bucket
+    Examples:
+      | FileFormat | contentType |
+      | csv        | text/csv    |
+      | tsv        | text/plain  |
+      | delimited  | text/csv    |
diff --git a/src/e2e-test/features/gcs/sink/GCSSinkError.feature b/src/e2e-test/features/gcs/sink/GCSSinkError.feature
index 0718136d4a..723e2d7ed7 100644
--- a/src/e2e-test/features/gcs/sink/GCSSinkError.feature
+++ b/src/e2e-test/features/gcs/sink/GCSSinkError.feature
@@ -65,3 +65,39 @@ Feature: GCS sink - Verify GCS Sink plugin error scenarios
     Then Select GCS property format "csv"
     Then Click on the Validate button
     Then Verify that the Plugin Property: "format" is displaying an in-line error message: "errorMessageInvalidFormat"
+
+  @BQ_SOURCE_TEST @GCS_SINK_TEST
+  Scenario: To verify and validate the error message in pipeline logs after deploy with invalid bucket path
+    Given Open Datafusion Project to configure pipeline
+    When Select plugin: "BigQuery" from the plugins list as: "Source"
+    When Expand Plugin group in the LHS plugins list: "Sink"
+    When Select plugin: "GCS" from the plugins list as: "Sink"
+    Then Connect source as "BigQuery" and sink as "GCS" to establish connection
+    Then Open BigQuery source properties
+    Then Enter the BigQuery source mandatory properties
+    Then Validate "BigQuery" plugin properties
+    Then Close the BigQuery properties
+    Then Open GCS sink properties
+    Then Enter GCS property projectId and reference name
+    Then Enter GCS property "path" as macro argument "gcsSinkPath"
+    Then Select GCS property format "csv"
+    Then Click on the Validate button
+    Then Close the GCS properties
+    Then Save the pipeline
+    Then Preview and run the pipeline
+    Then Enter runtime argument value "gcsInvalidBucketNameSink" for key "gcsSinkPath"
+    Then Run the preview of pipeline with runtime arguments
+    Then Wait till pipeline preview is in running state
+    Then Open and capture pipeline preview logs
+    Then Close the pipeline logs
+    Then Close the preview
+    Then Deploy the pipeline
+    Then Run the Pipeline in Runtime
+    Then Enter runtime argument value "gcsInvalidBucketNameSink" for key "gcsSinkPath"
+    Then Run the Pipeline in Runtime with runtime arguments
+    Then Wait till pipeline is in running state
+    Then Verify the pipeline status is "Failed"
+    Then Open Pipeline logs and verify Log entries having below listed Level and Message:
+      | Level | Message                           |
+      | ERROR | errorMessageInvalidBucketNameSink |
+    Then Close the pipeline logs
\ No newline at end of file
diff --git a/src/e2e-test/resources/errorMessage.properties b/src/e2e-test/resources/errorMessage.properties
index 45ce14b7de..6ecb9cc5e0 100644
--- a/src/e2e-test/resources/errorMessage.properties
+++ b/src/e2e-test/resources/errorMessage.properties
@@ -34,4 +34,4 @@ errorMessageMultipleFileWithoutClearDefaultSchema=Found a row with 4 fields when
 errorMessageInvalidSourcePath=Invalid bucket name in path 'abc@'. Bucket name should
 errorMessageInvalidDestPath=Invalid bucket name in path 'abc@'. Bucket name should
 errorMessageInvalidEncryptionKey=CryptoKeyName.parse: formattedString not in valid format: Parameter "abc@" must be
-
+errorMessageInvalidBucketNameSink=Unable to read or access GCS bucket.
diff --git a/src/e2e-test/resources/pluginParameters.properties b/src/e2e-test/resources/pluginParameters.properties
index b89e3c9515..117594cc14 100644
--- a/src/e2e-test/resources/pluginParameters.properties
+++ b/src/e2e-test/resources/pluginParameters.properties
@@ -109,7 +109,6 @@ gcsDataTypeTest2File=testdata/GCS_DATATYPE_TEST_2.csv
 gcsReadRecursivePath=testdata/GCS_RECURSIVE_TEST
 gcsReadWildcardPath=testdata/GCS_WILDCARD_TEST,testdata/GCS_WILDCARD_TEST/test
 gcsFileSysProperty={"textinputformat.record.delimiter": "@"}
-gcsCSVFileSysProperty={"csvinputformat.record.csv": "1"}
 gcsDatatypeChange=[{"key":"createddate","value":"datetime"},{"key":"revenue","value":"double"},\
 {"key":"points","value":"decimal"},{"key":"BytesData","value":"bytes"}]
 gcsDataTypeTestFileSchema=[{"key":"id","value":"int"},{"key":"name","value":"string"},\
@@ -175,6 +174,13 @@ encryptedMetadataSuffix=.metadata
 gcsPathFieldOutputSchema={ "type": "record", "name": "text", "fields": [ \
 { "name": "EmployeeDepartment", "type": "string" }, { "name": "Employeename", "type": "string" }, \
 { "name": "Salary", "type": "int" }, { "name": "wotkhours", "type": "int" }, { "name": "pathFieldColumn", "type": "string" } ] }
+gcsInvalidBucketNameSink=ggg
+writeHeader=true
+gcsSinkBucketLocation=US
+contentType=application/octet-stream
+outputFileNameBase=part
+gcsCSVFileSysProperty={"csvinputformat.record.csv": "1"}
+jsonFormat=json
 ## GCS-PLUGIN-PROPERTIES-END
 
 ## BIGQUERY-PLUGIN-PROPERTIES-START
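
For context on the macro scenario above: CDAP expands ${key} macros in plugin
properties using the runtime arguments supplied before each run, and the
"Enter runtime argument value ... for key ..." steps fill each argument from
the matching pluginParameters.properties entry. A minimal sketch of how the
GCS sink properties resolve, assuming the values added in this patch (not
part of the applied diff):

    writeHeader          = ${WriteHeader}      -> true
    location             = ${gcsSinkLocation}  -> US
    contentType          = ${gcsContentType}   -> application/octet-stream
    outputFileNameBase   = ${OutFileNameBase}  -> part
    fileSystemProperties = ${FileSystemPr}     -> {"csvinputformat.record.csv": "1"}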