From bd99fdee5c12bdccea46a8c2e53eb09eee3a7d6c Mon Sep 17 00:00:00 2001 From: Archana Gupta Date: Tue, 30 Apr 2024 16:40:19 +0530 Subject: [PATCH 01/70] Modified parameters for all templates assigned --- .../teleport/bigtable/BigtableToAvro.java | 14 +++---- .../teleport/spanner/ExportPipeline.java | 39 ++++++------------ .../teleport/spanner/ImportPipeline.java | 30 +++++--------- .../teleport/templates/PubsubToPubsub.java | 11 ++--- .../teleport/templates/TextToPubsub.java | 6 +-- .../options/AstraDbToBigQueryOptions.java | 18 ++++---- .../SpannerChangeStreamsToGcsOptions.java | 41 ++++++------------- .../v2/templates/PubSubToBigQuery.java | 10 ++--- .../v2/options/JdbcToBigQueryOptions.java | 39 +++++++----------- .../teleport/v2/templates/JmsToPubsub.java | 15 ++++--- 10 files changed, 79 insertions(+), 144 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java b/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java index 77bf83a85f..f9ede3e24b 100644 --- a/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java +++ b/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java @@ -78,8 +78,7 @@ public interface Options extends PipelineOptions { order = 1, description = "Project ID", helpText = - "The ID of the Google Cloud project of the Cloud Bigtable instance that you want to" - + " read data from") + "The ID of the Google Cloud project of the Bigtable instance that you want to read data from.") ValueProvider getBigtableProjectId(); @SuppressWarnings("unused") @@ -89,7 +88,7 @@ public interface Options extends PipelineOptions { order = 2, regexes = {"[a-z][a-z0-9\\-]+[a-z0-9]"}, description = "Instance ID", - helpText = "The ID of the Cloud Bigtable instance that contains the table") + helpText = "The ID of the Bigtable instance that contains the table.") ValueProvider getBigtableInstanceId(); @SuppressWarnings("unused") @@ -99,7 +98,7 @@ public interface Options extends PipelineOptions { order = 3, regexes = {"[_a-zA-Z0-9][-_.a-zA-Z0-9]*"}, description = "Table ID", - helpText = "The ID of the Cloud Bigtable table to read") + helpText = "The ID of the Bigtable table to export.") ValueProvider getBigtableTableId(); @SuppressWarnings("unused") @@ -109,9 +108,8 @@ public interface Options extends PipelineOptions { order = 4, description = "Output file directory in Cloud Storage", helpText = - "The path and filename prefix for writing output files. Must end with a slash. DateTime" - + " formatting is used to parse directory path for date & time formatters.", - example = "gs://your-bucket/your-path") + "The Cloud Storage path where data is written.", + example = "gs://mybucket/somefolder") ValueProvider getOutputDirectory(); @SuppressWarnings("unused") @@ -120,7 +118,7 @@ public interface Options extends PipelineOptions { @TemplateParameter.Text( order = 5, description = "Avro file prefix", - helpText = "The prefix of the Avro file name. For example, \"table1-\"") + helpText = "The prefix of the Avro filename. 
For example, output-.") @Default.String("part") ValueProvider getFilenamePrefix(); diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java index 0fdb720cc7..3f68b73767 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java @@ -79,7 +79,7 @@ public interface ExportPipelineOptions extends PipelineOptions { order = 1, regexes = {"[a-z][a-z0-9\\-]*[a-z0-9]"}, description = "Cloud Spanner instance ID", - helpText = "The instance ID of the Cloud Spanner database that you want to export.") + helpText = "The instance ID of the Spanner database that you want to export.") ValueProvider getInstanceId(); void setInstanceId(ValueProvider value); @@ -88,7 +88,7 @@ public interface ExportPipelineOptions extends PipelineOptions { order = 2, regexes = {"[a-z][a-z0-9_\\-]*[a-z0-9]"}, description = "Cloud Spanner database ID", - helpText = "The database ID of the Cloud Spanner database that you want to export.") + helpText = "The database ID of the Spanner database that you want to export.") ValueProvider getDatabaseId(); void setDatabaseId(ValueProvider value); @@ -97,8 +97,7 @@ public interface ExportPipelineOptions extends PipelineOptions { order = 3, description = "Cloud Storage output directory", helpText = - "The Cloud Storage path where the Avro files should be exported to. A new directory" - + " will be created under this path that contains the export.", + "The Cloud Storage path you want to export Avro files to. The export job creates a new directory under this path that contains the exported files.", example = "gs://your-bucket/your-path") ValueProvider getOutputDir(); @@ -109,8 +108,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Cloud Storage temp directory for storing Avro files", helpText = - "The Cloud Storage path where the temporary Avro files can be created. Ex:" - + " gs://your-bucket/your-path") + "(Optional) The Cloud Storage path where temporary Avro files are written.") ValueProvider getAvroTempDirectory(); void setAvroTempDirectory(ValueProvider value); @@ -148,11 +146,8 @@ public interface ExportPipelineOptions extends PipelineOptions { }, description = "Snapshot time", helpText = - "Specifies the snapshot time as RFC 3339 format in UTC time without the timezone" - + " offset(always ends in 'Z'). Timestamp must be in the past and Maximum timestamp" - + " staleness applies. See" - + " https://cloud.google.com/spanner/docs/timestamp-bounds#maximum_timestamp_staleness", - example = "1990-12-31T23:59:59Z") + "(Optional) The timestamp that corresponds to the version of the Spanner database that you want to read. The timestamp must be specified as per RFC 3339 UTC "Zulu" format. 
The timestamp must be in the past and Maximum timestamp staleness applies.", + example = "1990-12-31T23:59:60Z") @Default.String(value = "") ValueProvider getSnapshotTime(); @@ -162,7 +157,7 @@ public interface ExportPipelineOptions extends PipelineOptions { order = 8, optional = true, description = "Cloud Spanner Project Id", - helpText = "The project ID of the Cloud Spanner instance.") + helpText = "(Optional) The Google Cloud Project ID of the Spanner database that you want to read data from.") ValueProvider getSpannerProjectId(); void setSpannerProjectId(ValueProvider value); @@ -172,8 +167,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Export Timestamps as Timestamp-micros type", helpText = - "If true, Timestamps are exported as timestamp-micros type. Timestamps are exported as" - + " ISO8601 strings at nanosecond precision by default.") + "(Optional) If true, timestamps are exported as a long type with timestamp-micros logical type. By default, timestamps are exported as ISO-8601 strings at nanosecond precision.") @Default.Boolean(false) ValueProvider getShouldExportTimestampAsLogicalType(); @@ -185,10 +179,7 @@ public interface ExportPipelineOptions extends PipelineOptions { regexes = {"^[a-zA-Z0-9_]+(,[a-zA-Z0-9_]+)*$"}, description = "Cloud Spanner table name(s).", helpText = - "If provided, only this comma separated list of tables are exported. Ancestor tables" - + " and tables that are referenced via foreign keys are required. If not explicitly" - + " listed, the `shouldExportRelatedTables` flag must be set for a successful" - + " export.") + "(Optional) A comma-separated list of tables specifying the subset of the Spanner database to export. If you set this parameter, you must include all of the related tables (parent tables and foreign key referenced tables) or else set the shouldExportRelatedTables parameter to true.") @Default.String(value = "") ValueProvider getTableNames(); @@ -199,10 +190,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Export necessary Related Spanner tables.", helpText = - "Used in conjunction with `tableNames`. If true, add related tables necessary for the" - + " export, such as interleaved parent tables and foreign keys tables. If" - + " `tableNames` is specified but doesn't include related tables, this option must" - + " be set to true for a successful export.") + "\t(Optional) Specifies whether to include related tables. This parameter is used in conjunction with the tableNames parameter.") @Default.Boolean(false) ValueProvider getShouldExportRelatedTables(); @@ -218,8 +206,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Priority for Spanner RPC invocations", helpText = - "The request priority for Cloud Spanner calls. The value must be one of:" - + " [HIGH,MEDIUM,LOW].") + "(Optional) The request priority for Spanner calls. Possible values are HIGH, MEDIUM, LOW. The default value is MEDIUM.") ValueProvider getSpannerPriority(); void setSpannerPriority(ValueProvider value); @@ -229,9 +216,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Use independent compute resource (Spanner DataBoost).", helpText = - "Use Spanner on-demand compute so the export job will run on independent compute" - + " resources and have no impact to current Spanner workloads. 
This will incur" - + " additional charges in Spanner.") + "(Optional) Set to true to use the compute resources of Spanner Data Boost to run the job with near-zero impact on Spanner OLTP workflows. This requires the spanner.databases.useDataBoost IAM permission. For more information, see [Data Boost overview](https://cloud.google.com/spanner/docs/databoost/databoost-overview).") @Default.Boolean(false) ValueProvider getDataBoostEnabled(); diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java index bd2b0aee6e..8470c5c17a 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java @@ -67,7 +67,7 @@ public interface Options extends PipelineOptions { order = 1, regexes = {"^[a-z0-9\\-]+$"}, description = "Cloud Spanner instance ID", - helpText = "The instance ID of the Cloud Spanner database that you want to import to.") + helpText = "The instance ID of the Spanner database.") ValueProvider getInstanceId(); void setInstanceId(ValueProvider value); @@ -77,8 +77,7 @@ public interface Options extends PipelineOptions { regexes = {"^[a-z_0-9\\-]+$"}, description = "Cloud Spanner database ID", helpText = - "The database ID of the Cloud Spanner database that you want to import into (must" - + " already exist).") + "The database ID of the Spanner database.") ValueProvider getDatabaseId(); void setDatabaseId(ValueProvider value); @@ -86,7 +85,7 @@ public interface Options extends PipelineOptions { @TemplateParameter.GcsReadFolder( order = 3, description = "Cloud storage input directory", - helpText = "The Cloud Storage path where the Avro files should be imported from.") + helpText = "The Cloud Storage path where the Avro files are imported from.") ValueProvider getInputDir(); void setInputDir(ValueProvider value); @@ -107,9 +106,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Indexes", helpText = - "By default the import pipeline is not blocked on index creation, and it " - + "may complete with indexes still being created in the background. If true, the " - + "pipeline waits until indexes are created.") + "Optional: If true, the pipeline waits for indexes to be created. If false, the job might complete while indexes are still being created in the background. Default: false.") @Default.Boolean(false) ValueProvider getWaitForIndexes(); @@ -120,9 +117,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Foreign Keys", helpText = - "By default the import pipeline is not blocked on foreign key creation, and it may" - + " complete with foreign keys still being created in the background. If true, the" - + " pipeline waits until foreign keys are created.") + "Optional: If true, the pipeline waits for foreign keys to be created. If false, the job might complete while foreign keys are still being created in the background. Default: false.") @Default.Boolean(false) ValueProvider getWaitForForeignKeys(); @@ -133,8 +128,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Change Streams", helpText = - "By default the import pipeline is blocked on change stream creation. If false, it may" - + " complete with change streams still being created in the background.") + "Optional: If true, the pipeline waits for change streams to be created. 
If false, the job might complete while change streams are still being created in the background. Default: true.") @Default.Boolean(true) ValueProvider getWaitForChangeStreams(); @@ -157,10 +151,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Create Indexes early", helpText = - "Flag to turn off early index creation if there are many indexes. Indexes and Foreign" - + " keys are created after dataload. If there are more than 40 DDL statements to be" - + " executed after dataload, it is preferable to create the indexes before dataload." - + " This is the flag to turn the feature off.") + "Optional: Specifies whether to enable early index creation. If the template runs a large number of DDL statements, it's more efficient to create indexes before loading data. Therefore, the default behavior is to create the indexes first when the number of DDL statements exceeds a threshold. To disable this feature, set earlyIndexCreateFlag to false. Default: true.") @Default.Boolean(true) ValueProvider getEarlyIndexCreateFlag(); @@ -175,7 +166,7 @@ public interface Options extends PipelineOptions { order = 9, optional = true, description = "Cloud Spanner Project Id", - helpText = "The project ID of the Cloud Spanner instance.") + helpText = "\tOptional: The Google Cloud project ID of the Spanner database. If not set, the default Google Cloud project is used.") ValueProvider getSpannerProjectId(); void setSpannerProjectId(ValueProvider value); @@ -187,7 +178,7 @@ public interface Options extends PipelineOptions { optional = true, regexes = {"[0-9]+"}, description = "DDL Creation timeout in minutes", - helpText = "DDL Creation timeout in minutes.") + helpText = "Optional: The timeout, in minutes, for DDL statements performed by the template. The default value is 30 minutes.") @Default.Integer(30) ValueProvider getDdlCreationTimeoutInMinutes(); @@ -203,8 +194,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Priority for Spanner RPC invocations", helpText = - "The request priority for Cloud Spanner calls. The value must be one of:" - + " [HIGH,MEDIUM,LOW].") + "Optional: The request priority for Spanner calls. Possible values are HIGH, MEDIUM, LOW. The default value is MEDIUM.") ValueProvider getSpannerPriority(); void setSpannerPriority(ValueProvider value); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java index 6b49dede09..770cf5c260 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java @@ -132,7 +132,7 @@ public interface Options extends PipelineOptions, StreamingOptions { order = 1, description = "Pub/Sub input subscription", helpText = - "Pub/Sub subscription to read the input from, in the format of 'projects/your-project-id/subscriptions/your-subscription-name'", + "Pub/Sub subscription to read the input from. For example, projects//subscriptions/.", example = "projects/your-project-id/subscriptions/your-subscription-name") @Validation.Required ValueProvider getInputSubscription(); @@ -143,7 +143,7 @@ public interface Options extends PipelineOptions, StreamingOptions { order = 2, description = "Output Pub/Sub topic", helpText = - "The name of the topic to which data should published, in the format of 'projects/your-project-id/topics/your-topic-name'", + "Cloud Pub/Sub topic to write the output to. 
For example, projects//topics/.", example = "projects/your-project-id/topics/your-topic-name") @Validation.Required ValueProvider getOutputTopic(); @@ -155,7 +155,7 @@ public interface Options extends PipelineOptions, StreamingOptions { optional = true, description = "Event filter key", helpText = - "Attribute key by which events are filtered. No filters are applied if no key is specified.") + "(Optional) Filter events based on an attribute key. No filters are applied if filterKey is not specified.") ValueProvider getFilterKey(); void setFilterKey(ValueProvider filterKey); @@ -165,10 +165,7 @@ public interface Options extends PipelineOptions, StreamingOptions { optional = true, description = "Event filter value", helpText = - "Filter attribute value to use if an event filter key is provided. Accepts a valid " - + "Java Regex string as an event filter value. In case a regex is provided, the complete " - + "expression should match in order for the message to be filtered. Partial matches (e.g. " - + "substring) will not be filtered. A null event filter value is used by default.") + "(Optional) Filter attribute value to use in case a filterKey is provided. A null filterValue is used by default.") ValueProvider getFilterValue(); void setFilterValue(ValueProvider filterValue); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/TextToPubsub.java b/v1/src/main/java/com/google/cloud/teleport/templates/TextToPubsub.java index 5972905115..b1cd1b8e02 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/TextToPubsub.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/TextToPubsub.java @@ -63,8 +63,8 @@ public interface Options extends PipelineOptions { @TemplateParameter.GcsReadFile( order = 1, description = "Cloud Storage Input File(s)", - helpText = "Path of the file pattern glob to read from.", - example = "gs://your-bucket/path/*.txt") + helpText = "The input file pattern to read from.", + example = "gs://bucket-name/files/*.json") @Required ValueProvider getInputFilePattern(); @@ -74,7 +74,7 @@ public interface Options extends PipelineOptions { order = 2, description = "Output Pub/Sub topic", helpText = - "The name of the topic to which data should published, in the format of 'projects/your-project-id/topics/your-topic-name'", + "The Pub/Sub input topic to write to. The name must be in the format of projects//topics/.", example = "projects/your-project-id/topics/your-topic-name") @Required ValueProvider getOutputTopic(); diff --git a/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java b/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java index bc9946773f..80597dbbbf 100644 --- a/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java +++ b/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java @@ -33,9 +33,7 @@ interface BigQueryWriteOptions extends PipelineOptions, DataflowPipelineOptions description = "BigQuery output table", optional = true, helpText = - "The BigQuery table location to write the output to. " - + "The table should be in the format `:.`. " - + "The table's schema must match the input objects.") + "Optional: The BigQuery table location to write the output to. The table should be in the format :.. 
The table's schema must match the input objects.") String getOutputTableSpec(); @SuppressWarnings("unused") @@ -47,7 +45,7 @@ interface AstraDbSourceOptions extends PipelineOptions { @TemplateParameter.Text( order = 1, description = "Astra token", - helpText = "Token value or secret resource ID", + helpText = "Token value or secret resource ID.", example = "AstraCS:abcdefghij") @Validation.Required @SuppressWarnings("unused") @@ -59,7 +57,7 @@ interface AstraDbSourceOptions extends PipelineOptions { @TemplateParameter.Text( order = 2, description = "Database identifier", - helpText = "Database unique identifier (uuid)", + helpText = "Database unique identifier (uuid).", example = "cf7af129-d33a-498f-ad06-d97a6ee6eb7") @Validation.Required @SuppressWarnings("unused") @@ -72,7 +70,7 @@ interface AstraDbSourceOptions extends PipelineOptions { order = 3, description = "Cassandra keyspace", regexes = {"^[a-zA-Z0-9][a-zA-Z0-9_]{0,47}$"}, - helpText = "Name of the Cassandra keyspace inside Astra database") + helpText = "Name of the Cassandra keyspace inside Astra database.") String getAstraKeyspace(); @SuppressWarnings("unused") @@ -82,7 +80,7 @@ interface AstraDbSourceOptions extends PipelineOptions { order = 4, description = "Cassandra table", regexes = {"^[a-zA-Z][a-zA-Z0-9_]*$"}, - helpText = "Name of the table inside the Cassandra database", + helpText = "Name of the table inside the Cassandra database.", example = "my_table") @SuppressWarnings("unused") String getAstraTable(); @@ -94,7 +92,7 @@ interface AstraDbSourceOptions extends PipelineOptions { order = 5, optional = true, description = "Cassandra CQL Query", - helpText = "Query to filter rows instead of reading the whole table") + helpText = "Optional: Query to filter rows instead of reading the whole table.") @SuppressWarnings("unused") String getAstraQuery(); @@ -106,7 +104,7 @@ interface AstraDbSourceOptions extends PipelineOptions { optional = true, description = "Astra Database Region", helpText = - "If not provided, a default is chosen, which is useful with multi-region databases") + "Optional: If not provided, a default is chosen, which is useful with multi-region databases.") @SuppressWarnings("unused") String getAstraDatabaseRegion(); @@ -117,7 +115,7 @@ interface AstraDbSourceOptions extends PipelineOptions { order = 7, optional = true, description = "Token range count", - helpText = "The minimal number of splits to distribute the query") + helpText = "Optional: The minimal number of splits to distribute the query.") Integer getMinTokenRangesCount(); @SuppressWarnings("unused") diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java index b80b68f479..fe9f4217c7 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java @@ -39,8 +39,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Spanner Project ID", helpText = - "Project to read change streams from. The default for this parameter is the project " - + "where the Dataflow pipeline is running.") + "(Optional) Project to read change streams from. This is also the project where the change streams connector metadata table is created. 
The default for this parameter is the project where the Dataflow pipeline is running.") @Default.String("") String getSpannerProjectId(); @@ -49,7 +48,7 @@ public interface SpannerChangeStreamsToGcsOptions @TemplateParameter.Text( order = 2, description = "Spanner instance ID", - helpText = "The Spanner instance to read change streams from.") + helpText = "The Spanner instance ID to read change streams data from.") @Validation.Required String getSpannerInstanceId(); @@ -58,7 +57,7 @@ public interface SpannerChangeStreamsToGcsOptions @TemplateParameter.Text( order = 3, description = "Spanner database", - helpText = "The Spanner database to read change streams from.") + helpText = "The Spanner database to read change streams data from.") @Validation.Required String getSpannerDatabase(); @@ -69,10 +68,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Spanner database role", helpText = - "Database role user assumes while reading from the change stream. The database role" - + " should have required privileges to read from change stream. If a database role is" - + " not specified, the user should have required IAM permissions to read from the" - + " database.") + "(Optional) The Spanner database role to use when running the template. This parameter is required only when the IAM principal who is running the template is a fine-grained access control user. The database role must have the SELECT privilege on the change stream and the EXECUTE privilege on the change stream's read function. For more information, see [Fine-grained access control for change streams](https://cloud.google.com/spanner/docs/fgac-change-streams).") String getSpannerDatabaseRole(); void setSpannerDatabaseRole(String spannerDatabaseRole); @@ -80,7 +76,7 @@ public interface SpannerChangeStreamsToGcsOptions @TemplateParameter.Text( order = 5, description = "Spanner metadata instance ID", - helpText = "The Spanner instance to use for the change streams connector metadata table.") + helpText = "The Spanner instance ID to use for the change streams connector metadata table.") @Validation.Required String getSpannerMetadataInstanceId(); @@ -90,9 +86,7 @@ public interface SpannerChangeStreamsToGcsOptions order = 6, description = "Spanner metadata database", helpText = - "The Spanner database to use for the change streams connector metadata table. For change" - + " streams tracking all tables in a database, we recommend putting the metadata" - + " table in a separate database.") + "The Spanner database to use for the change streams connector metadata table.") @Validation.Required String getSpannerMetadataDatabase(); @@ -103,10 +97,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Cloud Spanner metadata table name", helpText = - "The Cloud Spanner change streams connector metadata table name to use. If not provided," - + " a Cloud Spanner change streams connector metadata table will automatically be" - + " created during the pipeline flow. This parameter must be provided when updating" - + " an existing pipeline and should not be provided otherwise.") + " (Optional) The Spanner change streams connector metadata table name to use. If not provided, a Spanner change streams metadata table will automatically be created during the pipeline flow. 
This parameter must be provided when updating an existing pipeline and shouldn't be provided otherwise.") String getSpannerMetadataTableName(); void setSpannerMetadataTableName(String value); @@ -125,9 +116,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "The timestamp to read change streams from", helpText = - "The starting DateTime, inclusive, to use for reading change streams" - + " (https://tools.ietf.org/html/rfc3339). For example, 2022-05-05T07:59:59Z." - + " Defaults to the timestamp when the pipeline starts.") + "(Optional) The starting DateTime, inclusive, to use for reading change streams. Ex-2021-10-12T07:20:50.52Z. Defaults to the timestamp when the pipeline starts, i.e. the current time.") @Default.String("") String getStartTimestamp(); @@ -138,9 +127,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "The timestamp to read change streams to", helpText = - "The ending DateTime, inclusive, to use for reading change streams" - + " (https://tools.ietf.org/html/rfc3339). Ex-2022-05-05T07:59:59Z. Defaults to an" - + " infinite time in the future.") + "(Optional) The ending DateTime, inclusive, to use for reading change streams. Ex-2021-10-12T07:20:50.52Z. Defaults to an infinite time in the future.") @Default.String("") String getEndTimestamp(); @@ -163,8 +150,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Output file format", helpText = - "The format of the output Cloud Storage file. Allowed formats are TEXT, AVRO. Default is" - + " AVRO.") + " (Optional) The format of the output Cloud Storage file. Allowed formats are TEXT, AVRO. Default is AVRO.") @Default.Enum("AVRO") FileFormat getOutputFileFormat(); @@ -175,9 +161,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Window duration", helpText = - "The window duration/size in which data will be written to Cloud Storage. Allowed formats" - + " are: Ns (for seconds, example: 5s), Nm (for minutes, example: 12m), Nh (for" - + " hours, example: 2h).", + "\t(Optional) The window duration is the interval in which data is written to the output directory. Configure the duration based on the pipeline's throughput. For example, a higher throughput might require smaller window sizes so that the data fits into memory. Defaults to 5m, with a minimum of 1s. Allowed formats are: [int]s (for seconds, example: 5s), [int]m (for minutes, example: 12m), [int]h (for hours, example: 2h).", example = "5m") @Default.String("5m") String getWindowDuration(); @@ -194,8 +178,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Priority for Spanner RPC invocations", helpText = - "The request priority for Cloud Spanner calls. The value must be one of:" - + " [HIGH,MEDIUM,LOW].") + "(Optional) The request priority for Spanner calls. The value must be one of:[HIGH,MEDIUM,LOW]. 
(Default: HIGH)") @Default.Enum("HIGH") RpcPriority getRpcPriority(); diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java index 6304067624..cade509590 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java @@ -183,8 +183,7 @@ public interface Options order = 1, description = "BigQuery output table", helpText = - "BigQuery table location to write the output to. The table’s schema must match the " - + "input JSON objects.") + "The BigQuery output table location, in the format of :.") String getOutputTableSpec(); void setOutputTableSpec(String value); @@ -203,8 +202,7 @@ public interface Options optional = true, description = "Pub/Sub input subscription", helpText = - "Pub/Sub subscription to read the input from, in the format of" - + " 'projects/your-project-id/subscriptions/your-subscription-name'") + "The Pub/Sub input subscription to read from, in the format of projects//subscriptions/.") String getInputSubscription(); void setInputSubscription(String value); @@ -215,9 +213,7 @@ public interface Options description = "Table for messages failed to reach the output table (i.e., Deadletter table)", helpText = - "BigQuery table for failed messages. Messages failed to reach the output table for different reasons " - + "(e.g., mismatched schema, malformed json) are written to this table. If it doesn't exist, it will" - + " be created during pipeline execution. If not specified, \"outputTableSpec_error_records\" is used instead.") + "The BigQuery table for messages that failed to reach the output table, in the format of :.. If it doesn't exist, it is created during pipeline execution. If not specified, OUTPUT_TABLE_SPEC_error_records is used instead.") String getOutputDeadletterTable(); void setOutputDeadletterTable(String value); diff --git a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java index 99b058f4c7..2c4c0e05e5 100644 --- a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java +++ b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java @@ -55,7 +55,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "JDBC connection URL string.", helpText = - "The JDBC connection URL string. For example, `jdbc:mysql://some-host:3306/sampledb`. Can be passed in as a string that's Base64-encoded and then encrypted with a Cloud KMS key. Note the difference between an Oracle non-RAC database connection string (`jdbc:oracle:thin:@some-host::`) and an Oracle RAC database connection string (`jdbc:oracle:thin:@//some-host[:]/`).", + "The JDBC connection URL string. For example, jdbc:mysql://some-host:3306/sampledb. You can pass in this value as a string that's encrypted with a Cloud KMS key and then Base64-encoded. Remove whitespace characters from the Base64-encoded string. 
Note the difference between an Oracle non-RAC database connection string (jdbc:oracle:thin:@some-host::) and an Oracle RAC database connection string (jdbc:oracle:thin:@//some-host[:]/).", example = "jdbc:mysql://some-host:3306/sampledb") String getConnectionURL(); @@ -68,8 +68,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "JDBC connection property string.", helpText = - "Properties string to use for the JDBC connection. Format of the string must be" - + " [propertyName=property;]*.", + "Optional: The properties string to use for the JDBC connection. Use the string format [propertyName=property;]*.", example = "unicode=true;characterEncoding=UTF-8") String getConnectionProperties(); @@ -82,8 +81,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "JDBC connection username.", helpText = - "The username to be used for the JDBC connection. Can be passed in as a Base64-encoded string encrypted " - + "with a Cloud KMS key.") + "Optional: The username to use for the JDBC connection. You can pass in this value encrypted by a Cloud KMS key as a Base64-encoded string.") String getUsername(); void setUsername(String username); @@ -94,8 +92,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "JDBC connection password.", helpText = - "The password to be used for the JDBC connection. Can be passed in as a Base64-encoded string encrypted " - + "with a Cloud KMS key.") + "Optional: The password to use for the JDBC connection. You can pass in this value encrypted by a Cloud KMS key as a Base64-encoded string.") String getPassword(); void setPassword(String password); @@ -107,8 +104,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "JDBC source SQL query", helpText = - "The query to be run on the source to extract the data. Either query OR both table AND " - + "PartitionColumn must be specified.", + "Optional: The query to run on the source to extract the data.", example = "select * from sampledb.sample_table") String getQuery(); @@ -131,7 +127,7 @@ public interface JdbcToBigQueryOptions optional = false, groupName = "Target", description = "Temporary directory for BigQuery loading process", - helpText = "The temporary directory for the BigQuery loading process", + helpText = "The temporary directory for the BigQuery loading process.", example = "gs://your-bucket/your-files/temp_dir") String getBigQueryLoadingTemporaryDirectory(); @@ -143,8 +139,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Google Cloud KMS key", helpText = - "Cloud KMS Encryption Key to decrypt the username, password, and connection string. If Cloud KMS key is " - + "passed in, the username, password, and connection string must all be passed in encrypted.", + "Optional: The Cloud KMS encryption key to use decrypt the username, password, and connection string. If you pass in a Cloud KMS key, the username, password, and connection string must all be passed in encrypted.", example = "projects/your-project/locations/global/keyRings/your-keyring/cryptoKeys/your-key") String getKMSEncryptionKey(); @@ -156,8 +151,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Whether to use column alias to map the rows.", helpText = - "If enabled (set to true) the pipeline will consider column alias (\"AS\") instead of the" - + " column name to map the rows to BigQuery. 
Defaults to false.") + "Optional: If enabled (set to true), the pipeline uses the column alias ("AS") instead of the column name to map the rows to BigQuery. Defaults to false.") @Default.Boolean(false) Boolean getUseColumnAlias(); @@ -169,8 +163,7 @@ public interface JdbcToBigQueryOptions groupName = "Target", description = "Whether to truncate data before writing", helpText = - "If enabled (set to true) the pipeline will truncate before loading data into BigQuery." - + " Defaults to false, which is used to only append data.") + "Optional: If enabled (set to true), the pipeline truncates before loading data into BigQuery. Defaults to false, which causes the pipeline to append data.") @Default.Boolean(false) Boolean getIsTruncate(); @@ -182,10 +175,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "The name of a column of numeric type that will be used for partitioning.", helpText = - "If this parameter is provided (along with `table`), JdbcIO reads the table in parallel " - + "by executing multiple instances of the query on the same table (subquery) using ranges. " - + "Currently, only Long partition columns are supported. Either query OR both table AND " - + "PartitionColumn must be specified.") + " Optional: If this parameter is provided (along with table), JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports Long partition columns.") String getPartitionColumn(); void setPartitionColumn(String partitionColumn); @@ -196,8 +186,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Name of the table in the external database.", helpText = - "Table to read from using partitions. Either query OR both table AND PartitionColumn must be specified. " - + "This parameter also accepts a subquery in parentheses.", + "Optional: The table to read from when using partitions. This parameter also accepts a subquery in parentheses.", example = "(select id, name from Person) as subq") String getTable(); @@ -209,7 +198,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "The number of partitions.", helpText = - "The number of partitions. This, along with the lower and upper bound, form partitions strides for generated WHERE clause expressions used to split the partition column evenly. When the input is less than 1, the number is set to 1.") + "Optional: The number of partitions. With the lower and upper bound, this value forms partition strides for generated WHERE clause expressions that are used to split the partition column evenly. When the input is less than 1, the number is set to 1.") Integer getNumPartitions(); void setNumPartitions(Integer numPartitions); @@ -220,7 +209,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Lower bound of partition column.", helpText = - "Lower bound used in the partition scheme. If not provided, it is automatically inferred by Beam (for the supported types)") + "Optional: The lower bound to use in the partition scheme. If not provided, this value is automatically inferred by Apache Beam for the supported types.") Long getLowerBound(); void setLowerBound(Long lowerBound); @@ -231,7 +220,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Upper bound of partition column", helpText = - "Upper bound used in partition scheme. 
If not provided, it is automatically inferred by Beam (for the supported types)") + "Optional: The upper bound to use in the partition scheme. If not provided, this value is automatically inferred by Apache Beam for the supported types.") Long getUpperBound(); void setUpperBound(Long lowerBound); diff --git a/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java b/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java index 0dee01a74a..c131fa5ced 100644 --- a/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java +++ b/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java @@ -141,8 +141,8 @@ public interface JmsToPubsubOptions extends PipelineOptions { optional = true, regexes = {"[,\\/:a-zA-Z0-9._-]+"}, description = "JMS Host IP", - helpText = "Server IP for JMS Host", - example = "host:5672") + helpText = "The JMS (ActiveMQ) Server IP.", + example = "tcp://10.0.0.1:61616") @Validation.Required String getJmsServer(); @@ -153,7 +153,7 @@ public interface JmsToPubsubOptions extends PipelineOptions { optional = false, regexes = {"[a-zA-Z0-9._-]+"}, description = "JMS Queue/Topic Name to read the input from", - helpText = "JMS Queue/Topic Name to read the input from.", + helpText = "The name of the JMS topic/queue that data is read from.", example = "queue") @Validation.Required String getInputName(); @@ -165,7 +165,7 @@ public interface JmsToPubsubOptions extends PipelineOptions { optional = false, regexes = {"[a-zA-Z0-9._-]+"}, description = "JMS Destination Type to read the input from", - helpText = "JMS Destination Type to read the input from.", + helpText = "The JMS destination type to read data from. Can be a queue or a topic.", example = "queue") @Validation.Required String getInputType(); @@ -176,8 +176,7 @@ public interface JmsToPubsubOptions extends PipelineOptions { order = 4, description = "Output Pub/Sub topic", helpText = - "The name of the topic to which data should published, in the format of" + " 'projects/your-project-id/topics/your-topic-name'", + "The name of the topic to which data should be published, in the format projects/your-project-id/topics/your-topic-name.", example = "projects/your-project-id/topics/your-topic-name") @Validation.Required String getOutputTopic(); @TemplateParameter.Text( order = 5, description = "JMS Username", - helpText = "JMS username for authentication with JMS server", + helpText = "(Optional) The username to use for authentication on the JMS server.", example = "sampleusername") String getUsername(); @TemplateParameter.Text( order = 6, description = "JMS Password", - helpText = "Password for username provided for authentication with JMS server", + helpText = "(Optional) The password associated with the provided username.", example = "samplepassword") String getPassword(); From 0fda2012a3368fdd42c9dc6445e9af88887b39e0 Mon Sep 17 00:00:00 2001 From: Archana Gupta Date: Tue, 30 Apr 2024 17:13:13 +0530 Subject: [PATCH 02/70] Modified to add backquotes to highlight words for all templates --- .../cloud/teleport/bigtable/BigtableToAvro.java | 2 +- .../cloud/teleport/spanner/ExportPipeline.java | 10 +++++----- .../cloud/teleport/spanner/ImportPipeline.java | 12 ++++++------ .../cloud/teleport/templates/PubSubToBigQuery.java | 10 +++-------
.../cloud/teleport/templates/PubsubToPubsub.java | 8 ++++---- .../cloud/teleport/templates/TextToPubsub.java | 2 +- .../v2/astradb/options/AstraDbToBigQueryOptions.java | 2 +- .../v2/options/SpannerChangeStreamsToGcsOptions.java | 2 +- .../teleport/v2/templates/PubSubToBigQuery.java | 6 +++--- .../teleport/v2/options/JdbcToBigQueryOptions.java | 12 ++++++------ .../cloud/teleport/v2/templates/JmsToPubsub.java | 2 +- 11 files changed, 32 insertions(+), 36 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java b/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java index f9ede3e24b..4cb5d2b2ad 100644 --- a/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java +++ b/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java @@ -118,7 +118,7 @@ public interface Options extends PipelineOptions { @TemplateParameter.Text( order = 5, description = "Avro file prefix", - helpText = "The prefix of the Avro filename. For example, output-.") + helpText = "The prefix of the Avro filename. For example, `output-`.") @Default.String("part") ValueProvider getFilenamePrefix(); diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java index 3f68b73767..7165873674 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java @@ -167,7 +167,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Export Timestamps as Timestamp-micros type", helpText = - "(Optional) If true, timestamps are exported as a long type with timestamp-micros logical type. By default, timestamps are exported as ISO-8601 strings at nanosecond precision.") + "(Optional) If true, timestamps are exported as a `long` type with `timestamp-micros` logical type. By default, timestamps are exported as ISO-8601 strings at nanosecond precision.") @Default.Boolean(false) ValueProvider getShouldExportTimestampAsLogicalType(); @@ -179,7 +179,7 @@ public interface ExportPipelineOptions extends PipelineOptions { regexes = {"^[a-zA-Z0-9_]+(,[a-zA-Z0-9_]+)*$"}, description = "Cloud Spanner table name(s).", helpText = - "(Optional) A comma-separated list of tables specifying the subset of the Spanner database to export. If you set this parameter, you must include all of the related tables (parent tables and foreign key referenced tables) or else set the shouldExportRelatedTables parameter to true.") + "(Optional) A comma-separated list of tables specifying the subset of the Spanner database to export. If you set this parameter, you must include all of the related tables (parent tables and foreign key referenced tables) or else set the `shouldExportRelatedTables` parameter to `true`.") @Default.String(value = "") ValueProvider getTableNames(); @@ -190,7 +190,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Export necessary Related Spanner tables.", helpText = - "\t(Optional) Specifies whether to include related tables. This parameter is used in conjunction with the tableNames parameter.") + "\t(Optional) Specifies whether to include related tables. 
This parameter is used in conjunction with the `tableNames` parameter.") @Default.Boolean(false) ValueProvider getShouldExportRelatedTables(); @@ -206,7 +206,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Priority for Spanner RPC invocations", helpText = - "(Optional) The request priority for Spanner calls. Possible values are HIGH, MEDIUM, LOW. The default value is MEDIUM.") + "(Optional) The request priority for Spanner calls. Possible values are `HIGH`, `MEDIUM`, `LOW`. The default value is `MEDIUM`.") ValueProvider getSpannerPriority(); void setSpannerPriority(ValueProvider value); @@ -216,7 +216,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Use independent compute resource (Spanner DataBoost).", helpText = - "(Optional) Set to true to use the compute resources of Spanner Data Boost to run the job with near-zero impact on Spanner OLTP workflows. This requires the spanner.databases.useDataBoost IAM permission. For more information, see [Data Boost overview](https://cloud.google.com/spanner/docs/databoost/databoost-overview).") + "(Optional) Set to `true` to use the compute resources of Spanner Data Boost to run the job with near-zero impact on Spanner OLTP workflows. This requires the `spanner.databases.useDataBoost` IAM permission. For more information, see [Data Boost overview](https://cloud.google.com/spanner/docs/databoost/databoost-overview).") @Default.Boolean(false) ValueProvider getDataBoostEnabled(); diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java index 8470c5c17a..b5556d1d3b 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java @@ -106,7 +106,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Indexes", helpText = - "Optional: If true, the pipeline waits for indexes to be created. If false, the job might complete while indexes are still being created in the background. Default: false.") + "Optional: If `true`, the pipeline waits for indexes to be created. If `false`, the job might complete while indexes are still being created in the background. Default: `false`.") @Default.Boolean(false) ValueProvider getWaitForIndexes(); @@ -117,7 +117,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Foreign Keys", helpText = - "Optional: If true, the pipeline waits for foreign keys to be created. If false, the job might complete while foreign keys are still being created in the background. Default: false.") + "Optional: If `true`, the pipeline waits for foreign keys to be created. If `false`, the job might complete while foreign keys are still being created in the background. Default: `false`.") @Default.Boolean(false) ValueProvider getWaitForForeignKeys(); @@ -128,7 +128,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Change Streams", helpText = - "Optional: If true, the pipeline waits for change streams to be created. If false, the job might complete while change streams are still being created in the background. Default: true.") + "Optional: If `true`, the pipeline waits for change streams to be created. If `false`, the job might complete while change streams are still being created in the background. 
Default: `true`.") @Default.Boolean(true) ValueProvider getWaitForChangeStreams(); @@ -139,7 +139,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Sequences", helpText = - "By default the import pipeline is blocked on sequence creation. If false, it may" + "By default the import pipeline is blocked on sequence creation. If `false`, it may" + " complete with sequences still being created in the background.") @Default.Boolean(true) ValueProvider getWaitForSequences(); @@ -151,7 +151,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Create Indexes early", helpText = - "Optional: Specifies whether to enable early index creation. If the template runs a large number of DDL statements, it's more efficient to create indexes before loading data. Therefore, the default behavior is to create the indexes first when the number of DDL statements exceeds a threshold. To disable this feature, set earlyIndexCreateFlag to false. Default: true.") + "Optional: Specifies whether to enable early index creation. If the template runs a large number of DDL statements, it's more efficient to create indexes before loading data. Therefore, the default behavior is to create the indexes first when the number of DDL statements exceeds a threshold. To disable this feature, `set earlyIndexCreateFlag` to `false`. Default: `true`.") @Default.Boolean(true) ValueProvider getEarlyIndexCreateFlag(); @@ -194,7 +194,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Priority for Spanner RPC invocations", helpText = - "Optional: The request priority for Spanner calls. Possible values are HIGH, MEDIUM, LOW. The default value is MEDIUM.") + "Optional: The request priority for Spanner calls. Possible values are `HIGH`, `MEDIUM`, `LOW`. The default value is `MEDIUM`.") ValueProvider getSpannerPriority(); void setSpannerPriority(ValueProvider value); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java b/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java index a4b136500c..c03c9e7725 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java @@ -166,8 +166,7 @@ public interface Options extends PipelineOptions, JavascriptTextTransformerOptio order = 1, description = "BigQuery output table", helpText = - "BigQuery table location to write the output to. The table’s schema must match the " - + "input JSON objects.") + "The BigQuery output table location, in the format of `:.`") ValueProvider getOutputTableSpec(); void setOutputTableSpec(ValueProvider value); @@ -184,8 +183,7 @@ public interface Options extends PipelineOptions, JavascriptTextTransformerOptio order = 3, description = "Pub/Sub input subscription", helpText = - "Pub/Sub subscription to read the input from, in the format of" - + " 'projects/your-project-id/subscriptions/your-subscription-name'") + "The Pub/Sub input subscription to read from, in the format of `projects//subscriptions/`.") ValueProvider getInputSubscription(); void setInputSubscription(ValueProvider value); @@ -205,9 +203,7 @@ public interface Options extends PipelineOptions, JavascriptTextTransformerOptio description = "Table for messages failed to reach the output table (i.e., Deadletter table)", helpText = - "BigQuery table for failed messages. 
Messages failed to reach the output table for different reasons " - + "(e.g., mismatched schema, malformed json) are written to this table. If it doesn't exist, it will" - + " be created during pipeline execution. If not specified, \"outputTableSpec_error_records\" is used instead.") + "The BigQuery table for messages that failed to reach the output table, in the format of `:.`. If it doesn't exist, it is created during pipeline execution. If not specified, OUTPUT_TABLE_SPEC_error_records is used instead.") ValueProvider getOutputDeadletterTable(); void setOutputDeadletterTable(ValueProvider value); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java index 770cf5c260..cf77cc1825 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java @@ -132,7 +132,7 @@ public interface Options extends PipelineOptions, StreamingOptions { order = 1, description = "Pub/Sub input subscription", helpText = - "Pub/Sub subscription to read the input from. For example, projects//subscriptions/.", + "Pub/Sub subscription to read the input from. For example, `projects//subscriptions/`.", example = "projects/your-project-id/subscriptions/your-subscription-name") @Validation.Required ValueProvider getInputSubscription(); @@ -143,7 +143,7 @@ public interface Options extends PipelineOptions, StreamingOptions { order = 2, description = "Output Pub/Sub topic", helpText = - "Cloud Pub/Sub topic to write the output to. For example, projects//topics/.", + "Cloud Pub/Sub topic to write the output to. For example, `projects//topics/`.", example = "projects/your-project-id/topics/your-topic-name") @Validation.Required ValueProvider getOutputTopic(); @@ -155,7 +155,7 @@ public interface Options extends PipelineOptions, StreamingOptions { optional = true, description = "Event filter key", helpText = - "(Optional) Filter events based on an attribute key. No filters are applied if filterKey is not specified.") + "(Optional) Filter events based on an attribute key. No filters are applied if `filterKey` is not specified.") ValueProvider getFilterKey(); void setFilterKey(ValueProvider filterKey); @@ -165,7 +165,7 @@ public interface Options extends PipelineOptions, StreamingOptions { optional = true, description = "Event filter value", helpText = - "(Optional) Filter attribute value to use in case a filterKey is provided. A null filterValue is used by default.") + "(Optional) Filter attribute value to use in case a filterKey is provided. A null `filterValue` is used by default.") ValueProvider getFilterValue(); void setFilterValue(ValueProvider filterValue); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/TextToPubsub.java b/v1/src/main/java/com/google/cloud/teleport/templates/TextToPubsub.java index b1cd1b8e02..ac07eaa34d 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/TextToPubsub.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/TextToPubsub.java @@ -74,7 +74,7 @@ public interface Options extends PipelineOptions { order = 2, description = "Output Pub/Sub topic", helpText = - "The Pub/Sub input topic to write to. The name must be in the format of projects//topics/.", + "The Pub/Sub input topic to write to. 
The name must be in the format of `projects//topics/`.", example = "projects/your-project-id/topics/your-topic-name") @Required ValueProvider getOutputTopic(); diff --git a/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java b/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java index 80597dbbbf..5e1d5e51ed 100644 --- a/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java +++ b/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java @@ -33,7 +33,7 @@ interface BigQueryWriteOptions extends PipelineOptions, DataflowPipelineOptions description = "BigQuery output table", optional = true, helpText = - "Optional: The BigQuery table location to write the output to. The table should be in the format :.. The table's schema must match the input objects.") + "Optional: The BigQuery table location to write the output to. The table should be in the format `:.`. The table's schema must match the input objects.") String getOutputTableSpec(); @SuppressWarnings("unused") diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java index fe9f4217c7..c8fc5a33ac 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java @@ -68,7 +68,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Spanner database role", helpText = - "(Optional) The Spanner database role to use when running the template. This parameter is required only when the IAM principal who is running the template is a fine-grained access control user. The database role must have the SELECT privilege on the change stream and the EXECUTE privilege on the change stream's read function. For more information, see [Fine-grained access control for change streams](https://cloud.google.com/spanner/docs/fgac-change-streams).") + "(Optional) The Spanner database role to use when running the template. This parameter is required only when the IAM principal who is running the template is a fine-grained access control user. The database role must have the `SELECT` privilege on the change stream and the `EXECUTE` privilege on the change stream's read function. 
For more information, see [Fine-grained access control for change streams](https://cloud.google.com/spanner/docs/fgac-change-streams).") String getSpannerDatabaseRole(); void setSpannerDatabaseRole(String spannerDatabaseRole); diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java index cade509590..c7149bef6c 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java @@ -183,7 +183,7 @@ public interface Options order = 1, description = "BigQuery output table", helpText = - "The BigQuery output table location, in the format of :.") + "The BigQuery output table location, in the format of `:.`") String getOutputTableSpec(); void setOutputTableSpec(String value); @@ -202,7 +202,7 @@ public interface Options optional = true, description = "Pub/Sub input subscription", helpText = - "The Pub/Sub input subscription to read from, in the format of projects//subscriptions/.") + "The Pub/Sub input subscription to read from, in the format of `projects//subscriptions/`.") String getInputSubscription(); void setInputSubscription(String value); @@ -213,7 +213,7 @@ public interface Options description = "Table for messages failed to reach the output table (i.e., Deadletter table)", helpText = - "The BigQuery table for messages that failed to reach the output table, in the format of :.. If it doesn't exist, it is created during pipeline execution. If not specified, OUTPUT_TABLE_SPEC_error_records is used instead.") + "The BigQuery table for messages that failed to reach the output table, in the format of` :.`. If it doesn't exist, it is created during pipeline execution. If not specified, `OUTPUT_TABLE_SPEC_error_records` is used instead.") String getOutputDeadletterTable(); void setOutputDeadletterTable(String value); diff --git a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java index 2c4c0e05e5..f7286b1b95 100644 --- a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java +++ b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java @@ -55,7 +55,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "JDBC connection URL string.", helpText = - "The JDBC connection URL string. For example, jdbc:mysql://some-host:3306/sampledb. You can pass in this value as a string that's encrypted with a Cloud KMS key and then Base64-encoded. Remove whitespace characters from the Base64-encoded string. Note the difference between an Oracle non-RAC database connection string (jdbc:oracle:thin:@some-host::) and an Oracle RAC database connection string (jdbc:oracle:thin:@//some-host[:]/).", + "The JDBC connection URL string. For example, `jdbc:mysql://some-host:3306/sampledb`. You can pass in this value as a string that's encrypted with a Cloud KMS key and then Base64-encoded. Remove whitespace characters from the Base64-encoded string. 
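As a rough illustration of the encoding step described above, the following sketch shows how a value that has already been encrypted with Cloud KMS could be Base64-encoded and stripped of whitespace before being passed as a template parameter. The class name, helper bytes, and values are invented for illustration; the actual Cloud KMS encrypt call is not shown.

import java.util.Base64;

public class EncodeKmsValue {
  public static void main(String[] args) {
    // Placeholder bytes standing in for ciphertext returned by a Cloud KMS encrypt call.
    byte[] kmsCiphertext = new byte[] {0x10, 0x20, 0x30};
    String encoded = Base64.getEncoder().encodeToString(kmsCiphertext);
    // Standard Base64 output contains no whitespace, but if the value was wrapped
    // (for example, by a MIME-style encoder), remove it as the help text requires.
    String parameterValue = encoded.replaceAll("\\s", "");
    System.out.println(parameterValue);
  }
}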
Note the difference between an Oracle non-RAC database connection string (`jdbc:oracle:thin:@some-host::`) and an Oracle RAC database connection string (`jdbc:oracle:thin:@//some-host[:]/`).", example = "jdbc:mysql://some-host:3306/sampledb") String getConnectionURL(); @@ -151,7 +151,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Whether to use column alias to map the rows.", helpText = - "Optional: If enabled (set to true), the pipeline uses the column alias ("AS") instead of the column name to map the rows to BigQuery. Defaults to false.") + "Optional: If enabled (set to `true`), the pipeline uses the column alias ("AS") instead of the column name to map the rows to BigQuery. Defaults to `false`.") @Default.Boolean(false) Boolean getUseColumnAlias(); @@ -163,7 +163,7 @@ public interface JdbcToBigQueryOptions groupName = "Target", description = "Whether to truncate data before writing", helpText = - "Optional: If enabled (set to true), the pipeline truncates before loading data into BigQuery. Defaults to false, which causes the pipeline to append data.") + "Optional: If enabled (set to `true`), the pipeline truncates before loading data into BigQuery. Defaults to `false`, which causes the pipeline to append data.") @Default.Boolean(false) Boolean getIsTruncate(); @@ -175,7 +175,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "The name of a column of numeric type that will be used for partitioning.", helpText = - " Optional: If this parameter is provided (along with table), JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports Long partition columns.") + " Optional: If this parameter is provided (along with `table`), JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports `Long` partition columns.") String getPartitionColumn(); void setPartitionColumn(String partitionColumn); @@ -198,7 +198,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "The number of partitions.", helpText = - "Optional: The number of partitions. With the lower and upper bound, this value forms partition strides for generated WHERE clause expressions that are used to split the partition column evenly. When the input is less than 1, the number is set to 1.") + "Optional: The number of partitions. With the lower and upper bound, this value forms partition strides for generated `WHERE` clause expressions that are used to split the partition column evenly. When the input is less than `1`, the number is set to `1`.") Integer getNumPartitions(); void setNumPartitions(Integer numPartitions); @@ -247,7 +247,7 @@ public interface JdbcToBigQueryOptions }, optional = true, description = "Create Disposition to use for BigQuery", - helpText = "BigQuery CreateDisposition. For example, CREATE_IF_NEEDED, CREATE_NEVER.") + helpText = "BigQuery CreateDisposition. 
For example, `CREATE_IF_NEEDED`, `CREATE_NEVER`.") @Default.String("CREATE_NEVER") String getCreateDisposition(); diff --git a/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java b/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java index c131fa5ced..ee3a87c7c3 100644 --- a/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java +++ b/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java @@ -176,7 +176,7 @@ public interface JmsToPubsubOptions extends PipelineOptions { order = 4, description = "Output Pub/Sub topic", helpText = - "The name of the topic to which data should published, in the format projects/your-project-id/topics/your-topic-name.", + "The name of the topic to which data should published, in the format `projects/your-project-id/topics/your-topic-name`.", example = "projects/your-project-id/topics/your-topic-name") @Validation.Required String getOutputTopic(); From 1a009018b016501b16c2d3038423d07703227369 Mon Sep 17 00:00:00 2001 From: Archana Gupta Date: Wed, 1 May 2024 10:48:57 +0530 Subject: [PATCH 03/70] Modified links and Optional statements --- .../teleport/spanner/ExportPipeline.java | 16 ++++++------- .../teleport/spanner/ImportPipeline.java | 14 +++++------ .../teleport/templates/PubsubToPubsub.java | 8 +++---- .../options/AstraDbToBigQueryOptions.java | 8 +++---- .../SpannerChangeStreamsToGcsOptions.java | 16 ++++++------- .../v2/templates/PubSubToBigQuery.java | 2 +- .../v2/options/JdbcToBigQueryOptions.java | 24 +++++++++---------- .../teleport/v2/templates/JmsToPubsub.java | 4 ++-- 8 files changed, 46 insertions(+), 46 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java index 7165873674..6abfe1f306 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java @@ -108,7 +108,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Cloud Storage temp directory for storing Avro files", helpText = - "(Optional) The Cloud Storage path where temporary Avro files are written.") + "The Cloud Storage path where temporary Avro files are written.") ValueProvider getAvroTempDirectory(); void setAvroTempDirectory(ValueProvider value); @@ -146,7 +146,7 @@ public interface ExportPipelineOptions extends PipelineOptions { }, description = "Snapshot time", helpText = - "(Optional) The timestamp that corresponds to the version of the Spanner database that you want to read. The timestamp must be specified as per RFC 3339 UTC "Zulu" format. The timestamp must be in the past and Maximum timestamp staleness applies.", + "The timestamp that corresponds to the version of the Spanner database that you want to read. The timestamp must be specified as per RFC 3339 UTC "Zulu" format. 
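For concreteness, here is a minimal, illustrative sketch of an RFC 3339 UTC ("Zulu") timestamp of the kind the snapshotTime parameter accepts; the value shown is made up and must lie in the past when the job runs.

import java.time.Instant;

public class SnapshotTimeFormat {
  public static void main(String[] args) {
    // RFC 3339 UTC "Zulu" format: no timezone offset, always ends in 'Z'.
    Instant snapshotTime = Instant.parse("2024-04-30T10:15:30Z");
    // The template requires the snapshot time to be in the past.
    System.out.println(snapshotTime.isBefore(Instant.now()));
  }
}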
The timestamp must be in the past and Maximum timestamp staleness applies.", example = "1990-12-31T23:59:60Z") @Default.String(value = "") ValueProvider getSnapshotTime(); @@ -157,7 +157,7 @@ public interface ExportPipelineOptions extends PipelineOptions { order = 8, optional = true, description = "Cloud Spanner Project Id", - helpText = "(Optional) The Google Cloud Project ID of the Spanner database that you want to read data from.") + helpText = "The Google Cloud Project ID of the Spanner database that you want to read data from.") ValueProvider getSpannerProjectId(); void setSpannerProjectId(ValueProvider value); @@ -167,7 +167,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Export Timestamps as Timestamp-micros type", helpText = - "(Optional) If true, timestamps are exported as a `long` type with `timestamp-micros` logical type. By default, timestamps are exported as ISO-8601 strings at nanosecond precision.") + "If true, timestamps are exported as a `long` type with `timestamp-micros` logical type. By default, timestamps are exported as ISO-8601 strings at nanosecond precision.") @Default.Boolean(false) ValueProvider getShouldExportTimestampAsLogicalType(); @@ -179,7 +179,7 @@ public interface ExportPipelineOptions extends PipelineOptions { regexes = {"^[a-zA-Z0-9_]+(,[a-zA-Z0-9_]+)*$"}, description = "Cloud Spanner table name(s).", helpText = - "(Optional) A comma-separated list of tables specifying the subset of the Spanner database to export. If you set this parameter, you must include all of the related tables (parent tables and foreign key referenced tables) or else set the `shouldExportRelatedTables` parameter to `true`.") + "A comma-separated list of tables specifying the subset of the Spanner database to export. If you set this parameter, you must include all of the related tables (parent tables and foreign key referenced tables) or else set the `shouldExportRelatedTables` parameter to `true`.") @Default.String(value = "") ValueProvider getTableNames(); @@ -190,7 +190,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Export necessary Related Spanner tables.", helpText = - "\t(Optional) Specifies whether to include related tables. This parameter is used in conjunction with the `tableNames` parameter.") + "Specifies whether to include related tables. This parameter is used in conjunction with the `tableNames` parameter.") @Default.Boolean(false) ValueProvider getShouldExportRelatedTables(); @@ -206,7 +206,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Priority for Spanner RPC invocations", helpText = - "(Optional) The request priority for Spanner calls. Possible values are `HIGH`, `MEDIUM`, `LOW`. The default value is `MEDIUM`.") + "The request priority for Spanner calls. Possible values are `HIGH`, `MEDIUM`, `LOW`. The default value is `MEDIUM`.") ValueProvider getSpannerPriority(); void setSpannerPriority(ValueProvider value); @@ -216,7 +216,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Use independent compute resource (Spanner DataBoost).", helpText = - "(Optional) Set to `true` to use the compute resources of Spanner Data Boost to run the job with near-zero impact on Spanner OLTP workflows. This requires the `spanner.databases.useDataBoost` IAM permission. 
For more information, see [Data Boost overview](https://cloud.google.com/spanner/docs/databoost/databoost-overview).") + "Set to `true` to use the compute resources of Spanner Data Boost to run the job with near-zero impact on Spanner OLTP workflows. This requires the `spanner.databases.useDataBoost` IAM permission. For more information, see Data Boost overview(https://cloud.google.com/spanner/docs/databoost/databoost-overview).") @Default.Boolean(false) ValueProvider getDataBoostEnabled(); diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java index b5556d1d3b..a64b0b471b 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java @@ -106,7 +106,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Indexes", helpText = - "Optional: If `true`, the pipeline waits for indexes to be created. If `false`, the job might complete while indexes are still being created in the background. Default: `false`.") + "If `true`, the pipeline waits for indexes to be created. If `false`, the job might complete while indexes are still being created in the background. Default: `false`.") @Default.Boolean(false) ValueProvider getWaitForIndexes(); @@ -117,7 +117,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Foreign Keys", helpText = - "Optional: If `true`, the pipeline waits for foreign keys to be created. If `false`, the job might complete while foreign keys are still being created in the background. Default: `false`.") + "If `true`, the pipeline waits for foreign keys to be created. If `false`, the job might complete while foreign keys are still being created in the background. Default: `false`.") @Default.Boolean(false) ValueProvider getWaitForForeignKeys(); @@ -128,7 +128,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Change Streams", helpText = - "Optional: If `true`, the pipeline waits for change streams to be created. If `false`, the job might complete while change streams are still being created in the background. Default: `true`.") + "If `true`, the pipeline waits for change streams to be created. If `false`, the job might complete while change streams are still being created in the background. Default: `true`.") @Default.Boolean(true) ValueProvider getWaitForChangeStreams(); @@ -151,7 +151,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Create Indexes early", helpText = - "Optional: Specifies whether to enable early index creation. If the template runs a large number of DDL statements, it's more efficient to create indexes before loading data. Therefore, the default behavior is to create the indexes first when the number of DDL statements exceeds a threshold. To disable this feature, `set earlyIndexCreateFlag` to `false`. Default: `true`.") + "Specifies whether to enable early index creation. If the template runs a large number of DDL statements, it's more efficient to create indexes before loading data. Therefore, the default behavior is to create the indexes first when the number of DDL statements exceeds a threshold. To disable this feature, `set earlyIndexCreateFlag` to `false`. 
Default: `true`.") @Default.Boolean(true) ValueProvider getEarlyIndexCreateFlag(); @@ -166,7 +166,7 @@ public interface Options extends PipelineOptions { order = 9, optional = true, description = "Cloud Spanner Project Id", - helpText = "\tOptional: The Google Cloud project ID of the Spanner database. If not set, the default Google Cloud project is used.") + helpText = "The Google Cloud project ID of the Spanner database. If not set, the default Google Cloud project is used.") ValueProvider getSpannerProjectId(); void setSpannerProjectId(ValueProvider value); @@ -178,7 +178,7 @@ public interface Options extends PipelineOptions { optional = true, regexes = {"[0-9]+"}, description = "DDL Creation timeout in minutes", - helpText = "Optional: The timeout, in minutes, for DDL statements performed by the template. The default value is 30 minutes.") + helpText = "The timeout, in minutes, for DDL statements performed by the template. The default value is 30 minutes.") @Default.Integer(30) ValueProvider getDdlCreationTimeoutInMinutes(); @@ -194,7 +194,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Priority for Spanner RPC invocations", helpText = - "Optional: The request priority for Spanner calls. Possible values are `HIGH`, `MEDIUM`, `LOW`. The default value is `MEDIUM`.") + "The request priority for Spanner calls. Possible values are `HIGH`, `MEDIUM`, `LOW`. The default value is `MEDIUM`.") ValueProvider getSpannerPriority(); void setSpannerPriority(ValueProvider value); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java index cf77cc1825..a7bc99181b 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java @@ -132,7 +132,7 @@ public interface Options extends PipelineOptions, StreamingOptions { order = 1, description = "Pub/Sub input subscription", helpText = - "Pub/Sub subscription to read the input from. For example, `projects//subscriptions/`.", + "Pub/Sub subscription to read the input from.", example = "projects/your-project-id/subscriptions/your-subscription-name") @Validation.Required ValueProvider getInputSubscription(); @@ -143,7 +143,7 @@ public interface Options extends PipelineOptions, StreamingOptions { order = 2, description = "Output Pub/Sub topic", helpText = - "Cloud Pub/Sub topic to write the output to. For example, `projects//topics/`.", + "Cloud Pub/Sub topic to write the output to..", example = "projects/your-project-id/topics/your-topic-name") @Validation.Required ValueProvider getOutputTopic(); @@ -155,7 +155,7 @@ public interface Options extends PipelineOptions, StreamingOptions { optional = true, description = "Event filter key", helpText = - "(Optional) Filter events based on an attribute key. No filters are applied if `filterKey` is not specified.") + "Filter events based on an attribute key. No filters are applied if `filterKey` is not specified.") ValueProvider getFilterKey(); void setFilterKey(ValueProvider filterKey); @@ -165,7 +165,7 @@ public interface Options extends PipelineOptions, StreamingOptions { optional = true, description = "Event filter value", helpText = - "(Optional) Filter attribute value to use in case a filterKey is provided. A null `filterValue` is used by default.") + "Filter attribute value to use in case a filterKey is provided. 
A null `filterValue` is used by default.") ValueProvider getFilterValue(); void setFilterValue(ValueProvider filterValue); diff --git a/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java b/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java index 5e1d5e51ed..040fea7c9f 100644 --- a/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java +++ b/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java @@ -33,7 +33,7 @@ interface BigQueryWriteOptions extends PipelineOptions, DataflowPipelineOptions description = "BigQuery output table", optional = true, helpText = - "Optional: The BigQuery table location to write the output to. The table should be in the format `:.`. The table's schema must match the input objects.") + "The BigQuery table location to write the output to. The table should be in the format `:.`. The table's schema must match the input objects.") String getOutputTableSpec(); @SuppressWarnings("unused") @@ -92,7 +92,7 @@ interface AstraDbSourceOptions extends PipelineOptions { order = 5, optional = true, description = "Cassandra CQL Query", - helpText = "Optional: Query to filter rows instead of reading the whole table.") + helpText = "Query to filter rows instead of reading the whole table.") @SuppressWarnings("unused") String getAstraQuery(); @@ -104,7 +104,7 @@ interface AstraDbSourceOptions extends PipelineOptions { optional = true, description = "Astra Database Region", helpText = - "Optional: If not provided, a default is chosen, which is useful with multi-region databases.") + "If not provided, a default is chosen, which is useful with multi-region databases.") @SuppressWarnings("unused") String getAstraDatabaseRegion(); @@ -115,7 +115,7 @@ interface AstraDbSourceOptions extends PipelineOptions { order = 7, optional = true, description = "Token range count", - helpText = "Optional: The minimal number of splits to distribute the query.") + helpText = "The minimal number of splits to distribute the query.") Integer getMinTokenRangesCount(); @SuppressWarnings("unused") diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java index c8fc5a33ac..ecd68de243 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java @@ -39,7 +39,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Spanner Project ID", helpText = - "(Optional) Project to read change streams from. This is also the project where the change streams connector metadata table is created. The default for this parameter is the project where the Dataflow pipeline is running.") + "Project to read change streams from. This is also the project where the change streams connector metadata table is created. 
The default for this parameter is the project where the Dataflow pipeline is running.") @Default.String("") String getSpannerProjectId(); @@ -68,7 +68,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Spanner database role", helpText = - "(Optional) The Spanner database role to use when running the template. This parameter is required only when the IAM principal who is running the template is a fine-grained access control user. The database role must have the `SELECT` privilege on the change stream and the `EXECUTE` privilege on the change stream's read function. For more information, see [Fine-grained access control for change streams](https://cloud.google.com/spanner/docs/fgac-change-streams).") + "The Spanner database role to use when running the template. This parameter is required only when the IAM principal who is running the template is a fine-grained access control user. The database role must have the `SELECT` privilege on the change stream and the `EXECUTE` privilege on the change stream's read function. For more information, see [Fine-grained access control for change streams](https://cloud.google.com/spanner/docs/fgac-change-streams).") String getSpannerDatabaseRole(); void setSpannerDatabaseRole(String spannerDatabaseRole); @@ -97,7 +97,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Cloud Spanner metadata table name", helpText = - " (Optional) The Spanner change streams connector metadata table name to use. If not provided, a Spanner change streams metadata table will automatically be created during the pipeline flow. This parameter must be provided when updating an existing pipeline and shouldn't be provided otherwise.") + "The Spanner change streams connector metadata table name to use. If not provided, a Spanner change streams metadata table will automatically be created during the pipeline flow. This parameter must be provided when updating an existing pipeline and shouldn't be provided otherwise.") String getSpannerMetadataTableName(); void setSpannerMetadataTableName(String value); @@ -116,7 +116,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "The timestamp to read change streams from", helpText = - "(Optional) The starting DateTime, inclusive, to use for reading change streams. Ex-2021-10-12T07:20:50.52Z. Defaults to the timestamp when the pipeline starts, i.e. the current time.") + "The starting DateTime, inclusive, to use for reading change streams. Ex-2021-10-12T07:20:50.52Z. Defaults to the timestamp when the pipeline starts, i.e. the current time.") @Default.String("") String getStartTimestamp(); @@ -127,7 +127,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "The timestamp to read change streams to", helpText = - "(Optional) The ending DateTime, inclusive, to use for reading change streams. Ex-2021-10-12T07:20:50.52Z. Defaults to an infinite time in the future.") + "The ending DateTime, inclusive, to use for reading change streams. Ex-2021-10-12T07:20:50.52Z. Defaults to an infinite time in the future.") @Default.String("") String getEndTimestamp(); @@ -150,7 +150,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Output file format", helpText = - " (Optional) The format of the output Cloud Storage file. Allowed formats are TEXT, AVRO. Default is AVRO.") + "The format of the output Cloud Storage file. Allowed formats are TEXT, AVRO. 
Default is AVRO.") @Default.Enum("AVRO") FileFormat getOutputFileFormat(); @@ -161,7 +161,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Window duration", helpText = - "\t(Optional) The window duration is the interval in which data is written to the output directory. Configure the duration based on the pipeline's throughput. For example, a higher throughput might require smaller window sizes so that the data fits into memory. Defaults to 5m, with a minimum of 1s. Allowed formats are: [int]s (for seconds, example: 5s), [int]m (for minutes, example: 12m), [int]h (for hours, example: 2h).", + "The window duration is the interval in which data is written to the output directory. Configure the duration based on the pipeline's throughput. For example, a higher throughput might require smaller window sizes so that the data fits into memory. Defaults to 5m, with a minimum of 1s. Allowed formats are: [int]s (for seconds, example: 5s), [int]m (for minutes, example: 12m), [int]h (for hours, example: 2h).", example = "5m") @Default.String("5m") String getWindowDuration(); @@ -178,7 +178,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Priority for Spanner RPC invocations", helpText = - "(Optional) The request priority for Spanner calls. The value must be one of:[HIGH,MEDIUM,LOW]. (Default: HIGH)") + "The request priority for Spanner calls. The value must be one of:[HIGH,MEDIUM,LOW]. (Default: HIGH)") @Default.Enum("HIGH") RpcPriority getRpcPriority(); diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java index c7149bef6c..aa8f81ece9 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java @@ -223,7 +223,7 @@ public interface Options optional = true, description = "Use at at-least-once semantics in BigQuery Storage Write API", helpText = - "This parameter takes effect only if \"Use BigQuery Storage Write API\" is enabled. If" + "This parameter takes effect only if `Use BigQuery Storage Write API` is enabled. If" + " enabled the at-least-once semantics will be used for Storage Write API, otherwise" + " exactly-once semantics will be used.", hiddenUi = true) diff --git a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java index f7286b1b95..4e0f0419a6 100644 --- a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java +++ b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java @@ -68,7 +68,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "JDBC connection property string.", helpText = - "Optional: The properties string to use for the JDBC connection. Use the string format [propertyName=property;]*.", + "The properties string to use for the JDBC connection. 
Use the string format `[propertyName=property;]*.`", example = "unicode=true;characterEncoding=UTF-8") String getConnectionProperties(); @@ -81,7 +81,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "JDBC connection username.", helpText = - "Optional: The username to use for the JDBC connection. You can pass in this value encrypted by a Cloud KMS key as a Base64-encoded string.") + "The username to use for the JDBC connection. You can pass in this value encrypted by a Cloud KMS key as a Base64-encoded string.") String getUsername(); void setUsername(String username); @@ -92,7 +92,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "JDBC connection password.", helpText = - "Optional: The password to use for the JDBC connection. You can pass in this value encrypted by a Cloud KMS key as a Base64-encoded string.") + "The password to use for the JDBC connection. You can pass in this value encrypted by a Cloud KMS key as a Base64-encoded string.") String getPassword(); void setPassword(String password); @@ -104,7 +104,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "JDBC source SQL query", helpText = - "Optional: The query to run on the source to extract the data.", + "The query to run on the source to extract the data.", example = "select * from sampledb.sample_table") String getQuery(); @@ -139,7 +139,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Google Cloud KMS key", helpText = - "Optional: The Cloud KMS encryption key to use decrypt the username, password, and connection string. If you pass in a Cloud KMS key, the username, password, and connection string must all be passed in encrypted.", + "The Cloud KMS encryption key to use decrypt the username, password, and connection string. If you pass in a Cloud KMS key, the username, password, and connection string must all be passed in encrypted.", example = "projects/your-project/locations/global/keyRings/your-keyring/cryptoKeys/your-key") String getKMSEncryptionKey(); @@ -151,7 +151,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Whether to use column alias to map the rows.", helpText = - "Optional: If enabled (set to `true`), the pipeline uses the column alias ("AS") instead of the column name to map the rows to BigQuery. Defaults to `false`.") + "If enabled (set to `true`), the pipeline uses the column alias ("AS") instead of the column name to map the rows to BigQuery. Defaults to `false`.") @Default.Boolean(false) Boolean getUseColumnAlias(); @@ -163,7 +163,7 @@ public interface JdbcToBigQueryOptions groupName = "Target", description = "Whether to truncate data before writing", helpText = - "Optional: If enabled (set to `true`), the pipeline truncates before loading data into BigQuery. Defaults to `false`, which causes the pipeline to append data.") + "If enabled (set to `true`), the pipeline truncates before loading data into BigQuery. Defaults to `false`, which causes the pipeline to append data.") @Default.Boolean(false) Boolean getIsTruncate(); @@ -175,7 +175,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "The name of a column of numeric type that will be used for partitioning.", helpText = - " Optional: If this parameter is provided (along with `table`), JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. 
Currently, only supports `Long` partition columns.") + "If this parameter is provided (along with `table`), JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports `Long` partition columns.") String getPartitionColumn(); void setPartitionColumn(String partitionColumn); @@ -186,7 +186,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Name of the table in the external database.", helpText = - "Optional: The table to read from when using partitions. This parameter also accepts a subquery in parentheses.", + "The table to read from when using partitions. This parameter also accepts a subquery in parentheses.", example = "(select id, name from Person) as subq") String getTable(); @@ -198,7 +198,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "The number of partitions.", helpText = - "Optional: The number of partitions. With the lower and upper bound, this value forms partition strides for generated `WHERE` clause expressions that are used to split the partition column evenly. When the input is less than `1`, the number is set to `1`.") + "The number of partitions. With the lower and upper bound, this value forms partition strides for generated `WHERE` clause expressions that are used to split the partition column evenly. When the input is less than `1`, the number is set to `1`.") Integer getNumPartitions(); void setNumPartitions(Integer numPartitions); @@ -209,7 +209,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Lower bound of partition column.", helpText = - "Optional: The lower bound to use in the partition scheme. If not provided, this value is automatically inferred by Apache Beam for the supported types.") + "The lower bound to use in the partition scheme. If not provided, this value is automatically inferred by Apache Beam for the supported types.") Long getLowerBound(); void setLowerBound(Long lowerBound); @@ -220,7 +220,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Upper bound of partition column", helpText = - "Optional: The upper bound to use in the partition scheme. If not provided, this value is automatically inferred by Apache Beam for the supported types.") + "The upper bound to use in the partition scheme. 
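To illustrate the partition strides mentioned above, the following sketch shows how a lower bound, upper bound, and partition count could be expanded into range predicates over the partition column. The column name and bounds are illustrative, and this is a simplified sketch rather than JdbcIO's actual implementation.

public class PartitionStrideSketch {
  public static void main(String[] args) {
    String partitionColumn = "id"; // illustrative column name
    long lowerBound = 0L;
    long upperBound = 1_000L;
    int numPartitions = 4;

    // Each partition reads one contiguous range of the partition column.
    long stride = (upperBound - lowerBound) / numPartitions + 1;
    for (long start = lowerBound; start <= upperBound; start += stride) {
      long end = Math.min(start + stride - 1, upperBound);
      System.out.printf("WHERE %s BETWEEN %d AND %d%n", partitionColumn, start, end);
    }
  }
}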
If not provided, this value is automatically inferred by Apache Beam for the supported types.") Long getUpperBound(); void setUpperBound(Long lowerBound); diff --git a/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java b/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java index ee3a87c7c3..c258588dcd 100644 --- a/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java +++ b/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java @@ -186,7 +186,7 @@ public interface JmsToPubsubOptions extends PipelineOptions { @TemplateParameter.Text( order = 5, description = "JMS Username", - helpText = "\t(Optional) The username to use for authentication on the JMS server.", + helpText = "The username to use for authentication on the JMS server.", example = "sampleusername") String getUsername(); @@ -195,7 +195,7 @@ public interface JmsToPubsubOptions extends PipelineOptions { @TemplateParameter.Text( order = 6, description = "JMS Password", - helpText = "(Optional) The password associated with the provided username.", + helpText = "The password associated with the provided username.", example = "samplepassword") String getPassword(); From d8ba7dc55071a2bd136e6f176b5c9d8d83175199 Mon Sep 17 00:00:00 2001 From: Archana Gupta Date: Wed, 1 May 2024 12:59:03 +0530 Subject: [PATCH 04/70] Fixed typos --- .../java/com/google/cloud/teleport/spanner/ExportPipeline.java | 2 +- .../com/google/cloud/teleport/templates/PubSubToBigQuery.java | 2 +- .../com/google/cloud/teleport/templates/PubsubToPubsub.java | 2 +- .../teleport/v2/options/SpannerChangeStreamsToGcsOptions.java | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java index 6abfe1f306..4bde7eef7a 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java @@ -216,7 +216,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Use independent compute resource (Spanner DataBoost).", helpText = - "Set to `true` to use the compute resources of Spanner Data Boost to run the job with near-zero impact on Spanner OLTP workflows. This requires the `spanner.databases.useDataBoost` IAM permission. For more information, see Data Boost overview(https://cloud.google.com/spanner/docs/databoost/databoost-overview).") + "Set to `true` to use the compute resources of Spanner Data Boost to run the job with near-zero impact on Spanner OLTP workflows. This requires the `spanner.databases.useDataBoost` IAM permission. 
For more information, see Data Boost overview (https://cloud.google.com/spanner/docs/databoost/databoost-overview).") @Default.Boolean(false) ValueProvider getDataBoostEnabled(); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java b/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java index c03c9e7725..ec763ab5f4 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java @@ -203,7 +203,7 @@ public interface Options extends PipelineOptions, JavascriptTextTransformerOptio description = "Table for messages failed to reach the output table (i.e., Deadletter table)", helpText = - "The BigQuery table for messages that failed to reach the output table, in the format of `:.`. If it doesn't exist, it is created during pipeline execution. If not specified, OUTPUT_TABLE_SPEC_error_records is used instead.") + "The BigQuery table for messages that failed to reach the output table, in the format of `:.`. If it doesn't exist, it is created during pipeline execution. If not specified, `OUTPUT_TABLE_SPEC_error_records` is used instead.") ValueProvider getOutputDeadletterTable(); void setOutputDeadletterTable(ValueProvider value); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java index a7bc99181b..baff969afc 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java @@ -143,7 +143,7 @@ public interface Options extends PipelineOptions, StreamingOptions { order = 2, description = "Output Pub/Sub topic", helpText = - "Cloud Pub/Sub topic to write the output to..", + "Cloud Pub/Sub topic to write the output to.", example = "projects/your-project-id/topics/your-topic-name") @Validation.Required ValueProvider getOutputTopic(); diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java index ecd68de243..a819e51c48 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java @@ -68,7 +68,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Spanner database role", helpText = - "The Spanner database role to use when running the template. This parameter is required only when the IAM principal who is running the template is a fine-grained access control user. The database role must have the `SELECT` privilege on the change stream and the `EXECUTE` privilege on the change stream's read function. For more information, see [Fine-grained access control for change streams](https://cloud.google.com/spanner/docs/fgac-change-streams).") + "The Spanner database role to use when running the template. This parameter is required only when the IAM principal who is running the template is a fine-grained access control user. The database role must have the `SELECT` privilege on the change stream and the `EXECUTE` privilege on the change stream's read function. 
For more information, see Fine-grained access control for change streams (https://cloud.google.com/spanner/docs/fgac-change-streams).") String getSpannerDatabaseRole(); void setSpannerDatabaseRole(String spannerDatabaseRole); From 199fa612676178379366597662da7aacd66f4f23 Mon Sep 17 00:00:00 2001 From: Archana Gupta Date: Mon, 6 May 2024 10:17:34 +0530 Subject: [PATCH 05/70] Apply suggestions from code review Changes made as recommended by technical writer Rebecca Co-authored-by: Rebecca Szper <98840847+rszper@users.noreply.github.com> --- .../cloud/teleport/bigtable/BigtableToAvro.java | 2 +- .../cloud/teleport/spanner/ExportPipeline.java | 12 ++++++------ .../cloud/teleport/spanner/ImportPipeline.java | 16 ++++++++-------- .../teleport/templates/PubSubToBigQuery.java | 6 +++--- .../cloud/teleport/templates/PubsubToPubsub.java | 8 ++++---- .../cloud/teleport/templates/TextToPubsub.java | 2 +- .../options/AstraDbToBigQueryOptions.java | 14 +++++++------- .../SpannerChangeStreamsToGcsOptions.java | 14 +++++++------- .../teleport/v2/templates/PubSubToBigQuery.java | 10 +++++----- .../v2/options/JdbcToBigQueryOptions.java | 12 ++++++------ .../cloud/teleport/v2/templates/JmsToPubsub.java | 6 +++--- 11 files changed, 51 insertions(+), 51 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java b/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java index 4cb5d2b2ad..3b55dd29fe 100644 --- a/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java +++ b/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java @@ -78,7 +78,7 @@ public interface Options extends PipelineOptions { order = 1, description = "Project ID", helpText = - "The ID of the Google Cloud project of the Bigtable instance that you want to read data from.") + "The ID of the Google Cloud project that contains the Bigtable instance that you want to read data from.") ValueProvider getBigtableProjectId(); @SuppressWarnings("unused") diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java index 4bde7eef7a..26a7b55993 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java @@ -97,7 +97,7 @@ public interface ExportPipelineOptions extends PipelineOptions { order = 3, description = "Cloud Storage output directory", helpText = - "The Cloud Storage path you want to export Avro files to. The export job creates a new directory under this path that contains the exported files.", + "The Cloud Storage path to export Avro files to. The export job creates a new directory under this path that contains the exported files.", example = "gs://your-bucket/your-path") ValueProvider getOutputDir(); @@ -146,7 +146,7 @@ public interface ExportPipelineOptions extends PipelineOptions { }, description = "Snapshot time", helpText = - "The timestamp that corresponds to the version of the Spanner database that you want to read. The timestamp must be specified as per RFC 3339 UTC "Zulu" format. The timestamp must be in the past and Maximum timestamp staleness applies.", + "The timestamp that corresponds to the version of the Spanner database that you want to read. The timestamp must be specified as per RFC 3339 UTC "Zulu" format. 
The timestamp must be in the past, and maximum timestamp staleness applies.", example = "1990-12-31T23:59:60Z") @Default.String(value = "") ValueProvider getSnapshotTime(); @@ -157,7 +157,7 @@ public interface ExportPipelineOptions extends PipelineOptions { order = 8, optional = true, description = "Cloud Spanner Project Id", - helpText = "The Google Cloud Project ID of the Spanner database that you want to read data from.") + helpText = "The ID of the Google Cloud project that contains the Spanner database that you want to read data from.") ValueProvider getSpannerProjectId(); void setSpannerProjectId(ValueProvider value); @@ -179,7 +179,7 @@ public interface ExportPipelineOptions extends PipelineOptions { regexes = {"^[a-zA-Z0-9_]+(,[a-zA-Z0-9_]+)*$"}, description = "Cloud Spanner table name(s).", helpText = - "A comma-separated list of tables specifying the subset of the Spanner database to export. If you set this parameter, you must include all of the related tables (parent tables and foreign key referenced tables) or else set the `shouldExportRelatedTables` parameter to `true`.") + "A comma-separated list of tables specifying the subset of the Spanner database to export. If you set this parameter, you must either include all of the related tables (parent tables and foreign key referenced tables) or set the `shouldExportRelatedTables` parameter to `true`.") @Default.String(value = "") ValueProvider getTableNames(); @@ -206,7 +206,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Priority for Spanner RPC invocations", helpText = - "The request priority for Spanner calls. Possible values are `HIGH`, `MEDIUM`, `LOW`. The default value is `MEDIUM`.") + "The request priority for Spanner calls. Possible values are `HIGH`, `MEDIUM`, and `LOW`. The default value is `MEDIUM`.") ValueProvider getSpannerPriority(); void setSpannerPriority(ValueProvider value); @@ -216,7 +216,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Use independent compute resource (Spanner DataBoost).", helpText = - "Set to `true` to use the compute resources of Spanner Data Boost to run the job with near-zero impact on Spanner OLTP workflows. This requires the `spanner.databases.useDataBoost` IAM permission. For more information, see Data Boost overview (https://cloud.google.com/spanner/docs/databoost/databoost-overview).") + "Set to `true` to use the compute resources of Spanner Data Boost to run the job with near-zero impact on Spanner OLTP workflows. When set to `true`, you also need the `spanner.databases.useDataBoost` IAM permission. For more information, see the Data Boost overview (https://cloud.google.com/spanner/docs/databoost/databoost-overview).") @Default.Boolean(false) ValueProvider getDataBoostEnabled(); diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java index a64b0b471b..2a5fc96327 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java @@ -106,7 +106,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Indexes", helpText = - "If `true`, the pipeline waits for indexes to be created. If `false`, the job might complete while indexes are still being created in the background. Default: `false`.") + "If `true`, the pipeline waits for indexes to be created. 
If `false`, the job might complete while indexes are still being created in the background. The default value is `false`.") @Default.Boolean(false) ValueProvider getWaitForIndexes(); @@ -117,7 +117,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Foreign Keys", helpText = - "If `true`, the pipeline waits for foreign keys to be created. If `false`, the job might complete while foreign keys are still being created in the background. Default: `false`.") + "If `true`, the pipeline waits for foreign keys to be created. If `false`, the job might complete while foreign keys are still being created in the background. The default value is `false`.") @Default.Boolean(false) ValueProvider getWaitForForeignKeys(); @@ -128,7 +128,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Change Streams", helpText = - "If `true`, the pipeline waits for change streams to be created. If `false`, the job might complete while change streams are still being created in the background. Default: `true`.") + "If `true`, the pipeline waits for change streams to be created. If `false`, the job might complete while change streams are still being created in the background. The default value is `true`.") @Default.Boolean(true) ValueProvider getWaitForChangeStreams(); @@ -139,7 +139,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Sequences", helpText = - "By default the import pipeline is blocked on sequence creation. If `false`, it may" + "By default, the import pipeline is blocked on sequence creation. If `false`, the import pipeline might" + " complete with sequences still being created in the background.") @Default.Boolean(true) ValueProvider getWaitForSequences(); @@ -151,7 +151,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Create Indexes early", helpText = - "Specifies whether to enable early index creation. If the template runs a large number of DDL statements, it's more efficient to create indexes before loading data. Therefore, the default behavior is to create the indexes first when the number of DDL statements exceeds a threshold. To disable this feature, `set earlyIndexCreateFlag` to `false`. Default: `true`.") + "Specifies whether early index creation is enabled. If the template runs a large number of DDL statements, it's more efficient to create indexes before loading data. Therefore, the default behavior is to create the indexes first when the number of DDL statements exceeds a threshold. To disable this feature, set `earlyIndexCreateFlag` to `false`. The default value is `true`.") @Default.Boolean(true) ValueProvider getEarlyIndexCreateFlag(); @@ -166,7 +166,7 @@ public interface Options extends PipelineOptions { order = 9, optional = true, description = "Cloud Spanner Project Id", - helpText = "The Google Cloud project ID of the Spanner database. If not set, the default Google Cloud project is used.") + helpText = "The ID of the Google Cloud project that contains the Spanner database. If not set, the default Google Cloud project is used.") ValueProvider getSpannerProjectId(); void setSpannerProjectId(ValueProvider value); @@ -178,7 +178,7 @@ public interface Options extends PipelineOptions { optional = true, regexes = {"[0-9]+"}, description = "DDL Creation timeout in minutes", - helpText = "The timeout, in minutes, for DDL statements performed by the template. 
The default value is 30 minutes.") + helpText = "The timeout in minutes for DDL statements performed by the template. The default value is 30 minutes.") @Default.Integer(30) ValueProvider getDdlCreationTimeoutInMinutes(); @@ -194,7 +194,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Priority for Spanner RPC invocations", helpText = - "The request priority for Spanner calls. Possible values are `HIGH`, `MEDIUM`, `LOW`. The default value is `MEDIUM`.") + "The request priority for Spanner calls. Possible values are `HIGH`, `MEDIUM`, and `LOW`. The default value is `MEDIUM`.") ValueProvider getSpannerPriority(); void setSpannerPriority(ValueProvider value); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java b/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java index ec763ab5f4..99bb755ec5 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java @@ -166,7 +166,7 @@ public interface Options extends PipelineOptions, JavascriptTextTransformerOptio order = 1, description = "BigQuery output table", helpText = - "The BigQuery output table location, in the format of `:.`") + "The BigQuery output table location, in the format `:.`") ValueProvider getOutputTableSpec(); void setOutputTableSpec(ValueProvider value); @@ -183,7 +183,7 @@ public interface Options extends PipelineOptions, JavascriptTextTransformerOptio order = 3, description = "Pub/Sub input subscription", helpText = - "The Pub/Sub input subscription to read from, in the format of `projects//subscriptions/`.") + "The Pub/Sub input subscription to read from, in the format `projects//subscriptions/`.") ValueProvider getInputSubscription(); void setInputSubscription(ValueProvider value); @@ -203,7 +203,7 @@ public interface Options extends PipelineOptions, JavascriptTextTransformerOptio description = "Table for messages failed to reach the output table (i.e., Deadletter table)", helpText = - "The BigQuery table for messages that failed to reach the output table, in the format of `:.`. If it doesn't exist, it is created during pipeline execution. If not specified, `OUTPUT_TABLE_SPEC_error_records` is used instead.") + "The BigQuery table to use for messages that failed to reach the output table, in the format of `:.`. If the table doesn't exist, it is created during pipeline execution. 
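The fallback described for the dead-letter table can be sketched as follows; the table spec value is invented, and the helper simply mirrors the documented default of appending _error_records to the output table spec.

public class DeadletterDefaultSketch {
  static String deadletterTable(String outputTableSpec, String outputDeadletterTable) {
    if (outputDeadletterTable != null && !outputDeadletterTable.isEmpty()) {
      return outputDeadletterTable;
    }
    // Documented default: derive the table name from the output table spec.
    return outputTableSpec + "_error_records";
  }

  public static void main(String[] args) {
    System.out.println(deadletterTable("my-project:my_dataset.events", null));
    // prints my-project:my_dataset.events_error_records
  }
}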
If not specified, `OUTPUT_TABLE_SPEC_error_records` is used.") ValueProvider getOutputDeadletterTable(); void setOutputDeadletterTable(ValueProvider value); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java index baff969afc..658580b607 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java @@ -132,7 +132,7 @@ public interface Options extends PipelineOptions, StreamingOptions { order = 1, description = "Pub/Sub input subscription", helpText = - "Pub/Sub subscription to read the input from.", + "The Pub/Sub subscription to read the input from.", example = "projects/your-project-id/subscriptions/your-subscription-name") @Validation.Required ValueProvider getInputSubscription(); @@ -143,7 +143,7 @@ public interface Options extends PipelineOptions, StreamingOptions { order = 2, description = "Output Pub/Sub topic", helpText = - "Cloud Pub/Sub topic to write the output to.", + "The Pub/Sub topic to write the output to.", example = "projects/your-project-id/topics/your-topic-name") @Validation.Required ValueProvider getOutputTopic(); @@ -155,7 +155,7 @@ public interface Options extends PipelineOptions, StreamingOptions { optional = true, description = "Event filter key", helpText = - "Filter events based on an attribute key. No filters are applied if `filterKey` is not specified.") + "The attribute key to use to filter events based. No filters are applied if `filterKey` is not specified.") ValueProvider getFilterKey(); void setFilterKey(ValueProvider filterKey); @@ -165,7 +165,7 @@ public interface Options extends PipelineOptions, StreamingOptions { optional = true, description = "Event filter value", helpText = - "Filter attribute value to use in case a filterKey is provided. A null `filterValue` is used by default.") + "The attribute value to use to filter events when a `filterKey` is provided. By default, a null `filterValue` is used.") ValueProvider getFilterValue(); void setFilterValue(ValueProvider filterValue); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/TextToPubsub.java b/v1/src/main/java/com/google/cloud/teleport/templates/TextToPubsub.java index ac07eaa34d..c1e7a9d2ee 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/TextToPubsub.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/TextToPubsub.java @@ -74,7 +74,7 @@ public interface Options extends PipelineOptions { order = 2, description = "Output Pub/Sub topic", helpText = - "The Pub/Sub input topic to write to. The name must be in the format of `projects//topics/`.", + "The Pub/Sub input topic to write to. 
The name must be in the format `projects//topics/`.", example = "projects/your-project-id/topics/your-topic-name") @Required ValueProvider getOutputTopic(); diff --git a/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java b/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java index 040fea7c9f..f5ee4cdbc9 100644 --- a/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java +++ b/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java @@ -33,7 +33,7 @@ interface BigQueryWriteOptions extends PipelineOptions, DataflowPipelineOptions description = "BigQuery output table", optional = true, helpText = - "The BigQuery table location to write the output to. The table should be in the format `:.`. The table's schema must match the input objects.") + "The BigQuery table location to write the output to. Use the format `:.`. The table's schema must match the input objects.") String getOutputTableSpec(); @SuppressWarnings("unused") @@ -45,7 +45,7 @@ interface AstraDbSourceOptions extends PipelineOptions { @TemplateParameter.Text( order = 1, description = "Astra token", - helpText = "Token value or secret resource ID.", + helpText = "The token value or secret resource ID.", example = "AstraCS:abcdefghij") @Validation.Required @SuppressWarnings("unused") @@ -57,7 +57,7 @@ interface AstraDbSourceOptions extends PipelineOptions { @TemplateParameter.Text( order = 2, description = "Database identifier", - helpText = "Database unique identifier (uuid).", + helpText = "The database unique identifier (uuid).", example = "cf7af129-d33a-498f-ad06-d97a6ee6eb7") @Validation.Required @SuppressWarnings("unused") @@ -70,7 +70,7 @@ interface AstraDbSourceOptions extends PipelineOptions { order = 3, description = "Cassandra keyspace", regexes = {"^[a-zA-Z0-9][a-zA-Z0-9_]{0,47}$"}, - helpText = "Name of the Cassandra keyspace inside Astra database.") + helpText = "The name of the Cassandra keyspace inside of the Astra database.") String getAstraKeyspace(); @SuppressWarnings("unused") @@ -80,7 +80,7 @@ interface AstraDbSourceOptions extends PipelineOptions { order = 4, description = "Cassandra table", regexes = {"^[a-zA-Z][a-zA-Z0-9_]*$"}, - helpText = "Name of the table inside the Cassandra database.", + helpText = "The name of the table inside of the Cassandra database.", example = "my_table") @SuppressWarnings("unused") String getAstraTable(); @@ -92,7 +92,7 @@ interface AstraDbSourceOptions extends PipelineOptions { order = 5, optional = true, description = "Cassandra CQL Query", - helpText = "Query to filter rows instead of reading the whole table.") + helpText = "The query to use to filter rows instead of reading the whole table.") @SuppressWarnings("unused") String getAstraQuery(); @@ -115,7 +115,7 @@ interface AstraDbSourceOptions extends PipelineOptions { order = 7, optional = true, description = "Token range count", - helpText = "The minimal number of splits to distribute the query.") + helpText = "The minimal number of splits to use to distribute the query.") Integer getMinTokenRangesCount(); @SuppressWarnings("unused") diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java index 
a819e51c48..e1646cfc21 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java @@ -39,7 +39,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Spanner Project ID", helpText = - "Project to read change streams from. This is also the project where the change streams connector metadata table is created. The default for this parameter is the project where the Dataflow pipeline is running.") + "The ID of the Google Cloud project that contains the Spanner database to read change streams from. This project is also where the change streams connector metadata table is created. The default for this parameter is the project where the Dataflow pipeline is running.") @Default.String("") String getSpannerProjectId(); @@ -97,7 +97,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Cloud Spanner metadata table name", helpText = - "The Spanner change streams connector metadata table name to use. If not provided, a Spanner change streams metadata table will automatically be created during the pipeline flow. This parameter must be provided when updating an existing pipeline and shouldn't be provided otherwise.") + "The Spanner change streams connector metadata table name to use. If not provided, a Spanner change streams metadata table is automatically created during the pipeline flow. You must provide a value for this parameter when updating an existing pipeline. Otherwise, don't use this parameter.") String getSpannerMetadataTableName(); void setSpannerMetadataTableName(String value); @@ -116,7 +116,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "The timestamp to read change streams from", helpText = - "The starting DateTime, inclusive, to use for reading change streams. Ex-2021-10-12T07:20:50.52Z. Defaults to the timestamp when the pipeline starts, i.e. the current time.") + "The starting DateTime, inclusive, to use for reading change streams, in the format Ex-2021-10-12T07:20:50.52Z. Defaults to the timestamp when the pipeline starts, that is, the current time.") @Default.String("") String getStartTimestamp(); @@ -127,7 +127,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "The timestamp to read change streams to", helpText = - "The ending DateTime, inclusive, to use for reading change streams. Ex-2021-10-12T07:20:50.52Z. Defaults to an infinite time in the future.") + "The ending DateTime, inclusive, to use for reading change streams. For example, Ex-2021-10-12T07:20:50.52Z. Defaults to an infinite time in the future.") @Default.String("") String getEndTimestamp(); @@ -150,7 +150,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Output file format", helpText = - "The format of the output Cloud Storage file. Allowed formats are TEXT, AVRO. Default is AVRO.") + "The format of the output Cloud Storage file. Allowed formats are TEXT and AVRO. Defaults to AVRO.") @Default.Enum("AVRO") FileFormat getOutputFileFormat(); @@ -161,7 +161,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Window duration", helpText = - "The window duration is the interval in which data is written to the output directory. Configure the duration based on the pipeline's throughput. 
For example, a higher throughput might require smaller window sizes so that the data fits into memory. Defaults to 5m, with a minimum of 1s. Allowed formats are: [int]s (for seconds, example: 5s), [int]m (for minutes, example: 12m), [int]h (for hours, example: 2h).", + "The window duration is the interval in which data is written to the output directory. Configure the duration based on the pipeline's throughput. For example, a higher throughput might require smaller window sizes so that the data fits into memory. Defaults to 5m (five minutes), with a minimum of 1s (one second). Allowed formats are: [int]s (for seconds, example: 5s), [int]m (for minutes, example: 12m), [int]h (for hours, example: 2h).", example = "5m") @Default.String("5m") String getWindowDuration(); @@ -178,7 +178,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Priority for Spanner RPC invocations", helpText = - "The request priority for Spanner calls. The value must be one of:[HIGH,MEDIUM,LOW]. (Default: HIGH)") + "The request priority for Spanner calls. The value must be HIGH, MEDIUM, or LOW. Defaults to HIGH.") @Default.Enum("HIGH") RpcPriority getRpcPriority(); diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java index aa8f81ece9..44d7ae02c8 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java @@ -183,7 +183,7 @@ public interface Options order = 1, description = "BigQuery output table", helpText = - "The BigQuery output table location, in the format of `:.`") + "The BigQuery output table location, in the format `:.`.") String getOutputTableSpec(); void setOutputTableSpec(String value); @@ -202,7 +202,7 @@ public interface Options optional = true, description = "Pub/Sub input subscription", helpText = - "The Pub/Sub input subscription to read from, in the format of `projects//subscriptions/`.") + "The Pub/Sub input subscription to read from, in the format `projects//subscriptions/`.") String getInputSubscription(); void setInputSubscription(String value); @@ -213,7 +213,7 @@ public interface Options description = "Table for messages failed to reach the output table (i.e., Deadletter table)", helpText = - "The BigQuery table for messages that failed to reach the output table, in the format of` :.`. If it doesn't exist, it is created during pipeline execution. If not specified, `OUTPUT_TABLE_SPEC_error_records` is used instead.") + "The BigQuery table to use for messages that failed to reach the output table, in the format `:.`. If the table doesn't exist, it's created during pipeline execution. If not specified, `OUTPUT_TABLE_SPEC_error_records` is used.") String getOutputDeadletterTable(); void setOutputDeadletterTable(String value); @@ -224,8 +224,8 @@ public interface Options description = "Use at at-least-once semantics in BigQuery Storage Write API", helpText = "This parameter takes effect only if `Use BigQuery Storage Write API` is enabled. If" - + " enabled the at-least-once semantics will be used for Storage Write API, otherwise" - + " exactly-once semantics will be used.", + + " enabled, at-least-once semantics are be used for Storage Write API. 
Otherwise," + + " exactly-once semantics are used.", hiddenUi = true) @Default.Boolean(false) @Override diff --git a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java index 4e0f0419a6..73e509898f 100644 --- a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java +++ b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java @@ -81,7 +81,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "JDBC connection username.", helpText = - "The username to use for the JDBC connection. You can pass in this value encrypted by a Cloud KMS key as a Base64-encoded string.") + "The username to use for the JDBC connection. You can pass in this value as a string that's encrypted with a Cloud KMS key and then Base64-encoded. Remove whitespace characters from the Base64-encoded string.") String getUsername(); void setUsername(String username); @@ -92,7 +92,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "JDBC connection password.", helpText = - "The password to use for the JDBC connection. You can pass in this value encrypted by a Cloud KMS key as a Base64-encoded string.") + "The password to use for the JDBC connection. You can pass in this value as a string that's encrypted with a Cloud KMS key and then Base64-encoded. Remove whitespace characters from the Base64-encoded string.") String getPassword(); void setPassword(String password); @@ -151,7 +151,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Whether to use column alias to map the rows.", helpText = - "If enabled (set to `true`), the pipeline uses the column alias ("AS") instead of the column name to map the rows to BigQuery. Defaults to `false`.") + "If set to `true`, the pipeline uses the column alias ("AS") instead of the column name to map the rows to BigQuery. Defaults to `false`.") @Default.Boolean(false) Boolean getUseColumnAlias(); @@ -163,7 +163,7 @@ public interface JdbcToBigQueryOptions groupName = "Target", description = "Whether to truncate data before writing", helpText = - "If enabled (set to `true`), the pipeline truncates before loading data into BigQuery. Defaults to `false`, which causes the pipeline to append data.") + "If set to `true`, the pipeline truncates before loading data into BigQuery. Defaults to `false`, which causes the pipeline to append data.") @Default.Boolean(false) Boolean getIsTruncate(); @@ -175,7 +175,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "The name of a column of numeric type that will be used for partitioning.", helpText = - "If this parameter is provided (along with `table`), JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports `Long` partition columns.") + "If this parameter is provided with `table`, JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports `Long` partition columns.") String getPartitionColumn(); void setPartitionColumn(String partitionColumn); @@ -247,7 +247,7 @@ public interface JdbcToBigQueryOptions }, optional = true, description = "Create Disposition to use for BigQuery", - helpText = "BigQuery CreateDisposition. 
For example, `CREATE_IF_NEEDED`, `CREATE_NEVER`.") + helpText = "The BigQuery CreateDisposition to use. For example, `CREATE_IF_NEEDED` or `CREATE_NEVER`.") @Default.String("CREATE_NEVER") String getCreateDisposition(); diff --git a/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java b/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java index c258588dcd..304a2295f4 100644 --- a/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java +++ b/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java @@ -153,7 +153,7 @@ public interface JmsToPubsubOptions extends PipelineOptions { optional = false, regexes = {"[a-zA-Z0-9._-]+"}, description = "JMS Queue/Topic Name to read the input from", - helpText = "The name of the JMS topic/queue that data is read from.", + helpText = "The name of the JMS topic or queue that data is read from.", example = "queue") @Validation.Required String getInputName(); @@ -165,7 +165,7 @@ public interface JmsToPubsubOptions extends PipelineOptions { optional = false, regexes = {"[a-zA-Z0-9._-]+"}, description = "JMS Destination Type to read the input from", - helpText = "The JMS destination type to read data from , can be queue or topic.", + helpText = "The JMS destination type to read data from. Can be a queue or a topic.", example = "queue") @Validation.Required String getInputType(); @@ -176,7 +176,7 @@ public interface JmsToPubsubOptions extends PipelineOptions { order = 4, description = "Output Pub/Sub topic", helpText = - "The name of the topic to which data should published, in the format `projects/your-project-id/topics/your-topic-name`.", + "The name of the Pub/Sub topic to publish data to, in the format `projects//topics/`.", example = "projects/your-project-id/topics/your-topic-name") @Validation.Required String getOutputTopic(); From 52e9ca53b3a31e8c3ccf3e92bfc7b904d2009b99 Mon Sep 17 00:00:00 2001 From: Archana Gupta Date: Mon, 6 May 2024 10:47:23 +0530 Subject: [PATCH 06/70] Made changes to the suggestions recommended --- .../com/google/cloud/teleport/spanner/ExportPipeline.java | 2 +- .../cloud/teleport/v2/options/JdbcToBigQueryOptions.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java index 4bde7eef7a..09b3d32789 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java @@ -167,7 +167,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Export Timestamps as Timestamp-micros type", helpText = - "If true, timestamps are exported as a `long` type with `timestamp-micros` logical type. By default, timestamps are exported as ISO-8601 strings at nanosecond precision.") + "If true, timestamps are exported as a `long` type with `timestamp-micros` logical type. 
By default, this is set to `false` and timestamps are exported as ISO-8601 strings at nanosecond precision.") @Default.Boolean(false) ValueProvider getShouldExportTimestampAsLogicalType(); diff --git a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java index 4e0f0419a6..73499570aa 100644 --- a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java +++ b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java @@ -139,7 +139,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Google Cloud KMS key", helpText = - "The Cloud KMS encryption key to use decrypt the username, password, and connection string. If you pass in a Cloud KMS key, the username, password, and connection string must all be passed in encrypted.", + "The Cloud KMS encryption key to decrypt the username, password, and connection string. If you pass in a Cloud KMS key, the username, password, and connection string must all be passed in encrypted.", example = "projects/your-project/locations/global/keyRings/your-keyring/cryptoKeys/your-key") String getKMSEncryptionKey(); @@ -175,7 +175,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "The name of a column of numeric type that will be used for partitioning.", helpText = - "If this parameter is provided (along with `table`), JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports `Long` partition columns.") + "If this parameter is provided (along with the name of the `table` defined as an optional parameter), JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports `Long` partition columns.") String getPartitionColumn(); void setPartitionColumn(String partitionColumn); From 18dc7928013d8efe0a8f120593c0166f96042dd8 Mon Sep 17 00:00:00 2001 From: gauravjainn Date: Mon, 6 May 2024 13:06:41 +0530 Subject: [PATCH 07/70] "Reverting the JdbcToBigQueryOptions java file" --- .../v2/options/JdbcToBigQueryOptions.java | 270 ++++++++++++++++++ 1 file changed, 270 insertions(+) create mode 100644 v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java diff --git a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java new file mode 100644 index 0000000000..06c1178c23 --- /dev/null +++ b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java @@ -0,0 +1,270 @@ +/* + * Copyright (C) 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.teleport.v2.options; + +import com.google.cloud.teleport.metadata.TemplateParameter; +import org.apache.beam.sdk.options.Default; + +/** Interface used by the JdbcToBigQuery pipeline to accept user input. */ +public interface JdbcToBigQueryOptions + extends CommonTemplateOptions, BigQueryStorageApiBatchOptions { + + @TemplateParameter.Text( + order = 1, + optional = false, + regexes = {"^.+$"}, + groupName = "Source", + description = "Comma-separated Cloud Storage path(s) of the JDBC driver(s)", + helpText = "The comma-separated list of driver JAR files.", + example = "gs://your-bucket/driver_jar1.jar,gs://your-bucket/driver_jar2.jar") + String getDriverJars(); + + void setDriverJars(String driverJar); + + @TemplateParameter.Text( + order = 2, + optional = false, + regexes = {"^.+$"}, + groupName = "Source", + description = "JDBC driver class name", + helpText = "The JDBC driver class name.", + example = "com.mysql.jdbc.Driver") + String getDriverClassName(); + + void setDriverClassName(String driverClassName); + + @TemplateParameter.Text( + order = 3, + optional = false, + regexes = { + "(^jdbc:[a-zA-Z0-9/:@.?_+!*=&-;]+$)|(^([A-Za-z0-9+/]{4}){1,}([A-Za-z0-9+/]{0,3})={0,3})" + }, + groupName = "Source", + description = "JDBC connection URL string.", + helpText = + "The JDBC connection URL string. For example, `jdbc:mysql://some-host:3306/sampledb`. You can pass in this value as a string that's encrypted with a Cloud KMS key and then Base64-encoded. Remove whitespace characters from the Base64-encoded string. Note the difference between an Oracle non-RAC database connection string (`jdbc:oracle:thin:@some-host::`) and an Oracle RAC database connection string (`jdbc:oracle:thin:@//some-host[:]/`).", + example = "jdbc:mysql://some-host:3306/sampledb") + String getConnectionURL(); + + void setConnectionURL(String connectionURL); + + @TemplateParameter.Text( + order = 4, + optional = true, + regexes = {"^[a-zA-Z0-9_;!*&=@#-:\\/]+$"}, + groupName = "Source", + description = "JDBC connection property string.", + helpText = + "The properties string to use for the JDBC connection. Use the string format `[propertyName=property;]*.`", + example = "unicode=true;characterEncoding=UTF-8") + String getConnectionProperties(); + + void setConnectionProperties(String connectionProperties); + + @TemplateParameter.Text( + order = 5, + optional = true, + regexes = {"^.+$"}, + groupName = "Source", + description = "JDBC connection username.", + helpText = + "The username to use for the JDBC connection. You can pass in this value as a string that's encrypted with a Cloud KMS key and then Base64-encoded. Remove whitespace characters from the Base64-encoded string.") + String getUsername(); + + void setUsername(String username); + + @TemplateParameter.Password( + order = 6, + optional = true, + groupName = "Source", + description = "JDBC connection password.", + helpText = + "The password to use for the JDBC connection. You can pass in this value as a string that's encrypted with a Cloud KMS key and then Base64-encoded. 
Remove whitespace characters from the Base64-encoded string.") + String getPassword(); + + void setPassword(String password); + + @TemplateParameter.Text( + order = 7, + optional = true, + regexes = {"^.+$"}, + groupName = "Source", + description = "JDBC source SQL query", + helpText = + "The query to run on the source to extract the data.", + example = "select * from sampledb.sample_table") + String getQuery(); + + void setQuery(String query); + + void setOutputTable(String value); + + @TemplateParameter.BigQueryTable( + order = 8, + groupName = "Target", + description = "BigQuery output table", + helpText = + "BigQuery table location to write the output to. The name should be in the format" + + " `:.`. The table's schema must match input objects.", + example = ":.") + String getOutputTable(); + + @TemplateParameter.GcsWriteFolder( + order = 9, + optional = false, + groupName = "Target", + description = "Temporary directory for BigQuery loading process", + helpText = "The temporary directory for the BigQuery loading process.", + example = "gs://your-bucket/your-files/temp_dir") + String getBigQueryLoadingTemporaryDirectory(); + + void setBigQueryLoadingTemporaryDirectory(String directory); + + @TemplateParameter.KmsEncryptionKey( + order = 10, + optional = true, + groupName = "Source", + description = "Google Cloud KMS key", + helpText = + "The Cloud KMS encryption key to decrypt the username, password, and connection string. If you pass in a Cloud KMS key, the username, password, and connection string must all be passed in encrypted.", + example = "projects/your-project/locations/global/keyRings/your-keyring/cryptoKeys/your-key") + String getKMSEncryptionKey(); + + void setKMSEncryptionKey(String keyName); + + @TemplateParameter.Boolean( + order = 11, + optional = true, + groupName = "Source", + description = "Whether to use column alias to map the rows.", + helpText = + "If set to `true`, the pipeline uses the column alias ("AS") instead of the column name to map the rows to BigQuery. Defaults to `false`.") + @Default.Boolean(false) + Boolean getUseColumnAlias(); + + void setUseColumnAlias(Boolean useColumnAlias); + + @TemplateParameter.Boolean( + order = 12, + optional = true, + groupName = "Target", + description = "Whether to truncate data before writing", + helpText = + "If set to `true`, the pipeline truncates before loading data into BigQuery. Defaults to `false`, which causes the pipeline to append data.") + @Default.Boolean(false) + Boolean getIsTruncate(); + + void setIsTruncate(Boolean isTruncate); + + @TemplateParameter.Text( + order = 13, + optional = true, + groupName = "Source", + description = "The name of a column of numeric type that will be used for partitioning.", + helpText = +<<<<<<< HEAD + "If this parameter is provided (along with the name of the `table` defined as an optional parameter), JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports `Long` partition columns.") +======= + "If this parameter is provided with `table`, JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. 
Currently, only supports `Long` partition columns.") +>>>>>>> 199fa612676178379366597662da7aacd66f4f23 + String getPartitionColumn(); + + void setPartitionColumn(String partitionColumn); + + @TemplateParameter.Text( + order = 14, + optional = true, + groupName = "Source", + description = "Name of the table in the external database.", + helpText = + "The table to read from when using partitions. This parameter also accepts a subquery in parentheses.", + example = "(select id, name from Person) as subq") + String getTable(); + + void setTable(String table); + + @TemplateParameter.Integer( + order = 15, + optional = true, + groupName = "Source", + description = "The number of partitions.", + helpText = + "The number of partitions. With the lower and upper bound, this value forms partition strides for generated `WHERE` clause expressions that are used to split the partition column evenly. When the input is less than `1`, the number is set to `1`.") + Integer getNumPartitions(); + + void setNumPartitions(Integer numPartitions); + + @TemplateParameter.Long( + order = 16, + optional = true, + groupName = "Source", + description = "Lower bound of partition column.", + helpText = + "The lower bound to use in the partition scheme. If not provided, this value is automatically inferred by Apache Beam for the supported types.") + Long getLowerBound(); + + void setLowerBound(Long lowerBound); + + @TemplateParameter.Long( + order = 17, + optional = true, + groupName = "Source", + description = "Upper bound of partition column", + helpText = + "The upper bound to use in the partition scheme. If not provided, this value is automatically inferred by Apache Beam for the supported types.") + Long getUpperBound(); + + void setUpperBound(Long lowerBound); + + @TemplateParameter.Integer( + order = 18, + optional = true, + groupName = "Source", + description = "Fetch Size", + // TODO: remove the "Not used for partitioned reads" once + // https://github.com/apache/beam/pull/28999 is released. + helpText = + "The number of rows to be fetched from database at a time. Not used for partitioned reads.") + @Default.Integer(50000) + Integer getFetchSize(); + + void setFetchSize(Integer fetchSize); + + @TemplateParameter.Enum( + order = 19, + enumOptions = { + @TemplateParameter.TemplateEnumOption("CREATE_IF_NEEDED"), + @TemplateParameter.TemplateEnumOption("CREATE_NEVER") + }, + optional = true, + description = "Create Disposition to use for BigQuery", + helpText = "The BigQuery CreateDisposition to use. For example, `CREATE_IF_NEEDED` or `CREATE_NEVER`.") + @Default.String("CREATE_NEVER") + String getCreateDisposition(); + + void setCreateDisposition(String createDisposition); + + @TemplateParameter.GcsReadFile( + order = 20, + optional = true, + description = "Cloud Storage path to BigQuery JSON schema", + helpText = + "The Cloud Storage path for the BigQuery JSON schema. 
If `createDisposition` is set to CREATE_IF_NEEDED, this parameter must be specified.", + example = "gs://your-bucket/your-schema.json") + String getBigQuerySchemaPath(); + + void setBigQuerySchemaPath(String path); +} From 743ebbec8a170ceb45fd30d25eb96d473f834226 Mon Sep 17 00:00:00 2001 From: Archana Gupta Date: Tue, 7 May 2024 10:56:32 +0530 Subject: [PATCH 08/70] Apply suggestions from code review Updating recommendations provided Co-authored-by: Rebecca Szper <98840847+rszper@users.noreply.github.com> --- .../com/google/cloud/teleport/spanner/ExportPipeline.java | 2 +- .../google/cloud/teleport/templates/PubSubToBigQuery.java | 2 +- .../google/cloud/teleport/templates/PubsubToPubsub.java | 2 +- .../v2/astradb/options/AstraDbToBigQueryOptions.java | 2 +- .../v2/options/SpannerChangeStreamsToGcsOptions.java | 4 ++-- .../cloud/teleport/v2/options/JdbcToBigQueryOptions.java | 8 ++++---- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java index 98164c3076..8d77654c9f 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java @@ -146,7 +146,7 @@ public interface ExportPipelineOptions extends PipelineOptions { }, description = "Snapshot time", helpText = - "The timestamp that corresponds to the version of the Spanner database that you want to read. The timestamp must be specified as per RFC 3339 UTC "Zulu" format. The timestamp must be in the past, and maximum timestamp staleness applies.", + "The timestamp that corresponds to the version of the Spanner database that you want to read. The timestamp must be specified by using RFC 3339 UTC "Zulu" format. The timestamp must be in the past, and maximum timestamp staleness applies.", example = "1990-12-31T23:59:60Z") @Default.String(value = "") ValueProvider getSnapshotTime(); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java b/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java index 99bb755ec5..14d33a06a0 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java @@ -203,7 +203,7 @@ public interface Options extends PipelineOptions, JavascriptTextTransformerOptio description = "Table for messages failed to reach the output table (i.e., Deadletter table)", helpText = - "The BigQuery table to use for messages that failed to reach the output table, in the format of `:.`. If the table doesn't exist, it is created during pipeline execution. If not specified, `OUTPUT_TABLE_SPEC_error_records` is used.") + "The BigQuery table to use for messages that fail to reach the output table, in the format of `:.`. If the table doesn't exist, it is created during pipeline execution. 
If not specified, `OUTPUT_TABLE_SPEC_error_records` is used.") ValueProvider getOutputDeadletterTable(); void setOutputDeadletterTable(ValueProvider value); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java index 658580b607..a090f3bba9 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java @@ -155,7 +155,7 @@ public interface Options extends PipelineOptions, StreamingOptions { optional = true, description = "Event filter key", helpText = - "The attribute key to use to filter events based. No filters are applied if `filterKey` is not specified.") + "The attribute key to use to filter events. No filters are applied if `filterKey` is not specified.") ValueProvider getFilterKey(); void setFilterKey(ValueProvider filterKey); diff --git a/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java b/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java index f5ee4cdbc9..aee2942ca1 100644 --- a/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java +++ b/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java @@ -57,7 +57,7 @@ interface AstraDbSourceOptions extends PipelineOptions { @TemplateParameter.Text( order = 2, description = "Database identifier", - helpText = "The database unique identifier (uuid).", + helpText = "The database unique identifier (UUID).", example = "cf7af129-d33a-498f-ad06-d97a6ee6eb7") @Validation.Required @SuppressWarnings("unused") diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java index e1646cfc21..dab4820842 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java @@ -39,7 +39,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Spanner Project ID", helpText = - "The ID of the Google Cloud project that contains the Spanner database to read change streams from. This project is also where the change streams connector metadata table is created. The default for this parameter is the project where the Dataflow pipeline is running.") + "The ID of the Google Cloud project that contains the Spanner database to read change streams from. This project is also where the change streams connector metadata table is created. The default for this parameter is the project where the Dataflow pipeline is running.") @Default.String("") String getSpannerProjectId(); @@ -97,7 +97,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Cloud Spanner metadata table name", helpText = - "The Spanner change streams connector metadata table name to use. If not provided, a Spanner change streams metadata table is automatically created during the pipeline flow. You must provide a value for this parameter when updating an existing pipeline. 
Otherwise, don't use this parameter.") + "The Spanner change streams connector metadata table name to use. If not provided, a Spanner change streams metadata table is automatically created during pipeline execution. You must provide a value for this parameter when updating an existing pipeline. Otherwise, don't use this parameter.") String getSpannerMetadataTableName(); void setSpannerMetadataTableName(String value); diff --git a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java index 06c1178c23..da1bc74743 100644 --- a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java +++ b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java @@ -117,8 +117,8 @@ public interface JdbcToBigQueryOptions groupName = "Target", description = "BigQuery output table", helpText = - "BigQuery table location to write the output to. The name should be in the format" - + " `:.`. The table's schema must match input objects.", + "The BigQuery table location to write the output to. The name should be in the format" + + " `:.`. The table's schema must match input objects.", example = ":.") String getOutputTable(); @@ -139,7 +139,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Google Cloud KMS key", helpText = - "The Cloud KMS encryption key to decrypt the username, password, and connection string. If you pass in a Cloud KMS key, the username, password, and connection string must all be passed in encrypted.", + "The Cloud KMS encryption key to use to decrypt the username, password, and connection string. If you pass in a Cloud KMS key, the username, password, and connection string must all be passed in encrypted.", example = "projects/your-project/locations/global/keyRings/your-keyring/cryptoKeys/your-key") String getKMSEncryptionKey(); @@ -178,7 +178,7 @@ public interface JdbcToBigQueryOptions <<<<<<< HEAD "If this parameter is provided (along with the name of the `table` defined as an optional parameter), JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports `Long` partition columns.") ======= - "If this parameter is provided with `table`, JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports `Long` partition columns.") + "If this parameter is provided with the `table` parameter, JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. 
Currently, only supports `Long` partition columns.") >>>>>>> 199fa612676178379366597662da7aacd66f4f23 String getPartitionColumn(); From 98c9d4e8b9e21cceffe3af373c129a6a6c615272 Mon Sep 17 00:00:00 2001 From: archanagupta03 Date: Tue, 7 May 2024 11:27:51 +0530 Subject: [PATCH 09/70] Resolving HEAD comments --- .../cloud/teleport/v2/options/JdbcToBigQueryOptions.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java index da1bc74743..3478a00404 100644 --- a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java +++ b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java @@ -175,11 +175,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "The name of a column of numeric type that will be used for partitioning.", helpText = -<<<<<<< HEAD "If this parameter is provided (along with the name of the `table` defined as an optional parameter), JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports `Long` partition columns.") -======= - "If this parameter is provided with the `table` parameter, JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports `Long` partition columns.") ->>>>>>> 199fa612676178379366597662da7aacd66f4f23 String getPartitionColumn(); void setPartitionColumn(String partitionColumn); From 82bb215fd0122309c6555dce5b6e046e8e6af97c Mon Sep 17 00:00:00 2001 From: archanagupta03 Date: Tue, 7 May 2024 12:18:22 +0530 Subject: [PATCH 10/70] Pushing changes after running mvn spotless:apply --- .../google/cloud/teleport/bigtable/BigtableToAvro.java | 3 +-- .../google/cloud/teleport/spanner/ExportPipeline.java | 8 ++++---- .../google/cloud/teleport/spanner/ImportPipeline.java | 9 +++++---- .../google/cloud/teleport/templates/PubsubToPubsub.java | 6 ++---- .../v2/options/SpannerChangeStreamsToGcsOptions.java | 3 +-- .../cloud/teleport/v2/options/JdbcToBigQueryOptions.java | 8 ++++---- 6 files changed, 17 insertions(+), 20 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java b/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java index 3b55dd29fe..cc7db434ad 100644 --- a/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java +++ b/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java @@ -107,8 +107,7 @@ public interface Options extends PipelineOptions { @TemplateParameter.GcsWriteFolder( order = 4, description = "Output file directory in Cloud Storage", - helpText = - "The Cloud Storage path where data is written.", + helpText = "The Cloud Storage path where data is written.", example = "gs://mybucket/somefolder") ValueProvider getOutputDirectory(); diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java index 8d77654c9f..e40317d257 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java @@ -107,8 +107,7 @@ public interface ExportPipelineOptions extends PipelineOptions { order = 4, optional = 
true, description = "Cloud Storage temp directory for storing Avro files", - helpText = - "The Cloud Storage path where temporary Avro files are written.") + helpText = "The Cloud Storage path where temporary Avro files are written.") ValueProvider getAvroTempDirectory(); void setAvroTempDirectory(ValueProvider value); @@ -146,7 +145,7 @@ public interface ExportPipelineOptions extends PipelineOptions { }, description = "Snapshot time", helpText = - "The timestamp that corresponds to the version of the Spanner database that you want to read. The timestamp must be specified by using RFC 3339 UTC "Zulu" format. The timestamp must be in the past, and maximum timestamp staleness applies.", + "The timestamp that corresponds to the version of the Spanner database that you want to read. The timestamp must be specified by using RFC 3339 UTC `Zulu` format. The timestamp must be in the past, and maximum timestamp staleness applies.", example = "1990-12-31T23:59:60Z") @Default.String(value = "") ValueProvider getSnapshotTime(); @@ -157,7 +156,8 @@ public interface ExportPipelineOptions extends PipelineOptions { order = 8, optional = true, description = "Cloud Spanner Project Id", - helpText = "The ID of the Google Cloud project that contains the Spanner database that you want to read data from.") + helpText = + "The ID of the Google Cloud project that contains the Spanner database that you want to read data from.") ValueProvider getSpannerProjectId(); void setSpannerProjectId(ValueProvider value); diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java index 2a5fc96327..4376c1d5ea 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java @@ -76,8 +76,7 @@ public interface Options extends PipelineOptions { order = 2, regexes = {"^[a-z_0-9\\-]+$"}, description = "Cloud Spanner database ID", - helpText = - "The database ID of the Spanner database.") + helpText = "The database ID of the Spanner database.") ValueProvider getDatabaseId(); void setDatabaseId(ValueProvider value); @@ -166,7 +165,8 @@ public interface Options extends PipelineOptions { order = 9, optional = true, description = "Cloud Spanner Project Id", - helpText = "The ID of the Google Cloud project that contains the Spanner database. If not set, the default Google Cloud project is used.") + helpText = + "The ID of the Google Cloud project that contains the Spanner database. If not set, the default Google Cloud project is used.") ValueProvider getSpannerProjectId(); void setSpannerProjectId(ValueProvider value); @@ -178,7 +178,8 @@ public interface Options extends PipelineOptions { optional = true, regexes = {"[0-9]+"}, description = "DDL Creation timeout in minutes", - helpText = "The timeout in minutes for DDL statements performed by the template. The default value is 30 minutes.") + helpText = + "The timeout in minutes for DDL statements performed by the template. 
The default value is 30 minutes.") @Default.Integer(30) ValueProvider getDdlCreationTimeoutInMinutes(); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java index a090f3bba9..2bd1f94de5 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java @@ -131,8 +131,7 @@ public interface Options extends PipelineOptions, StreamingOptions { @TemplateParameter.PubsubSubscription( order = 1, description = "Pub/Sub input subscription", - helpText = - "The Pub/Sub subscription to read the input from.", + helpText = "The Pub/Sub subscription to read the input from.", example = "projects/your-project-id/subscriptions/your-subscription-name") @Validation.Required ValueProvider getInputSubscription(); @@ -142,8 +141,7 @@ public interface Options extends PipelineOptions, StreamingOptions { @TemplateParameter.PubsubTopic( order = 2, description = "Output Pub/Sub topic", - helpText = - "The Pub/Sub topic to write the output to.", + helpText = "The Pub/Sub topic to write the output to.", example = "projects/your-project-id/topics/your-topic-name") @Validation.Required ValueProvider getOutputTopic(); diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java index dab4820842..bc06022508 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java @@ -85,8 +85,7 @@ public interface SpannerChangeStreamsToGcsOptions @TemplateParameter.Text( order = 6, description = "Spanner metadata database", - helpText = - "The Spanner database to use for the change streams connector metadata table.") + helpText = "The Spanner database to use for the change streams connector metadata table.") @Validation.Required String getSpannerMetadataDatabase(); diff --git a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java index 3478a00404..8d6b0e96d8 100644 --- a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java +++ b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java @@ -103,8 +103,7 @@ public interface JdbcToBigQueryOptions regexes = {"^.+$"}, groupName = "Source", description = "JDBC source SQL query", - helpText = - "The query to run on the source to extract the data.", + helpText = "The query to run on the source to extract the data.", example = "select * from sampledb.sample_table") String getQuery(); @@ -151,7 +150,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Whether to use column alias to map the rows.", helpText = - "If set to `true`, the pipeline uses the column alias ("AS") instead of the column name to map the rows to BigQuery. Defaults to `false`.") + "If set to `true`, the pipeline uses the column alias (`AS`) instead of the column name to map the rows to BigQuery. 
Defaults to `false`.") @Default.Boolean(false) Boolean getUseColumnAlias(); @@ -247,7 +246,8 @@ public interface JdbcToBigQueryOptions }, optional = true, description = "Create Disposition to use for BigQuery", - helpText = "The BigQuery CreateDisposition to use. For example, `CREATE_IF_NEEDED` or `CREATE_NEVER`.") + helpText = + "The BigQuery CreateDisposition to use. For example, `CREATE_IF_NEEDED` or `CREATE_NEVER`.") @Default.String("CREATE_NEVER") String getCreateDisposition(); From 3954bf219bc78ae70d176394d3cb819c1869ee4a Mon Sep 17 00:00:00 2001 From: Archana Gupta Date: Tue, 7 May 2024 23:25:06 +0530 Subject: [PATCH 11/70] Apply suggestions from code review Finishing touches as per recommendations suggested Co-authored-by: Rebecca Szper <98840847+rszper@users.noreply.github.com> --- .../com/google/cloud/teleport/spanner/ExportPipeline.java | 4 ++-- .../cloud/teleport/v2/options/JdbcToBigQueryOptions.java | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java index e40317d257..949201edac 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java @@ -167,7 +167,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Export Timestamps as Timestamp-micros type", helpText = - "If true, timestamps are exported as a `long` type with `timestamp-micros` logical type. By default, this is set to `false` and timestamps are exported as ISO-8601 strings at nanosecond precision.") + "If true, timestamps are exported as a `long` type with `timestamp-micros` logical type. By default, this parameter is set to `false` and timestamps are exported as ISO-8601 strings at nanosecond precision.") @Default.Boolean(false) ValueProvider getShouldExportTimestampAsLogicalType(); @@ -190,7 +190,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Export necessary Related Spanner tables.", helpText = - "Specifies whether to include related tables. This parameter is used in conjunction with the `tableNames` parameter.") + "Whether to include related tables. This parameter is used in conjunction with the `tableNames` parameter.") @Default.Boolean(false) ValueProvider getShouldExportRelatedTables(); diff --git a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java index 8d6b0e96d8..3b51908f9f 100644 --- a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java +++ b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java @@ -116,7 +116,7 @@ public interface JdbcToBigQueryOptions groupName = "Target", description = "BigQuery output table", helpText = - "The BigQuery table location to write the output to. The name should be in the format" + "The BigQuery table location to write the output to. Use the format" + " `:.`. 
The table's schema must match input objects.", example = ":.") String getOutputTable(); @@ -174,7 +174,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "The name of a column of numeric type that will be used for partitioning.", helpText = - "If this parameter is provided (along with the name of the `table` defined as an optional parameter), JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports `Long` partition columns.") + "If this parameter is provided with the name of the `table` defined as an optional parameter, JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports `Long` partition columns.") String getPartitionColumn(); void setPartitionColumn(String partitionColumn); From 1d34d0e688f608c1ad5470207b7946298a061837 Mon Sep 17 00:00:00 2001 From: an2x <52892974+an2x@users.noreply.github.com> Date: Thu, 9 May 2024 10:42:43 -0400 Subject: [PATCH 12/70] Add support for KAFKA_TOPIC template parameters. --- .../teleport/metadata/TemplateParameter.java | 40 +++++++++++++++++++ .../teleport/metadata/util/MetadataUtils.java | 1 + .../plugin/model/ImageSpecParameter.java | 17 ++++++++ .../plugin/model/ImageSpecParameterType.java | 5 ++- .../plugin/model/TemplateDefinitionsTest.java | 3 ++ .../cloud/teleport/plugin/sample/AtoBOk.java | 10 ++++- 6 files changed, 74 insertions(+), 2 deletions(-) diff --git a/metadata/src/main/java/com/google/cloud/teleport/metadata/TemplateParameter.java b/metadata/src/main/java/com/google/cloud/teleport/metadata/TemplateParameter.java index 24e1e67891..15c5d78efb 100644 --- a/metadata/src/main/java/com/google/cloud/teleport/metadata/TemplateParameter.java +++ b/metadata/src/main/java/com/google/cloud/teleport/metadata/TemplateParameter.java @@ -701,4 +701,44 @@ public final class TemplateParameter { /** Parameter visibility in the UI. */ boolean hiddenUi() default false; } + + /** + * Template parameter containing a Kafka Topic. + * + *
The parameter specifies the fully-qualified name of an Apache Kafka topic. This can be + * either a Google Managed Kafka topic or a non-managed Kafka topic. + */ + @Retention(RetentionPolicy.RUNTIME) + @Target({ElementType.FIELD, ElementType.METHOD}) + public @interface KafkaTopic { + /** Order of appearance. */ + int order() default 999; + + /** Name of the parameter. */ + String name() default ""; + + /** Group Name of the parameter. */ + String groupName() default ""; + + /** Parent Name of the parameter. */ + String parentName() default ""; + + /** List of parent trigger values. */ + String[] parentTriggerValues() default ""; + + /** If parameter is optional. */ + boolean optional() default false; + + /** Description of the parameter. */ + String description(); + + /** Help text of the parameter. */ + String helpText(); + + /** Example of the parameter. */ + String example() default ""; + + /** Parameter visibility in the UI. */ + boolean hiddenUi() default false; + } } diff --git a/metadata/src/main/java/com/google/cloud/teleport/metadata/util/MetadataUtils.java b/metadata/src/main/java/com/google/cloud/teleport/metadata/util/MetadataUtils.java index 9eaf664589..da1ced4fb5 100644 --- a/metadata/src/main/java/com/google/cloud/teleport/metadata/util/MetadataUtils.java +++ b/metadata/src/main/java/com/google/cloud/teleport/metadata/util/MetadataUtils.java @@ -42,6 +42,7 @@ public final class MetadataUtils { TemplateParameter.GcsWriteFile.class, TemplateParameter.GcsWriteFolder.class, TemplateParameter.Integer.class, + TemplateParameter.KafkaTopic.class, TemplateParameter.KmsEncryptionKey.class, TemplateParameter.Long.class, TemplateParameter.Password.class, diff --git a/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/model/ImageSpecParameter.java b/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/model/ImageSpecParameter.java index 1774e215f8..4bc677dccf 100644 --- a/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/model/ImageSpecParameter.java +++ b/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/model/ImageSpecParameter.java @@ -468,6 +468,23 @@ public void processParamType(Annotation parameterAnnotation) { this.setHiddenUi(durationParam.hiddenUi()); this.setParamType(ImageSpecParameterType.TEXT); break; + case "KafkaTopic": + TemplateParameter.KafkaTopic kafkaTopic = + (TemplateParameter.KafkaTopic) parameterAnnotation; + if (!kafkaTopic.name().isEmpty()) { + this.setName(kafkaTopic.name()); + } + processDescriptions( + kafkaTopic.groupName(), + kafkaTopic.description(), + kafkaTopic.helpText(), + kafkaTopic.example()); + this.setParentName(kafkaTopic.parentName()); + this.setParentTriggerValues(kafkaTopic.parentTriggerValues()); + this.setOptional(kafkaTopic.optional()); + this.setHiddenUi(kafkaTopic.hiddenUi()); + this.setParamType(ImageSpecParameterType.KAFKA_TOPIC); + break; default: throw new IllegalArgumentException("Invalid type " + parameterAnnotation); } diff --git a/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/model/ImageSpecParameterType.java b/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/model/ImageSpecParameterType.java index 7f12cc959e..0d1fd972d6 100644 --- a/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/model/ImageSpecParameterType.java +++ b/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/model/ImageSpecParameterType.java @@ -49,5 +49,8 @@ public enum ImageSpecParameterType { ENUM, /** Number parameter. 
*/ - NUMBER; + NUMBER, + + /** Kafka Topic parameter. */ + KAFKA_TOPIC; } diff --git a/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/model/TemplateDefinitionsTest.java b/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/model/TemplateDefinitionsTest.java index 6864e74012..181ff10366 100644 --- a/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/model/TemplateDefinitionsTest.java +++ b/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/model/TemplateDefinitionsTest.java @@ -63,6 +63,9 @@ public void testSampleAtoBOk() { ImageSpecParameter to = metadata.getParameter("to").get(); assertEquals(ImageSpecParameterType.BIGQUERY_TABLE, to.getParamType()); + ImageSpecParameter inputKafkaTopic = metadata.getParameter("inputKafkaTopic").get(); + assertEquals(ImageSpecParameterType.KAFKA_TOPIC, inputKafkaTopic.getParamType()); + ImageSpecParameter logical = metadata.getParameter("logical").get(); assertEquals(ImageSpecParameterType.BOOLEAN, logical.getParamType()); assertEquals("^(true|false)$", logical.getRegexes().get(0)); diff --git a/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/sample/AtoBOk.java b/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/sample/AtoBOk.java index 491d09f776..f0ea357288 100644 --- a/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/sample/AtoBOk.java +++ b/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/sample/AtoBOk.java @@ -99,12 +99,20 @@ public interface AtoBOptions { Boolean getParamWithGroupName(); @TemplateParameter.Text( - order = 8, + order = 9, parentName = "paramWithGroupName", parentTriggerValues = {"true"}, description = "N/A", helpText = "Text that has parent name and parent trigger value") @Default.Boolean(false) Boolean getParamWithParentName(); + + @TemplateParameter.KafkaTopic( + order = 10, + description = "Kafka input topic", + helpText = "Kafka topic to trad from", + example = + "projects/project-foo/locations/us-central1/clusters/cluster-bar/topics/topic-baz") + String getInputKafkaTopic(); } } From c42a7713530700e0e847d5ac8a974d49601557d5 Mon Sep 17 00:00:00 2001 From: Shreya Khajanchi Date: Thu, 9 May 2024 22:23:39 +0530 Subject: [PATCH 13/70] added unit tests to Spanner transaction writer --- .../SpannerTransactionWriterDoFn.java | 13 ++ .../SpannerTransactionWriterDoFnTest.java | 203 ++++++++++++++++++ 2 files changed, 216 insertions(+) create mode 100644 v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java diff --git a/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFn.java b/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFn.java index ec21ac4cee..6c7f9e303c 100644 --- a/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFn.java +++ b/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFn.java @@ -301,4 +301,17 @@ String getTxnTag(PipelineOptions options) { } return txnTag; } + + public void setMapper(ObjectMapper mapper) { + this.mapper = mapper; + } + + public void setSpannerAccessor(SpannerAccessor spannerAccessor) { + this.spannerAccessor = spannerAccessor; + } + + public void setChangeEventSessionConvertor( + ChangeEventSessionConvertor changeEventSessionConvertor) { + this.changeEventSessionConvertor = 
changeEventSessionConvertor; + } } diff --git a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java new file mode 100644 index 0000000000..c9803eba7f --- /dev/null +++ b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java @@ -0,0 +1,203 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.v2.templates; + +import static org.junit.Assert.assertEquals; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.cloud.spanner.DatabaseClient; +import com.google.cloud.spanner.Options; +import com.google.cloud.spanner.TransactionRunner; +import com.google.cloud.teleport.v2.spanner.ddl.Ddl; +import com.google.cloud.teleport.v2.spanner.migrations.constants.Constants; +import com.google.cloud.teleport.v2.spanner.migrations.convertors.ChangeEventSessionConvertor; +import com.google.cloud.teleport.v2.spanner.migrations.schema.Schema; +import com.google.cloud.teleport.v2.templates.datastream.DatastreamConstants; +import com.google.cloud.teleport.v2.values.FailsafeElement; +import org.apache.beam.runners.dataflow.options.DataflowWorkerHarnessOptions; +import org.apache.beam.sdk.io.gcp.spanner.SpannerAccessor; +import org.apache.beam.sdk.io.gcp.spanner.SpannerConfig; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.options.ValueProvider; +import org.apache.beam.sdk.transforms.DoFn; +import org.apache.beam.sdk.values.PCollectionView; +import org.junit.jupiter.api.Test; +import org.mockito.ArgumentCaptor; + +public class SpannerTransactionWriterDoFnTest { + @Test + void testGetTxnTag() { + String[] args = new String[] {"--jobId=123"}; + SpannerConfig spannerConfig = mock(SpannerConfig.class); + DataflowWorkerHarnessOptions options = + PipelineOptionsFactory.fromArgs(args) + .withValidation() + .as(DataflowWorkerHarnessOptions.class); + SpannerTransactionWriterDoFn spannerTransactionWriterDoFn = + new SpannerTransactionWriterDoFn(spannerConfig, null, null, null, "", "mysql", false, true); + String result = spannerTransactionWriterDoFn.getTxnTag(options); + assertEquals(result, "txBy=123"); + } + + Ddl getTestDdl() { + Ddl ddl = + Ddl.builder() + .createTable("Users") + .column("first_name") + .string() + .max() + .endColumn() + .column("last_name") + .string() + .size(5) + .endColumn() + .column("age") + .int64() + .endColumn() + .primaryKey() + 
.asc("first_name") + .desc("last_name") + .end() + .endTable() + .createTable("shadow_Users") + .column("first_name") + .string() + .max() + .endColumn() + .column("last_name") + .string() + .size(5) + .endColumn() + .column("version") + .int64() + .endColumn() + .primaryKey() + .asc("first_name") + .desc("last_name") + .end() + .endTable() + .build(); + return ddl; + } + + @Test + void testProcessElement() throws Exception { + ObjectMapper mapper = new ObjectMapper(); + mapper.enable(DeserializationFeature.USE_BIG_DECIMAL_FOR_FLOATS); + SpannerConfig spannerConfig = mock(SpannerConfig.class); + SpannerAccessor spannerAccessor = mock(SpannerAccessor.class); + PCollectionView ddlView = mock(PCollectionView.class); + Schema schema = mock(Schema.class); + DoFn.ProcessContext processContextMock = mock(DoFn.ProcessContext.class); + DatabaseClient databaseClientMock = mock(DatabaseClient.class); + TransactionRunner transactionCallableMock = mock(TransactionRunner.class); + ValueProvider rpcPriorityValueProviderMock = mock(ValueProvider.class); + ChangeEventSessionConvertor changeEventSessionConvertor = + mock(ChangeEventSessionConvertor.class); + + String[] args = new String[] {"--jobId=123"}; + DataflowWorkerHarnessOptions options = + PipelineOptionsFactory.fromArgs(args) + .withValidation() + .as(DataflowWorkerHarnessOptions.class); + + ObjectNode outputObject = mapper.createObjectNode(); + outputObject.put(DatastreamConstants.EVENT_SOURCE_TYPE_KEY, Constants.MYSQL_SOURCE_TYPE); + outputObject.put(DatastreamConstants.EVENT_TABLE_NAME_KEY, "Users"); + outputObject.put("first_name", "Johny"); + outputObject.put("last_name", "Depp"); + outputObject.put("age", 13); + outputObject.put(DatastreamConstants.MYSQL_TIMESTAMP_KEY, 123); + FailsafeElement failsafeElement = + FailsafeElement.of(outputObject.toString(), outputObject.toString()); + Ddl ddl = getTestDdl(); + + when(processContextMock.element()).thenReturn(failsafeElement); + when(processContextMock.sideInput(any())).thenReturn(ddl); + when(processContextMock.getPipelineOptions()).thenReturn(options); + when(schema.isEmpty()).thenReturn(true); + when(rpcPriorityValueProviderMock.get()).thenReturn(Options.RpcPriority.LOW); + when(spannerConfig.getRpcPriority()).thenReturn(rpcPriorityValueProviderMock); + when(spannerAccessor.getDatabaseClient()).thenReturn(databaseClientMock); + when(changeEventSessionConvertor.transformChangeEventData(eq(outputObject), any(), eq(ddl))) + .thenReturn(outputObject); + when(transactionCallableMock.run(any())).thenReturn(null); + when(databaseClientMock.readWriteTransaction(any(), any())).thenReturn(transactionCallableMock); + + SpannerTransactionWriterDoFn spannerTransactionWriterDoFn = + new SpannerTransactionWriterDoFn( + spannerConfig, ddlView, schema, null, "shadow", "mysql", false, true); + spannerTransactionWriterDoFn.setMapper(mapper); + spannerTransactionWriterDoFn.setChangeEventSessionConvertor(changeEventSessionConvertor); + spannerTransactionWriterDoFn.setSpannerAccessor(spannerAccessor); + spannerTransactionWriterDoFn.processElement(processContextMock); + + verify(processContextMock, times(1)).output(any(com.google.cloud.Timestamp.class)); + } + + @Test + void testProcessElementWithInvalidChangeEvent() throws Exception { + ObjectMapper mapper = new ObjectMapper(); + mapper.enable(DeserializationFeature.USE_BIG_DECIMAL_FOR_FLOATS); + SpannerConfig spannerConfig = mock(SpannerConfig.class); + SpannerAccessor spannerAccessor = mock(SpannerAccessor.class); + PCollectionView ddlView = 
mock(PCollectionView.class); + Schema schema = mock(Schema.class); + DoFn.ProcessContext processContextMock = mock(DoFn.ProcessContext.class); + ChangeEventSessionConvertor changeEventSessionConvertor = + mock(ChangeEventSessionConvertor.class); + + ObjectNode outputObject = mapper.createObjectNode(); + outputObject.put(DatastreamConstants.EVENT_SOURCE_TYPE_KEY, Constants.MYSQL_SOURCE_TYPE); + outputObject.put(DatastreamConstants.EVENT_TABLE_NAME_KEY, "Users1"); + outputObject.put("first_name", "Johny"); + outputObject.put("last_name", "Depp"); + outputObject.put("age", 13); + outputObject.put(DatastreamConstants.MYSQL_TIMESTAMP_KEY, 123); + FailsafeElement failsafeElement = + FailsafeElement.of(outputObject.toString(), outputObject.toString()); + Ddl ddl = getTestDdl(); + + when(processContextMock.element()).thenReturn(failsafeElement); + when(processContextMock.sideInput(any())).thenReturn(ddl); + when(schema.isEmpty()).thenReturn(true); + when(changeEventSessionConvertor.transformChangeEventData(eq(outputObject), any(), eq(ddl))) + .thenReturn(outputObject); + + SpannerTransactionWriterDoFn spannerTransactionWriterDoFn = + new SpannerTransactionWriterDoFn( + spannerConfig, ddlView, schema, null, "shadow", "mysql", false, true); + spannerTransactionWriterDoFn.setMapper(mapper); + spannerTransactionWriterDoFn.setChangeEventSessionConvertor(changeEventSessionConvertor); + spannerTransactionWriterDoFn.setSpannerAccessor(spannerAccessor); + spannerTransactionWriterDoFn.processElement(processContextMock); + + ArgumentCaptor argument = ArgumentCaptor.forClass(FailsafeElement.class); + verify(processContextMock, times(1)) + .output(eq(SpannerTransactionWriter.PERMANENT_ERROR_TAG), argument.capture()); + assertEquals( + "Table from change event does not exist in Spanner. 
table=Users1",
+        argument.getValue().getErrorMessage());
+  }
+}

From f24e8b35efaf6d9da9abde35eea8966b0e6f5972 Mon Sep 17 00:00:00 2001
From: Shreya Khajanchi
Date: Fri, 10 May 2024 10:34:22 +0530
Subject: [PATCH 14/70] fix test issue

---
 .../v2/templates/SpannerTransactionWriterDoFnTest.java | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java
index c9803eba7f..1109173472 100644
--- a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java
+++ b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java
@@ -42,12 +42,12 @@
 import org.apache.beam.sdk.options.ValueProvider;
 import org.apache.beam.sdk.transforms.DoFn;
 import org.apache.beam.sdk.values.PCollectionView;
-import org.junit.jupiter.api.Test;
+import org.junit.Test;
 import org.mockito.ArgumentCaptor;
 
 public class SpannerTransactionWriterDoFnTest {
   @Test
-  void testGetTxnTag() {
+  public void testGetTxnTag() {
     String[] args = new String[] {"--jobId=123"};
     SpannerConfig spannerConfig = mock(SpannerConfig.class);
     DataflowWorkerHarnessOptions options =
@@ -102,7 +102,7 @@ Ddl getTestDdl() {
   }
 
   @Test
-  void testProcessElement() throws Exception {
+  public void testProcessElement() throws Exception {
     ObjectMapper mapper = new ObjectMapper();
     mapper.enable(DeserializationFeature.USE_BIG_DECIMAL_FOR_FLOATS);
     SpannerConfig spannerConfig = mock(SpannerConfig.class);
@@ -157,7 +157,7 @@ void testProcessElement() throws Exception {
   }
 
   @Test
-  void testProcessElementWithInvalidChangeEvent() throws Exception {
+  public void testProcessElementWithInvalidChangeEvent() throws Exception {
     ObjectMapper mapper = new ObjectMapper();
     mapper.enable(DeserializationFeature.USE_BIG_DECIMAL_FOR_FLOATS);
     SpannerConfig spannerConfig = mock(SpannerConfig.class);

From e8ab30af193e0803d4f73fbb3e1e622e8f0b875e Mon Sep 17 00:00:00 2001
From: Shreya Khajanchi
Date: Fri, 10 May 2024 15:27:15 +0530
Subject: [PATCH 15/70] removed validation

---
 .../v2/templates/SpannerTransactionWriterDoFnTest.java | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java
index 1109173472..7b4acb6646 100644
--- a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java
+++ b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java
@@ -45,15 +45,14 @@ import org.junit.Test;
 import org.mockito.ArgumentCaptor;
 
+/** Unit tests for SpannerTransactionWriterDoFn class. */
 public class SpannerTransactionWriterDoFnTest {
   @Test
   public void testGetTxnTag() {
     String[] args = new String[] {"--jobId=123"};
     SpannerConfig spannerConfig = mock(SpannerConfig.class);
     DataflowWorkerHarnessOptions options =
-        PipelineOptionsFactory.fromArgs(args)
-            .withValidation()
-            .as(DataflowWorkerHarnessOptions.class);
+        PipelineOptionsFactory.fromArgs(args).as(DataflowWorkerHarnessOptions.class);
     SpannerTransactionWriterDoFn spannerTransactionWriterDoFn =
         new SpannerTransactionWriterDoFn(spannerConfig, null, null, null, "", "mysql", false, true);
     String result = spannerTransactionWriterDoFn.getTxnTag(options);
     assertEquals(result, "txBy=123");
@@ -118,9 +117,7 @@ public void testProcessElement() throws Exception {
     String[] args = new String[] {"--jobId=123"};
     DataflowWorkerHarnessOptions options =
-        PipelineOptionsFactory.fromArgs(args)
-            .withValidation()
-            .as(DataflowWorkerHarnessOptions.class);
+        PipelineOptionsFactory.fromArgs(args).as(DataflowWorkerHarnessOptions.class);
 
     ObjectNode outputObject = mapper.createObjectNode();
     outputObject.put(DatastreamConstants.EVENT_SOURCE_TYPE_KEY, Constants.MYSQL_SOURCE_TYPE);
     outputObject.put(DatastreamConstants.EVENT_TABLE_NAME_KEY, "Users");

From 5667766d2964db8c3a883807a4b29317e87d8837 Mon Sep 17 00:00:00 2001
From: Shreya Khajanchi
Date: Fri, 10 May 2024 16:30:50 +0530
Subject: [PATCH 16/70] addressing comments

---
 .../SpannerTransactionWriterDoFnTest.java | 49 +++++++++++++++++--
 1 file changed, 45 insertions(+), 4 deletions(-)

diff --git a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java
index 7b4acb6646..f8d3d0b378 100644
--- a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java
+++ b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java
@@ -27,7 +27,9 @@
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import com.google.cloud.spanner.DatabaseClient;
+import com.google.cloud.spanner.Mutation;
 import com.google.cloud.spanner.Options;
+import com.google.cloud.spanner.TransactionContext;
 import com.google.cloud.spanner.TransactionRunner;
 import com.google.cloud.teleport.v2.spanner.ddl.Ddl;
 import com.google.cloud.teleport.v2.spanner.migrations.constants.Constants;
 import com.google.cloud.teleport.v2.spanner.migrations.convertors.ChangeEventSessionConvertor;
 import com.google.cloud.teleport.v2.spanner.migrations.schema.Schema;
 import com.google.cloud.teleport.v2.templates.datastream.DatastreamConstants;
 import com.google.cloud.teleport.v2.values.FailsafeElement;
+import java.util.Iterator;
 import org.apache.beam.runners.dataflow.options.DataflowWorkerHarnessOptions;
 import org.apache.beam.sdk.io.gcp.spanner.SpannerAccessor;
 import org.apache.beam.sdk.io.gcp.spanner.SpannerConfig;
@@ -111,6 +114,7 @@ public void testProcessElement() throws Exception {
     DoFn.ProcessContext processContextMock = mock(DoFn.ProcessContext.class);
     DatabaseClient databaseClientMock = mock(DatabaseClient.class);
     TransactionRunner transactionCallableMock = mock(TransactionRunner.class);
+    TransactionContext transactionContext = mock(TransactionContext.class);
     ValueProvider rpcPriorityValueProviderMock = mock(ValueProvider.class);
     ChangeEventSessionConvertor changeEventSessionConvertor =
         mock(ChangeEventSessionConvertor.class);
@@ -122,10 +126,10 @@ public void testProcessElement() throws Exception {
     ObjectNode outputObject = mapper.createObjectNode();
outputObject.put(DatastreamConstants.EVENT_SOURCE_TYPE_KEY, Constants.MYSQL_SOURCE_TYPE); outputObject.put(DatastreamConstants.EVENT_TABLE_NAME_KEY, "Users"); - outputObject.put("first_name", "Johny"); + outputObject.put("first_name", "Johnny"); outputObject.put("last_name", "Depp"); outputObject.put("age", 13); - outputObject.put(DatastreamConstants.MYSQL_TIMESTAMP_KEY, 123); + outputObject.put(DatastreamConstants.MYSQL_TIMESTAMP_KEY, 12345); FailsafeElement failsafeElement = FailsafeElement.of(outputObject.toString(), outputObject.toString()); Ddl ddl = getTestDdl(); @@ -139,7 +143,12 @@ public void testProcessElement() throws Exception { when(spannerAccessor.getDatabaseClient()).thenReturn(databaseClientMock); when(changeEventSessionConvertor.transformChangeEventData(eq(outputObject), any(), eq(ddl))) .thenReturn(outputObject); - when(transactionCallableMock.run(any())).thenReturn(null); + when(transactionCallableMock.run(any())) + .thenAnswer( + invocation -> { + TransactionRunner.TransactionCallable callable = invocation.getArgument(0); + return callable.run(transactionContext); + }); when(databaseClientMock.readWriteTransaction(any(), any())).thenReturn(transactionCallableMock); SpannerTransactionWriterDoFn spannerTransactionWriterDoFn = @@ -149,6 +158,38 @@ public void testProcessElement() throws Exception { spannerTransactionWriterDoFn.setChangeEventSessionConvertor(changeEventSessionConvertor); spannerTransactionWriterDoFn.setSpannerAccessor(spannerAccessor); spannerTransactionWriterDoFn.processElement(processContextMock); + ArgumentCaptor> argument = ArgumentCaptor.forClass(Iterable.class); + verify(transactionContext, times(1)).buffer(argument.capture()); + Iterable capturedMutations = argument.getValue(); + Iterator mutationIterator = capturedMutations.iterator(); + Mutation actualDataMutation = null; + Mutation actualShadowTableMutation = null; + + if (mutationIterator.hasNext()) { + // Get the first mutation + actualDataMutation = mutationIterator.next(); + + if (mutationIterator.hasNext()) { + // Get the second mutation + actualShadowTableMutation = mutationIterator.next(); + } + } + + Mutation.WriteBuilder dataBuilder = Mutation.newInsertOrUpdateBuilder("Users"); + dataBuilder.set("first_name").to("Johnny"); + dataBuilder.set("last_name").to("Depp"); + dataBuilder.set("age").to(13); + Mutation expectedDataMutation = dataBuilder.build(); + assertEquals(actualDataMutation, expectedDataMutation); + + Mutation.WriteBuilder shadowBuilder = Mutation.newInsertOrUpdateBuilder("shadow_Users"); + shadowBuilder.set("first_name").to("Johnny"); + shadowBuilder.set("last_name").to("Depp"); + shadowBuilder.set("timestamp").to(12345); + shadowBuilder.set("log_file").to(""); + shadowBuilder.set("log_position").to(-1); + Mutation expectedShadowMutation = shadowBuilder.build(); + assertEquals(actualShadowTableMutation, expectedShadowMutation); verify(processContextMock, times(1)).output(any(com.google.cloud.Timestamp.class)); } @@ -168,7 +209,7 @@ public void testProcessElementWithInvalidChangeEvent() throws Exception { ObjectNode outputObject = mapper.createObjectNode(); outputObject.put(DatastreamConstants.EVENT_SOURCE_TYPE_KEY, Constants.MYSQL_SOURCE_TYPE); outputObject.put(DatastreamConstants.EVENT_TABLE_NAME_KEY, "Users1"); - outputObject.put("first_name", "Johny"); + outputObject.put("first_name", "Johnny"); outputObject.put("last_name", "Depp"); outputObject.put("age", 13); outputObject.put(DatastreamConstants.MYSQL_TIMESTAMP_KEY, 123); From 46ada98784d20568e348c5e775902c78b1e636a3 Mon 
Sep 17 00:00:00 2001 From: Jeffrey Kinard Date: Fri, 10 May 2024 12:26:55 -0400 Subject: [PATCH 17/70] Add removed files from bad merge 1490 Signed-off-by: Jeffrey Kinard --- .github/actions/setup-env/action.yml | 5 +- .github/actions/setup-java-env/action.yml | 4 +- .github/scripts/configure-runners.sh | 17 +- .github/scripts/startup-script.sh | 26 +- .github/workflows/go-pr.yml | 8 +- .github/workflows/java-pr.yml | 4 + .github/workflows/prepare-java-cache.yml | 2 +- .github/workflows/release.yml | 2 +- .github/workflows/scorecards-analysis.yml | 2 +- contributor-docs/maintainers-guide.md | 71 ++++- .../BigQueryResourceManagerUtils.java | 30 +- .../BigtableResourceManagerUtils.java | 86 +++++- .../DatastreamResourceManagerUtils.java | 30 +- .../utils/SpannerResourceManagerUtils.java | 53 +++- .../bigquery/BigQueryResourceManagerTest.java | 3 +- .../bigtable/BigtableResourceManagerTest.java | 2 +- .../BigtableResourceManagerUtilsTest.java | 5 +- .../spanner/SpannerResourceManagerTest.java | 4 +- .../SpannerResourceManagerUtilsTest.java | 38 ++- .../plugin/PythonDockerfileGenerator.java | 76 ----- .../plugin/XlangDockerfileGenerator.java | 116 +++++++ .../main/resources/Dockerfile-template-xlang | 72 +++++ .../main/resources/Dockerfile-xlang-template | 51 ---- .../plugin/XlangDockerfileGeneratorTest.java | 58 ++++ .../plugin/maven/TemplatesStageMojo.java | 11 +- pom.xml | 1 + .../common/JavascriptTextTransformer.java | 2 +- .../v2/templates/DataStreamToSpanner.java | 4 +- .../datastream/ChangeEventContextFactory.java | 11 +- .../spanner/ProcessInformationSchema.java | 14 + .../DataStreamToSpannerDatatypeIT.java | 131 ++++---- ...igrationWithMigrationShardIdColumnIT.java} | 42 +-- ...rationWithoutMigrationShardIdColumnIT.java | 286 ++++++++++++++++++ .../v2/templates/DataStreamToSpannerTest.java | 67 ++++ .../ChangeEventContextFactoryTest.java | 81 +++++ .../spanner/ProcessInformationSchemaTest.java | 63 ++++ .../AllDatatypeColumns-statements.sql | 73 +++++ .../AllDatatypeColumns2-statements.sql | 70 +++++ .../mysql-backfill-AllDatatypeColumns.avro | Bin 0 -> 4398 bytes .../mysql-backfill-AllDatatypeColumns.jsonl | 2 - .../mysql-backfill-AllDatatypeColumns2.avro | Bin 0 -> 4317 bytes .../mysql-backfill-AllDatatypeColumns2.jsonl | 2 - .../mysql-cdc-AllDatatypeColumns.jsonl | 2 - .../mysql-cdc-AllDatatypeColumns2.avro | Bin 0 -> 4382 bytes .../mysql-cdc-AllDatatypeColumns2.jsonl | 2 - .../mysql-cdc1-AllDatatypeColumns.avro | Bin 0 -> 3780 bytes .../mysql-cdc2-AllDatatypeColumns.avro | Bin 0 -> 3791 bytes .../Movie-shard1.jsonl | 0 .../Movie-shard2.jsonl | 0 .../Users-backfill-logical-shard1.jsonl | 0 .../Users-backfill-logical-shard2.jsonl | 0 .../Users-backfill-logical-shard3.jsonl | 0 .../Users-backfill-logical-shard4.jsonl | 0 .../Users-cdc-shard1.jsonl | 0 .../Users-cdc-shard2.jsonl | 0 .../mysql-schema.sql | 0 .../mysql-session.json | 0 .../spanner-schema.sql | 0 .../transformation-context-shard1.json | 0 .../transformation-context-shard2.json | 0 .../Users-backfill-logical-shard1.jsonl | 3 + .../Users-backfill-logical-shard2.jsonl | 3 + .../Users-backfill-logical-shard3.jsonl | 3 + .../Users-backfill-logical-shard4.jsonl | 3 + .../Users-cdc-shard1.jsonl | 6 + .../Users-cdc-shard2.jsonl | 6 + .../mysql-schema.sql | 6 + .../spanner-schema.sql | 5 + .../src/main/resources/requirements.txt | 1 - .../src/main/resources/requirements.txt | 1 - .../neo4j/templates/ConstraintsIndicesIT.java | 2 + .../teleport/v2/templates/GCSToSplunk.java | 100 ++++-- .../transforms/BinaryAvroDeserializer.java | 
10 +- .../transforms/BinaryAvroSerializer.java | 13 +- .../v2/templates/KafkaToBigQueryFlex.java | 11 +- .../teleport/v2/transforms/AvroTransform.java | 21 +- v2/kafka-to-gcs/pom.xml | 11 + .../v2/transforms/AvroWriteTransform.java | 2 + .../teleport/v2/templates/KafkaToGcsIT.java | 178 +++++++++++ .../resources/KafkaToGcsIT/avro_schema.avsc | 15 + .../src/main/resources/requirements.txt | 1 - .../src/main/resources/requirements.txt | 1 - v2/sourcedb-to-spanner/pom.xml | 11 + .../v2/options/OptionsToConfigBuilder.java | 98 ++++++ .../v2/options/SourceDbToSpannerOptions.java | 120 ++++++-- .../v2/source/DataSourceProvider.java | 145 --------- .../teleport/v2/source/reader/ReaderImpl.java | 55 ++++ .../v2/source/reader/auth/dbauth/DbAuth.java | 40 +++ .../dbauth/GuardedStringValueProvider.java | 80 +++++ .../auth/dbauth/LocalCredentialsProvider.java | 59 ++++ .../reader/auth/dbauth/package-info.java | 17 ++ .../v2/source/reader/auth/package-info.java | 18 ++ .../v2/source/reader/io/IoWrapper.java | 30 ++ .../mysql/MysqlDialectAdapter.java | 2 +- .../io/jdbc/iowrapper/JdbcIoWrapper.java | 201 ++++++++++++ .../iowrapper/config/JdbcIOWrapperConfig.java | 149 +++++++++ .../io/jdbc/iowrapper/config/TableConfig.java | 81 +++++ .../config/defaults/MySqlConfigDefaults.java | 58 ++++ .../config/defaults/package-info.java | 18 ++ .../jdbc/iowrapper/config/package-info.java | 18 ++ .../io/jdbc/iowrapper/package-info.java | 18 ++ .../io/jdbc/rowmapper/JdbcValueMapper.java | 3 +- .../rowmapper/JdbcValueMappingsProvider.java | 3 +- .../rowmapper/ResultSetValueExtractor.java | 3 +- .../jdbc/rowmapper/ResultSetValueMapper.java | 3 +- .../reader/io/schema/SourceTableSchema.java | 15 +- .../io/transform/AccumulatingTableReader.java | 6 +- .../reader/io/transform/ReaderTransform.java | 9 +- .../v2/templates/SourceDbToSpanner.java | 136 +++++---- .../transformer/SourceRowToMutationDoFn.java | 7 +- .../options/OptionsToConfigBuilderTest.java | 68 +++++ .../v2/source/DataSourceProviderTest.java | 51 ---- .../GuardedStringValueProviderTest.java | 34 +++ .../dbauth/LocalCredentialsProviderTest.java | 39 +++ .../mysql/MysqlDialectAdapterTest.java | 2 +- .../iowrapper/config/TableConfigTest.java | 74 +++++ .../avro/GenericRecordTypeConvertor.java | 96 ++++-- .../avro/GenericRecordTypeConvertorTest.java | 203 +++++++++++-- .../resources/avro/all-spanner-types.avsc | 62 ---- .../resources/avro/logical-types-schema.avsc | 51 ---- 120 files changed, 3321 insertions(+), 793 deletions(-) create mode 100644 plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/XlangDockerfileGenerator.java create mode 100644 plugins/core-plugin/src/main/resources/Dockerfile-template-xlang delete mode 100644 plugins/core-plugin/src/main/resources/Dockerfile-xlang-template create mode 100644 plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/XlangDockerfileGeneratorTest.java rename v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/{DataStreamToSpannerShardedMigrationIT.java => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT.java} (85%) create mode 100644 v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT.java create mode 100644 v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerTest.java create mode 100644 v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/datastream/ChangeEventContextFactoryTest.java 
create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/AllDatatypeColumns-statements.sql create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/AllDatatypeColumns2-statements.sql create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-backfill-AllDatatypeColumns.avro delete mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-backfill-AllDatatypeColumns.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-backfill-AllDatatypeColumns2.avro delete mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-backfill-AllDatatypeColumns2.jsonl delete mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-cdc-AllDatatypeColumns.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-cdc-AllDatatypeColumns2.avro delete mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-cdc-AllDatatypeColumns2.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-cdc1-AllDatatypeColumns.avro create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-cdc2-AllDatatypeColumns.avro rename v2/datastream-to-spanner/src/test/resources/{DataStreamToSpannerShardedMigrationIT => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT}/Movie-shard1.jsonl (100%) rename v2/datastream-to-spanner/src/test/resources/{DataStreamToSpannerShardedMigrationIT => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT}/Movie-shard2.jsonl (100%) rename v2/datastream-to-spanner/src/test/resources/{DataStreamToSpannerShardedMigrationIT => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT}/Users-backfill-logical-shard1.jsonl (100%) rename v2/datastream-to-spanner/src/test/resources/{DataStreamToSpannerShardedMigrationIT => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT}/Users-backfill-logical-shard2.jsonl (100%) rename v2/datastream-to-spanner/src/test/resources/{DataStreamToSpannerShardedMigrationIT => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT}/Users-backfill-logical-shard3.jsonl (100%) rename v2/datastream-to-spanner/src/test/resources/{DataStreamToSpannerShardedMigrationIT => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT}/Users-backfill-logical-shard4.jsonl (100%) rename v2/datastream-to-spanner/src/test/resources/{DataStreamToSpannerShardedMigrationIT => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT}/Users-cdc-shard1.jsonl (100%) rename v2/datastream-to-spanner/src/test/resources/{DataStreamToSpannerShardedMigrationIT => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT}/Users-cdc-shard2.jsonl (100%) rename v2/datastream-to-spanner/src/test/resources/{DataStreamToSpannerShardedMigrationIT => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT}/mysql-schema.sql (100%) rename v2/datastream-to-spanner/src/test/resources/{DataStreamToSpannerShardedMigrationIT => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT}/mysql-session.json (100%) rename v2/datastream-to-spanner/src/test/resources/{DataStreamToSpannerShardedMigrationIT => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT}/spanner-schema.sql (100%) rename 
v2/datastream-to-spanner/src/test/resources/{DataStreamToSpannerShardedMigrationIT => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT}/transformation-context-shard1.json (100%) rename v2/datastream-to-spanner/src/test/resources/{DataStreamToSpannerShardedMigrationIT => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT}/transformation-context-shard2.json (100%) create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard1.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard2.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard3.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard4.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-cdc-shard1.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-cdc-shard2.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/mysql-schema.sql create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/spanner-schema.sql delete mode 100644 v2/googlecloud-to-elasticsearch/src/main/resources/requirements.txt delete mode 100644 v2/googlecloud-to-googlecloud/src/main/resources/requirements.txt create mode 100644 v2/kafka-to-gcs/src/test/java/com/google/cloud/teleport/v2/templates/KafkaToGcsIT.java create mode 100644 v2/kafka-to-gcs/src/test/resources/KafkaToGcsIT/avro_schema.avsc delete mode 100644 v2/pubsub-binary-to-bigquery/src/main/resources/requirements.txt delete mode 100644 v2/pubsub-to-mongodb/src/main/resources/requirements.txt create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/options/OptionsToConfigBuilder.java delete mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/DataSourceProvider.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/ReaderImpl.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/DbAuth.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/GuardedStringValueProvider.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/LocalCredentialsProvider.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/package-info.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/package-info.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/IoWrapper.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/JdbcIoWrapper.java create mode 100644 
v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/JdbcIOWrapperConfig.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/TableConfig.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/defaults/MySqlConfigDefaults.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/defaults/package-info.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/package-info.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/package-info.java create mode 100644 v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/options/OptionsToConfigBuilderTest.java delete mode 100644 v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/DataSourceProviderTest.java create mode 100644 v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/GuardedStringValueProviderTest.java create mode 100644 v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/LocalCredentialsProviderTest.java create mode 100644 v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/TableConfigTest.java delete mode 100644 v2/spanner-common/src/test/resources/avro/all-spanner-types.avsc delete mode 100644 v2/spanner-common/src/test/resources/avro/logical-types-schema.avsc diff --git a/.github/actions/setup-env/action.yml b/.github/actions/setup-env/action.yml index 69ba7f1575..4f2cb143e2 100644 --- a/.github/actions/setup-env/action.yml +++ b/.github/actions/setup-env/action.yml @@ -55,12 +55,13 @@ runs: using: 'composite' steps: - name: Setup Go - uses: actions/setup-go@44e221478fc6847752e5c574fc7a7b3247b00fbf + uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 with: go-version: ${{ inputs.go-version }} + cache: false - name: Get Changed Files id: changed-files - uses: tj-actions/changed-files@61ee456a9d0f512e7ecfdf28863634c97dae2d16 + uses: tj-actions/changed-files@4c5f5d698fbf2d763d5f13815ac7c2ccbef1ff7f # v44.2.0 with: separator: ',' # It shouldn't take too long to build all of this, and it will at least diff --git a/.github/actions/setup-java-env/action.yml b/.github/actions/setup-java-env/action.yml index d13a82281a..c3b50c7158 100644 --- a/.github/actions/setup-java-env/action.yml +++ b/.github/actions/setup-java-env/action.yml @@ -40,7 +40,7 @@ runs: using: 'composite' steps: - name: Setup Java - uses: actions/setup-java@a12e082d834968c1847f782019214fadd20719f6 + uses: actions/setup-java@5896cecc08fd8a1fbdfaf517e29b571164b031f7 # v4.2.0 with: distribution: 'temurin' java-version: ${{ inputs.java-version }} @@ -57,7 +57,7 @@ runs: echo "YESTERDAY=$KEY" >> $GITHUB_ENV fi - name: Setup Cache - uses: actions/cache@72d1e4fdff0ff7b1b6e86b415f2d4f5941e5c006 + uses: actions/cache@ab5e6d0c87105b4c9c2047343972218f562e4319 # v4.0.1 id: setup-cache with: path: | diff --git a/.github/scripts/configure-runners.sh b/.github/scripts/configure-runners.sh index eeee825d19..a51b9aaf7e 100755 --- a/.github/scripts/configure-runners.sh +++ b/.github/scripts/configure-runners.sh @@ -13,7 +13,8 @@ # See the License for the specific language governing permissions and # 
limitations under the License. -# For running the script, see go/templates-gitactions-script +# For running the script, see +# https://github.com/GoogleCloudPlatform/DataflowTemplates/blob/main/contributor-docs/maintainers-guide.md#provision-new-runners # Defaults NAME_SUFFIX="it" @@ -22,6 +23,10 @@ BASE_NAME="gitactions-runner" REPO_NAME="DataflowTemplates" REPO_OWNER="GoogleCloudPlatform" GH_RUNNER_VERSION="2.299.1" + +MACHINE_TYPE="n1-highmem-32" +BOOT_DISK_SIZE="200GB" + VERBOSE=0 ############################################################ @@ -44,6 +49,8 @@ Help() echo "o (optional) Set the owner of the GitHub repo. Default '$REPO_OWNER'" echo "s (optional) Set the number of runners. Default $SIZE" echo "v (optional) Set the gitactions runner version. Default $GH_RUNNER_VERSION" + echo "m (optional) Set the machine type for the GCE VM runner. $MACHINE_TYPE" + echo "b (optional) Set the boot disk size for the GCE VM runner. $BOOT_DISK_SIZE" echo "V Verbose mode." echo "h Print this Help." echo @@ -79,6 +86,10 @@ while getopts ":h:Vp:a:t:n:S:r:o:s:v:" option; do SIZE=$OPTARG;; v) # Enter a version GH_RUNNER_VERSION=$OPTARG;; + m) # Enter a machine type + MACHINE_TYPE=$OPTARG;; + b) # Enter a boot disk size + BOOT_DISK_SIZE=$OPTARG;; V) # Verbose VERBOSE=1;; \?) # Invalid option @@ -163,8 +174,6 @@ gcloud secrets add-iam-policy-binding $SECRET_NAME \ IMAGE_FAMILY="ubuntu-2004-lts" IMAGE_PROJECT="ubuntu-os-cloud" BOOT_DISK_TYPE="pd-balanced" -BOOT_DISK_SIZE="200GB" -MACHINE_TYPE="n1-highmem-16" SCOPE="cloud-platform" if [ $VERBOSE -eq 1 ]; then echo; echo "Creating instance template: $INSTANCE_TEMPLATE_NAME..."; fi if [ $VERBOSE -eq 1 ]; then @@ -181,7 +190,7 @@ gcloud compute instance-templates create $INSTANCE_TEMPLATE_NAME \ --image-project=$IMAGE_PROJECT \ --boot-disk-type=$BOOT_DISK_TYPE \ --boot-disk-size=$BOOT_DISK_SIZE \ - --machine-type="MACHINE_TYPE" \ + --machine-type=$MACHINE_TYPE \ --scopes=$SCOPE \ --service-account=${SA_EMAIL} \ --metadata-from-file=startup-script=startup-script-${NAME_SUFFIX}.sh,shutdown-script=shutdown-script-${NAME_SUFFIX}.sh diff --git a/.github/scripts/startup-script.sh b/.github/scripts/startup-script.sh index 5ee8470b18..fa782dae38 100644 --- a/.github/scripts/startup-script.sh +++ b/.github/scripts/startup-script.sh @@ -22,30 +22,28 @@ ulimit -n 65536 # increase max virtual memory sudo sysctl -w vm.max_map_count=262144 +# update git +sudo add-apt-repository ppa:git-core/ppa -y +sudo apt update +sudo apt install git -y + # install jq -apt-get update -apt-get -y install jq +sudo apt install jq -y # install maven -sudo apt update sudo apt install git maven -y -# update git -sudo add-apt-repository ppa:git-core/ppa -y -sudo apt-get update -sudo apt-get install git -y - # install gh -sudo type -p curl >/dev/null || (sudo apt update && sudo apt install curl -y) -sudo curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg \ +sudo apt install curl -y \ +&& sudo curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg \ && sudo chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg \ && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null \ && sudo apt update \ && sudo apt install gh -y # install docker -sudo apt-get update -sudo apt-get 
install \ +sudo apt update +sudo apt install \ ca-certificates \ curl \ gnupg \ @@ -55,8 +53,8 @@ curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o echo \ "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null -sudo apt-get update -sudo apt-get install docker-ce docker-ce-cli containerd.io docker-compose-plugin -y +sudo apt update +sudo apt install docker-ce docker-ce-cli containerd.io docker-compose-plugin -y # add user to docker group sudo groupadd docker diff --git a/.github/workflows/go-pr.yml b/.github/workflows/go-pr.yml index a7b73b1d06..7e1573f6fd 100644 --- a/.github/workflows/go-pr.yml +++ b/.github/workflows/go-pr.yml @@ -35,9 +35,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout Code - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 # v4.0.0 - name: Setup Go - uses: actions/setup-go@44e221478fc6847752e5c574fc7a7b3247b00fbf + uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 with: go-version: '1.21' - name: Run Fmt @@ -53,9 +53,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout Code - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 # v4.0.0 - name: Setup Go - uses: actions/setup-go@44e221478fc6847752e5c574fc7a7b3247b00fbf + uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 with: go-version: '1.21' # By nature, this also makes sure that everything builds diff --git a/.github/workflows/java-pr.yml b/.github/workflows/java-pr.yml index 887b3b2ab7..ec617ebae7 100644 --- a/.github/workflows/java-pr.yml +++ b/.github/workflows/java-pr.yml @@ -31,6 +31,10 @@ on: - cron: "0 */12 * * *" workflow_dispatch: +concurrency: + group: java-pr-${{ github.event.issue.number || github.run_id }} + cancel-in-progress: true + env: MAVEN_OPTS: -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.shade=error diff --git a/.github/workflows/prepare-java-cache.yml b/.github/workflows/prepare-java-cache.yml index 399a3e8bd1..33af88c88b 100644 --- a/.github/workflows/prepare-java-cache.yml +++ b/.github/workflows/prepare-java-cache.yml @@ -58,7 +58,7 @@ jobs: run: | echo "CACHE_KEY=''" >> $GITHUB_ENV - name: Checkout Code - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 # v4.0.0 - name: Setup Java id: setup-java uses: ./.github/actions/setup-java-env diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 01dd51cd82..4d4657c743 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -23,7 +23,7 @@ permissions: jobs: release: name: Create Release - runs-on: [self-hosted, it] + runs-on: [self-hosted, release] steps: - name: Get releaser identity run: | diff --git a/.github/workflows/scorecards-analysis.yml b/.github/workflows/scorecards-analysis.yml index e066e6bbc0..f7cbc7b10e 100644 --- a/.github/workflows/scorecards-analysis.yml +++ b/.github/workflows/scorecards-analysis.yml @@ -22,7 +22,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 # v4.0.0 with: persist-credentials: false diff --git 
a/contributor-docs/maintainers-guide.md b/contributor-docs/maintainers-guide.md index 6099961058..f6cdbe7be5 100644 --- a/contributor-docs/maintainers-guide.md +++ b/contributor-docs/maintainers-guide.md @@ -56,4 +56,73 @@ This repo's code currently is mirrored in Google's internal source control syste 6) Approve the change internally. At this point, the change will get automatically merged internally and externally. We are actively working to deprecate this process and use GitHub as the only source of truth. -If you encounter unresolvable issues with this flow, please reach out to the Dataflow team directly. \ No newline at end of file +If you encounter unresolvable issues with this flow, please reach out to the Dataflow team directly. + +## GitHub actions + +There are several workflows that leverage GitHub actions to keep the repo healthy. Of these workflows, there are +currently 2 that are run on self-hosted runners on GCP - [Java PR](../.github/workflows/java-pr.yml) which is used to +test PR's and [Release](../.github/workflows/release.yml) which is the workflow used for releasing new templates each +week. + +### Provision new runners + +There are instances where we may need to re-provision self-hosted runners, due to unexpected failures, updating +dependencies, increasing memory, etc. In these cases, there are helper scripts to aid in redeployment of the GitHub +actions runners. + +There are 3 scripts: [configure-runners.sh](../.github/scripts/configure-runners.sh), +[startup-script.sh](../.github/scripts/startup-script.sh) and +[shutdown-script.sh](../.github/scripts/shutdown-script.sh). The first is the main script used to provision the runners +themselves. The startup script is what will be invoked by the GCE VM as it is booted up for the first time and will +install all necessary packages needed by IT's, unit tests, Release, etc. as well as link the machine as a runner for the +repo. Likewise, the shutdown script is run when the VM is shutdown. + +To provision GitHub actions runners, there are a couple prerequisites +- Must be a maintainer of the repo +- Must have access to GCP project cloud-teleport-testing + +Things to remember: +- Running the script will tear down existing runners and provision new ones. This will kill any actions currently +running on any of the runners. Failure to spin up new runner correctly will block PR's and Releases, so use carefully. +- After running the script, it is likely the old runners will still be listed under +https://github.com/GoogleCloudPlatform/DataflowTemplates/settings/actions/runners. Simply force remove these to keep the +repo clean +- The commands below will demonstrate how to provision runners for use with our workflows as they exist today. If there +arises a need to provision runners in a different manner, feel free to modify the scripts directly and open a PR with +the necessary changes. + +To run the configuration script: + +1. Set gcloud project to cloud-teleport-testing if not already set + ``` + gcloud config set project cloud-teleport-testing + ``` + +2. Export the GitHub actions token + ``` + GITACTIONS_TOKEN=$(gcloud secrets versions access latest --secret=gitactions-runner-secret) + ``` + +3. 
Run the script + + * For IT runners: + + ``` + ./configure-runners.sh \ + -p cloud-teleport-testing \ + -a 269744978479-compute@developer.gserviceaccount.com \ + -t $GITACTIONS_TOKEN + ``` + + * For Performance Test Runner + ``` + ./configure-runners.sh \ + -p cloud-teleport-testing \ + -a 269744978479-compute@developer.gserviceaccount.com \ + -t $GITACTIONS_TOKEN \ + -S perf \ + -s 1 + ``` + +**Note**: To see optional configurable parameters, run `./configure-runners.sh -h` \ No newline at end of file diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManagerUtils.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManagerUtils.java index f2b6849caa..76be7ffcdf 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManagerUtils.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManagerUtils.java @@ -22,17 +22,18 @@ import com.google.cloud.bigquery.TableId; import java.time.format.DateTimeFormatter; import java.util.regex.Pattern; +import org.apache.commons.lang3.RandomStringUtils; /** Utilities for {@link BigQueryResourceManager} implementations. */ public final class BigQueryResourceManagerUtils { private static final int MAX_DATASET_ID_LENGTH = 1024; private static final Pattern ILLEGAL_DATASET_ID_CHARS = Pattern.compile("[^a-zA-Z0-9_]"); + private static final String REPLACE_CHAR = "_"; private static final int MIN_TABLE_ID_LENGTH = 1; private static final int MAX_TABLE_ID_LENGTH = 1024; private static final Pattern ILLEGAL_TABLE_CHARS = Pattern.compile("[^a-zA-Z0-9-_]"); - private static final DateTimeFormatter TIME_FORMAT = - DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss_SSSSSS"); + private static final String TIME_FORMAT = "yyyyMMdd_HHmmss"; private BigQueryResourceManagerUtils() {} @@ -46,8 +47,31 @@ private BigQueryResourceManagerUtils() {} * @return a BigQuery compatible dataset name. 
*/ static String generateDatasetId(String datasetName) { + + // Take substring of datasetName to account for random suffix + // TODO(polber) - remove with Beam 2.57.0 + int randomSuffixLength = 6; + datasetName = + datasetName.substring( + 0, + Math.min( + datasetName.length(), + MAX_DATASET_ID_LENGTH + - REPLACE_CHAR.length() + - TIME_FORMAT.length() + - REPLACE_CHAR.length() + - randomSuffixLength)); + + // Add random suffix to avoid collision + // TODO(polber) - remove with Beam 2.57.0 return generateResourceId( - datasetName, ILLEGAL_DATASET_ID_CHARS, "_", MAX_DATASET_ID_LENGTH, TIME_FORMAT); + datasetName, + ILLEGAL_DATASET_ID_CHARS, + REPLACE_CHAR, + MAX_DATASET_ID_LENGTH, + DateTimeFormatter.ofPattern(TIME_FORMAT)) + + REPLACE_CHAR + + RandomStringUtils.randomAlphanumeric(6).toLowerCase(); } /** diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerUtils.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerUtils.java index 28f1f5bf60..a719805884 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerUtils.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerUtils.java @@ -24,6 +24,7 @@ import java.util.List; import java.util.regex.Pattern; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; +import org.apache.commons.lang3.RandomStringUtils; /** Utilities for {@link BigtableResourceManager} implementations. */ public final class BigtableResourceManagerUtils { @@ -39,8 +40,7 @@ public final class BigtableResourceManagerUtils { private static final Pattern ILLEGAL_TABLE_CHARS = Pattern.compile("[^a-zA-Z0-9-_.]"); private static final String REPLACE_TABLE_ID_CHAR = "-"; - private static final DateTimeFormatter TIME_FORMAT = - DateTimeFormatter.ofPattern("yyyyMMdd-HHmmss-SSSSSS"); + private static final String TIME_FORMAT = "yyyyMMdd-HHmmss"; private BigtableResourceManagerUtils() {} @@ -57,13 +57,35 @@ private BigtableResourceManagerUtils() {} static List generateDefaultClusters( String baseString, String zone, int numNodes, StorageType storageType) { + // Take substring of baseString to account for random suffix + // TODO(polber) - remove with Beam 2.57.0 + int randomSuffixLength = 6; + baseString = + baseString + .toLowerCase() + .substring( + 0, + Math.min( + baseString.length(), + MAX_CLUSTER_ID_LENGTH + - REPLACE_CLUSTER_CHAR.length() + - TIME_FORMAT.length() + - REPLACE_CLUSTER_CHAR.length() + - randomSuffixLength)); + String clusterId = generateResourceId( baseString.toLowerCase(), ILLEGAL_CLUSTER_CHARS, REPLACE_CLUSTER_CHAR, MAX_CLUSTER_ID_LENGTH, - TIME_FORMAT); + DateTimeFormatter.ofPattern(TIME_FORMAT)); + + // Add random suffix to avoid collision + // TODO(polber) - remove with Beam 2.57.0 + clusterId = + clusterId + REPLACE_CLUSTER_CHAR + RandomStringUtils.randomAlphanumeric(6).toLowerCase(); + BigtableResourceManagerCluster cluster = BigtableResourceManagerCluster.create(clusterId, zone, numNodes, storageType); @@ -77,12 +99,31 @@ static List generateDefaultClusters( * @return The instance id string. 
*/ static String generateInstanceId(String baseString) { + + // Take substring of baseString to account for random suffix + // TODO(polber) - remove with Beam 2.57.0 + int randomSuffixLength = 6; + baseString = + baseString.substring( + 0, + Math.min( + baseString.length(), + MAX_INSTANCE_ID_LENGTH + - REPLACE_INSTANCE_ID_CHAR.length() + - TIME_FORMAT.length() + - REPLACE_INSTANCE_ID_CHAR.length() + - randomSuffixLength)); + + // Add random suffix to avoid collision + // TODO(polber) - remove with Beam 2.57.0 return generateResourceId( - baseString.toLowerCase(), - ILLEGAL_INSTANCE_ID_CHARS, - REPLACE_INSTANCE_ID_CHAR, - MAX_INSTANCE_ID_LENGTH, - TIME_FORMAT); + baseString.toLowerCase(), + ILLEGAL_INSTANCE_ID_CHARS, + REPLACE_INSTANCE_ID_CHAR, + MAX_INSTANCE_ID_LENGTH, + DateTimeFormatter.ofPattern(TIME_FORMAT)) + + REPLACE_INSTANCE_ID_CHAR + + RandomStringUtils.randomAlphanumeric(6).toLowerCase(); } /** @@ -92,12 +133,31 @@ static String generateInstanceId(String baseString) { * @return The instance id string. */ public static String generateTableId(String baseString) { + + // Take substring of baseString to account for random suffix + // TODO(polber) - remove with Beam 2.57.0 + int randomSuffixLength = 6; + baseString = + baseString.substring( + 0, + Math.min( + baseString.length(), + MAX_TABLE_ID_LENGTH + - REPLACE_TABLE_ID_CHAR.length() + - TIME_FORMAT.length() + - REPLACE_TABLE_ID_CHAR.length() + - randomSuffixLength)); + + // Add random suffix to avoid collision + // TODO(polber) - remove with Beam 2.57.0 return generateResourceId( - baseString.toLowerCase(), - ILLEGAL_TABLE_CHARS, - REPLACE_TABLE_ID_CHAR, - MAX_TABLE_ID_LENGTH, - TIME_FORMAT); + baseString.toLowerCase(), + ILLEGAL_TABLE_CHARS, + REPLACE_TABLE_ID_CHAR, + MAX_TABLE_ID_LENGTH, + DateTimeFormatter.ofPattern(TIME_FORMAT)) + + REPLACE_TABLE_ID_CHAR + + RandomStringUtils.randomAlphanumeric(6).toLowerCase(); } /** diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/datastream/DatastreamResourceManagerUtils.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/datastream/DatastreamResourceManagerUtils.java index ec9dd9c256..24b5269f85 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/datastream/DatastreamResourceManagerUtils.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/datastream/DatastreamResourceManagerUtils.java @@ -21,13 +21,14 @@ import java.time.format.DateTimeFormatter; import java.util.regex.Pattern; +import org.apache.commons.lang3.RandomStringUtils; public class DatastreamResourceManagerUtils { private static final int MAX_RESOURCE_ID_LENGTH = 60; private static final Pattern ILLEGAL_RESOURCE_ID_CHARS = Pattern.compile("[^a-zA-Z0-9- ]"); - private static final DateTimeFormatter TIME_FORMAT = - DateTimeFormatter.ofPattern("yyyyMMdd-HHmmss-SSSSSS"); + private static final String REPLACE_CHAR = "_"; + private static final String TIME_FORMAT = "yyyyMMdd-HHmmss"; private DatastreamResourceManagerUtils() {} @@ -41,7 +42,30 @@ private DatastreamResourceManagerUtils() {} * @return a Datastream compatible resource ID. 
*/ static String generateDatastreamId(String resourceId) { + + // Take substring of baseString to account for random suffix + // TODO(polber) - remove with Beam 2.57.0 + int randomSuffixLength = 6; + resourceId = + resourceId.substring( + 0, + Math.min( + resourceId.length(), + MAX_RESOURCE_ID_LENGTH + - REPLACE_CHAR.length() + - TIME_FORMAT.length() + - REPLACE_CHAR.length() + - randomSuffixLength)); + + // Add random suffix to avoid collision + // TODO(polber) - remove with Beam 2.57.0 return generateResourceId( - resourceId, ILLEGAL_RESOURCE_ID_CHARS, "-", MAX_RESOURCE_ID_LENGTH, TIME_FORMAT); + resourceId, + ILLEGAL_RESOURCE_ID_CHARS, + REPLACE_CHAR, + MAX_RESOURCE_ID_LENGTH, + DateTimeFormatter.ofPattern(TIME_FORMAT)) + + REPLACE_CHAR + + RandomStringUtils.randomAlphanumeric(6).toLowerCase(); } } diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/spanner/utils/SpannerResourceManagerUtils.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/spanner/utils/SpannerResourceManagerUtils.java index 3dce40511a..9e40637e18 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/spanner/utils/SpannerResourceManagerUtils.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/spanner/utils/SpannerResourceManagerUtils.java @@ -25,6 +25,7 @@ import java.util.regex.Pattern; import org.apache.beam.it.gcp.spanner.SpannerResourceManager; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.CharMatcher; +import org.apache.commons.lang3.RandomStringUtils; /** Utilities for {@link SpannerResourceManager} implementations. */ public final class SpannerResourceManagerUtils { @@ -34,10 +35,8 @@ public final class SpannerResourceManagerUtils { private static final String REPLACE_DATABASE_CHAR = "_"; public static final int MAX_INSTANCE_ID_LENGTH = 30; public static final int MAX_DATABASE_ID_LENGTH = 30; - private static final DateTimeFormatter INSTANCE_TIME_FORMAT = - DateTimeFormatter.ofPattern("yyyyMMdd-HHmmss-SSSSSS"); - private static final DateTimeFormatter DATABASE_TIME_FORMAT = - DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss_SSSSSS"); + private static final String INSTANCE_TIME_FORMAT = "yyyyMMdd-HHmmss"; + private static final String DATABASE_TIME_FORMAT = "yyyyMMdd_HHmmss"; private SpannerResourceManagerUtils() {} @@ -50,13 +49,27 @@ private SpannerResourceManagerUtils() {} public static String generateDatabaseId(String baseString) { checkArgument(baseString.length() != 0, "baseString cannot be empty!"); + // Take substring of baseString to account for random suffix + // TODO(polber) - remove with Beam 2.57.0 + int randomSuffixLength = 6; + baseString = + baseString.substring( + 0, + Math.min( + baseString.length(), + MAX_DATABASE_ID_LENGTH + - REPLACE_DATABASE_CHAR.length() + - DATABASE_TIME_FORMAT.length() + - REPLACE_DATABASE_CHAR.length() + - randomSuffixLength)); + String databaseId = generateResourceId( baseString, ILLEGAL_DATABASE_CHARS, REPLACE_DATABASE_CHAR, MAX_DATABASE_ID_LENGTH, - DATABASE_TIME_FORMAT); + DateTimeFormatter.ofPattern(DATABASE_TIME_FORMAT)); // replace hyphen with underscore, so there's no need for backticks String trimmed = CharMatcher.is('_').trimTrailingFrom(databaseId); @@ -71,6 +84,14 @@ public static String generateDatabaseId(String baseString) { if (!Character.isLetter(trimmed.charAt(0))) { trimmed = padding + trimmed.substring(1); } + + // Add random suffix to avoid collision + // TODO(polber) - remove with Beam 2.57.0 + trimmed = + trimmed + + REPLACE_DATABASE_CHAR + + 
RandomStringUtils.randomAlphanumeric(randomSuffixLength).toLowerCase(); + return trimmed; } @@ -81,13 +102,28 @@ public static String generateDatabaseId(String baseString) { * @return The instance id string. */ public static String generateInstanceId(String baseString) { + + // Take substring of baseString to account for random suffix + // TODO(polber) - remove with Beam 2.57.0 + int randomSuffixLength = 6; + baseString = + baseString.substring( + 0, + Math.min( + baseString.length(), + MAX_INSTANCE_ID_LENGTH + - REPLACE_INSTANCE_CHAR.length() + - INSTANCE_TIME_FORMAT.length() + - REPLACE_INSTANCE_CHAR.length() + - randomSuffixLength)); + String instanceId = generateResourceId( baseString, ILLEGAL_INSTANCE_CHARS, REPLACE_INSTANCE_CHAR, MAX_INSTANCE_ID_LENGTH, - INSTANCE_TIME_FORMAT); + DateTimeFormatter.ofPattern(INSTANCE_TIME_FORMAT)); // if first char is not a letter, replace with letter, so it doesn't // violate spanner's instance naming rules @@ -96,6 +132,11 @@ public static String generateInstanceId(String baseString) { instanceId = padding + instanceId.substring(1); } + // Add random suffix to avoid collision + // TODO(polber) - remove with Beam 2.57.0 + instanceId = + instanceId + REPLACE_INSTANCE_CHAR + RandomStringUtils.randomAlphanumeric(6).toLowerCase(); + return instanceId; } } diff --git a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManagerTest.java b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManagerTest.java index e9cd252387..039c63b33f 100644 --- a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManagerTest.java +++ b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManagerTest.java @@ -87,7 +87,8 @@ public void testGetProjectIdReturnsCorrectValue() { public void testGetDatasetIdReturnsCorrectValue() { BigQueryResourceManager tm = BigQueryResourceManager.builder(TEST_ID, PROJECT_ID, null).build(); - assertThat(tm.getDatasetId()).matches(TEST_ID.replace('-', '_') + "_\\d{8}_\\d{6}_\\d{6}"); + assertThat(tm.getDatasetId()) + .matches(TEST_ID.replace('-', '_') + "_\\d{8}_\\d{6}_[a-zA-Z0-9]{6}"); } @Test diff --git a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerTest.java b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerTest.java index 74b25e84c6..7842b99d29 100644 --- a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerTest.java +++ b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerTest.java @@ -95,7 +95,7 @@ public void testCreateResourceManagerCreatesCorrectIdValues() throws IOException BigtableResourceManager.builder(TEST_ID, PROJECT_ID, null), bigtableResourceManagerClientFactory); - assertThat(rm.getInstanceId()).matches(TEST_ID + "-\\d{8}-\\d{6}-\\d{6}"); + assertThat(rm.getInstanceId()).matches(TEST_ID + "-\\d{8}-\\d{6}-[a-zA-Z0-9]{6}"); assertThat(rm.getProjectId()).matches(PROJECT_ID); } diff --git a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerUtilsTest.java b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerUtilsTest.java index 9634dc2d04..d0e8c32917 100644 --- a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerUtilsTest.java +++ 
b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerUtilsTest.java @@ -41,7 +41,7 @@ public void testGenerateDefaultClustersShouldWorkWhenAllParametersValid() { generateDefaultClusters(TEST_ID, ZONE, NUM_NODES, STORAGE_TYPE); BigtableResourceManagerCluster thisCluster = cluster.iterator().next(); - assertThat(thisCluster.clusterId()).matches(TEST_ID + "-\\d{8}-\\d{6}-\\d{6}"); + assertThat(thisCluster.clusterId()).matches(TEST_ID + "-\\d{8}-\\d{6}-[a-zA-Z0-9]{6}"); assertThat(thisCluster.zone()).isEqualTo(ZONE); assertThat(thisCluster.numNodes()).isEqualTo(NUM_NODES); assertThat(thisCluster.storageType()).isEqualTo(STORAGE_TYPE); @@ -58,7 +58,8 @@ public void testGenerateDefaultClustersShouldThrowErrorWhenTestIdIsEmpty() { public void testGenerateDefaultClustersShouldShortenTestIdWhenTooLong() { Iterable cluster = generateDefaultClusters("longer-id", ZONE, NUM_NODES, STORAGE_TYPE); - assertThat(cluster.iterator().next().clusterId()).matches("longer--\\d{8}-\\d{6}-\\d{6}"); + assertThat(cluster.iterator().next().clusterId()) + .matches("longer--\\d{8}-\\d{6}-[a-zA-Z0-9]{6}"); } @Test diff --git a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/spanner/SpannerResourceManagerTest.java b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/spanner/SpannerResourceManagerTest.java index b361f13cfd..7f79db449a 100644 --- a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/spanner/SpannerResourceManagerTest.java +++ b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/spanner/SpannerResourceManagerTest.java @@ -185,9 +185,9 @@ public void testExecuteDdlStatementShouldWorkWhenSpannerDoesntThrowAnyError() String actualDatabaseId = databaseIdCaptor.getValue(); Iterable actualStatement = statementCaptor.getValue(); - assertThat(actualInstanceId).matches(TEST_ID + "-\\d{8}-\\d{6}-\\d{6}"); + assertThat(actualInstanceId).matches(TEST_ID + "-\\d{8}-\\d{6}-[a-zA-Z0-9]{6}"); - assertThat(actualDatabaseId).matches(TEST_ID + "_\\d{8}_\\d{6}_\\d{6}"); + assertThat(actualDatabaseId).matches(TEST_ID + "_\\d{8}_\\d{6}_[a-zA-Z0-9]{6}"); assertThat(actualStatement).containsExactlyElementsIn(ImmutableList.of(statement)); } diff --git a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/spanner/utils/SpannerResourceManagerUtilsTest.java b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/spanner/utils/SpannerResourceManagerUtilsTest.java index cd1a418a18..4381befc3a 100644 --- a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/spanner/utils/SpannerResourceManagerUtilsTest.java +++ b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/spanner/utils/SpannerResourceManagerUtilsTest.java @@ -18,6 +18,8 @@ package org.apache.beam.it.gcp.spanner.utils; import static com.google.common.truth.Truth.assertThat; +import static org.apache.beam.it.gcp.spanner.utils.SpannerResourceManagerUtils.MAX_DATABASE_ID_LENGTH; +import static org.apache.beam.it.gcp.spanner.utils.SpannerResourceManagerUtils.MAX_INSTANCE_ID_LENGTH; import static org.apache.beam.it.gcp.spanner.utils.SpannerResourceManagerUtils.generateDatabaseId; import static org.apache.beam.it.gcp.spanner.utils.SpannerResourceManagerUtils.generateInstanceId; import static org.junit.Assert.assertThrows; @@ -36,7 +38,17 @@ public void testGenerateInstanceIdShouldReplaceNonLetterFirstCharWithLetter() { String actual = generateInstanceId(testBaseString); - assertThat(actual).matches("[a-z]-test-\\d{8}-\\d{6}-\\d{6}"); + 
assertThat(actual).matches("[a-z]-test-\\d{8}-\\d{6}-[a-zA-Z0-9]{6}"); + } + + @Test + public void testGenerateInstanceIdShouldConcatLongId() { + String testBaseString = "test-really_long-database-id"; + + String actual = generateInstanceId(testBaseString); + + assertThat(actual).matches("test-re-\\d{8}-\\d{6}-[a-zA-Z0-9]{6}"); + assertThat(actual.length()).isEqualTo(MAX_INSTANCE_ID_LENGTH); } @Test @@ -45,7 +57,7 @@ public void testGenerateDatabaseIdShouldNotReplaceDigitLastCharWithLetter() { String actual = generateDatabaseId(testBaseString); - assertThat(actual).matches("db_0_\\d{8}_\\d{6}_\\d{6}"); + assertThat(actual).matches("db_0_\\d{8}_\\d{6}_[a-zA-Z0-9]{6}"); } @Test @@ -54,7 +66,7 @@ public void testGenerateDatabaseIdShouldReplaceDollarSignWithUnderscore() { String actual = generateDatabaseId(testBaseString); - assertThat(actual).matches("t_db_\\d{8}_\\d{6}_\\d{6}"); + assertThat(actual).matches("t_db_\\d{8}_\\d{6}_[a-zA-Z0-9]{6}"); } @Test @@ -63,7 +75,7 @@ public void testGenerateDatabaseIdShouldReplaceDotWithUnderscore() { String actual = generateDatabaseId(testBaseString); - assertThat(actual).matches("test_da_\\d{8}_\\d{6}_\\d{6}"); + assertThat(actual).matches("test_da_\\d{8}_\\d{6}_[a-zA-Z0-9]{6}"); } @Test @@ -72,7 +84,7 @@ public void testGenerateDatabaseIdShouldReplaceHyphenWithUnderscore() { String actual = generateDatabaseId(testBaseString); - assertThat(actual).matches("test_da_\\d{8}_\\d{6}_\\d{6}"); + assertThat(actual).matches("test_da_\\d{8}_\\d{6}_[a-zA-Z0-9]{6}"); } @Test @@ -81,7 +93,7 @@ public void testGenerateDatabaseIdShouldReplaceNonLetterFirstCharWithLetter() { String actual = generateDatabaseId(testBaseString); - assertThat(actual).matches("[a-z]_datab_\\d{8}_\\d{6}_\\d{6}"); + assertThat(actual).matches("[a-z]_datab_\\d{8}_\\d{6}_[a-zA-Z0-9]{6}"); } @Test @@ -90,7 +102,7 @@ public void testGenerateDatabaseIdShouldReplaceUpperCaseLettersWithLowerCase() { String actual = generateDatabaseId(testBaseString); - assertThat(actual).matches("tda_\\d{8}_\\d{6}_\\d{6}"); + assertThat(actual).matches("tda_\\d{8}_\\d{6}_[a-zA-Z0-9]{6}"); } @Test @@ -99,7 +111,7 @@ public void testGenerateDatabaseIdShouldTrimTrailingUnderscore() { String actual = generateDatabaseId(testBaseString); - assertThat(actual).matches("test_da_\\d{8}_\\d{6}_\\d{6}"); + assertThat(actual).matches("test_da_\\d{8}_\\d{6}_[a-zA-Z0-9]{6}"); } @Test @@ -108,4 +120,14 @@ public void testGenerateDatabaseIdShouldThrowErrorWithEmptyInput() { assertThrows(IllegalArgumentException.class, () -> generateDatabaseId(testBaseString)); } + + @Test + public void testGenerateDatabaseIdShouldConcatLongId() { + String testBaseString = "test_really_long_database_id"; + + String actual = generateDatabaseId(testBaseString); + + assertThat(actual).matches("test_re_\\d{8}_\\d{6}_[a-zA-Z0-9]{6}"); + assertThat(actual.length()).isEqualTo(MAX_DATABASE_ID_LENGTH); + } } diff --git a/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/PythonDockerfileGenerator.java b/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/PythonDockerfileGenerator.java index 2487650331..6bbb8d58f5 100644 --- a/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/PythonDockerfileGenerator.java +++ b/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/PythonDockerfileGenerator.java @@ -26,7 +26,6 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import java.util.HashMap; import java.util.Map; import 
java.util.logging.Logger; @@ -67,79 +66,4 @@ public static void generateDockerfile( throw e; } } - - public static void generateXlangDockerfile( - String basePythonContainerImage, - String containerName, - File targetDirectory, - File artifactFile, - String commandSpec, - String beamVersion) - throws IOException, TemplateException { - Configuration freemarkerConfig = new Configuration(Configuration.VERSION_2_3_32); - freemarkerConfig.setDefaultEncoding("UTF-8"); - freemarkerConfig.setTemplateExceptionHandler(TemplateExceptionHandler.RETHROW_HANDLER); - freemarkerConfig.setLogTemplateExceptions(true); - freemarkerConfig.setClassForTemplateLoading(PythonDockerfileGenerator.class, "/"); - - String classesDirectory = targetDirectory.getPath() + "/classes"; - Map parameters = new HashMap<>(); - parameters.put("baseContainerImage", basePythonContainerImage); - parameters.put("commandSpec", commandSpec); - parameters.put("containerName", containerName); - parameters.put("beamVersion", beamVersion); - - Template template = freemarkerConfig.getTemplate("Dockerfile-xlang-template"); - - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - OutputStreamWriter writer = new OutputStreamWriter(baos); - - try { - template.process(parameters, writer); - writer.flush(); - - Files.createDirectories(Path.of(classesDirectory + "/" + containerName)); - - Files.write( - Path.of(classesDirectory + "/" + containerName + "/Dockerfile"), - baos.toString(StandardCharsets.UTF_8).getBytes()); - - } catch (Exception e) { - LOG.warning("Unable to generate Dockerfile for " + containerName); - throw e; - } - - try { - Files.createDirectories(Path.of(classesDirectory + "/" + containerName + "/classpath")); - Files.createDirectories(Path.of(classesDirectory + "/" + containerName + "/libs")); - - String artifactPath = artifactFile.getPath(); - String targetArtifactPath = - artifactPath.substring(artifactPath.lastIndexOf("/"), artifactPath.length()); - - Files.copy( - Path.of(targetDirectory.getPath() + targetArtifactPath), - Path.of(classesDirectory + "/" + containerName + "/classpath" + targetArtifactPath)); - String sourceLibsDirectory = targetDirectory.getPath() + "/extra_libs"; - String destLibsDirectory = classesDirectory + "/" + containerName + "/libs/"; - Files.walk(Paths.get(sourceLibsDirectory)) - .forEach( - source -> { - LOG.warning("current source: " + source.toString()); - LOG.warning("current source libs directory: " + sourceLibsDirectory); - Path dest = - Paths.get( - destLibsDirectory, - source.toString().substring(sourceLibsDirectory.length())); - try { - Files.copy(source, dest); - } catch (IOException e) { - LOG.warning("Unable to copy contents of " + sourceLibsDirectory); - } - }); - } catch (Exception e) { - LOG.warning("unable to copy jar files"); - throw e; - } - } } diff --git a/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/XlangDockerfileGenerator.java b/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/XlangDockerfileGenerator.java new file mode 100644 index 0000000000..a8738a0732 --- /dev/null +++ b/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/XlangDockerfileGenerator.java @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.plugin; + +import freemarker.template.Configuration; +import freemarker.template.Template; +import freemarker.template.TemplateException; +import freemarker.template.TemplateExceptionHandler; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.HashMap; +import java.util.Map; +import java.util.logging.Logger; + +/** Utility class that generates a simple Dockerfile for Python templates. */ +public class XlangDockerfileGenerator { + private static final Logger LOG = Logger.getLogger(XlangDockerfileGenerator.class.getName()); + + private XlangDockerfileGenerator() {} + + public static void generateDockerfile( + String baseJavaContainerImage, + String beamVersion, + String pythonVersion, + String containerName, + File targetDirectory, + File artifactFile, + String commandSpec) + throws IOException, TemplateException { + Configuration freemarkerConfig = new Configuration(Configuration.VERSION_2_3_32); + freemarkerConfig.setDefaultEncoding("UTF-8"); + freemarkerConfig.setTemplateExceptionHandler(TemplateExceptionHandler.RETHROW_HANDLER); + freemarkerConfig.setLogTemplateExceptions(true); + freemarkerConfig.setClassForTemplateLoading(PythonDockerfileGenerator.class, "/"); + + String classesDirectory = targetDirectory.getPath() + "/classes"; + Map parameters = new HashMap<>(); + parameters.put("baseJavaContainerImage", baseJavaContainerImage); + parameters.put("beamVersion", beamVersion); + parameters.put("pythonVersion", pythonVersion); + parameters.put("commandSpec", commandSpec); + parameters.put("containerName", containerName); + + Template template = freemarkerConfig.getTemplate("Dockerfile-template-xlang"); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + OutputStreamWriter writer = new OutputStreamWriter(baos); + + try { + template.process(parameters, writer); + writer.flush(); + + Files.createDirectories(Path.of(classesDirectory + "/" + containerName)); + + Files.write( + Path.of(classesDirectory + "/" + containerName + "/Dockerfile"), + baos.toString(StandardCharsets.UTF_8).getBytes()); + + } catch (Exception e) { + LOG.warning("Unable to generate Dockerfile for " + containerName); + throw e; + } + + try { + Files.createDirectories(Path.of(classesDirectory + "/" + containerName + "/classpath")); + Files.createDirectories(Path.of(classesDirectory + "/" + containerName + "/libs")); + + String artifactPath = artifactFile.getPath(); + String targetArtifactPath = + artifactPath.substring(artifactPath.lastIndexOf("/"), artifactPath.length()); + + Files.copy( + Path.of(targetDirectory.getPath() + targetArtifactPath), + Path.of(classesDirectory + "/" + containerName + "/classpath" + targetArtifactPath)); + String sourceLibsDirectory = targetDirectory.getPath() + "/extra_libs"; + String destLibsDirectory = classesDirectory + "/" + containerName + "/libs/"; + Files.walk(Paths.get(sourceLibsDirectory)) + 
.forEach( + source -> { + LOG.warning("current source: " + source.toString()); + LOG.warning("current source libs directory: " + sourceLibsDirectory); + Path dest = + Paths.get( + destLibsDirectory, + source.toString().substring(sourceLibsDirectory.length())); + try { + Files.copy(source, dest); + } catch (IOException e) { + LOG.warning("Unable to copy contents of " + sourceLibsDirectory); + } + }); + } catch (Exception e) { + LOG.warning("unable to copy jar files"); + throw e; + } + } +} diff --git a/plugins/core-plugin/src/main/resources/Dockerfile-template-xlang b/plugins/core-plugin/src/main/resources/Dockerfile-template-xlang new file mode 100644 index 0000000000..1bee40db15 --- /dev/null +++ b/plugins/core-plugin/src/main/resources/Dockerfile-template-xlang @@ -0,0 +1,72 @@ +#===================================================================# +# Create build environment from base Python template launcher image # +#===================================================================# +FROM gcr.io/dataflow-templates-base/python311-template-launcher-base:latest as python-base + +# Build args +ARG WORKDIR=/template +ARG REQUIREMENTS_FILE=requirements.txt +ARG BEAM_VERSION=${beamVersion} +ARG BEAM_PACKAGE=apache-beam[dataframe,gcp,test,yaml]==$BEAM_VERSION +ARG PY_VERSION=${pythonVersion} + +# Copy template files to /template +RUN mkdir -p $WORKDIR +COPY ${containerName}-generated-metadata.json $REQUIREMENTS_FILE* /template/ +COPY ${containerName}/ /template/${containerName}/ +WORKDIR $WORKDIR + +# Create requirements.txt file if not provided +RUN if ! [ -f requirements.txt ] ; then echo "$BEAM_PACKAGE" > requirements.txt ; fi + +# Install dependencies to launch the pipeline and download to reduce startup time +RUN python -m venv /venv \ + && /venv/bin/pip install --no-cache-dir --upgrade pip setuptools \ + && /venv/bin/pip install --no-cache-dir -U -r $REQUIREMENTS_FILE \ + && /venv/bin/pip download --no-cache-dir --dest /tmp/dataflow-requirements-cache -r $REQUIREMENTS_FILE \ + && rm -rf /usr/local/lib/python$PY_VERSION/site-packages \ + && cp -r /venv/lib/python$PY_VERSION/site-packages /usr/local/lib/python$PY_VERSION/ + # change to mv once Beam 2.57.0 is released + + +#============================================================# +# Create Distroless xlang image compatible with YamlTemplate # +#============================================================# +FROM ${baseJavaContainerImage} + +# Build args +ARG CHIPSET_ARCH=x86_64-linux-gnu +ARG PY_VERSION=${pythonVersion} + +# Set environment variables +ENV DATAFLOW_JAVA_COMMAND_SPEC=${commandSpec} +ENV PIP_NO_DEPS=True + +# Copy template, python wheels and python launcher script from python-base +COPY --from=python-base /template /template +COPY --from=python-base /tmp/dataflow-requirements-cache /tmp/dataflow-requirements-cache +COPY --from=python-base /opt/google/dataflow/python_template_launcher /opt/google/dataflow/python_template_launcher + +# Copy python and installed packages from python-base +COPY --from=python-base /usr/local/bin/python$PY_VERSION /usr/local/bin/python +COPY --from=python-base /usr/local/lib/python$PY_VERSION /usr/local/lib/python$PY_VERSION + +# Workaround until Beam 2.57.0 is released +COPY --from=python-base /venv /root/.apache_beam/cache/venvs/py-$PY_VERSION-beam-${beamVersion}-da39a3ee5e6b4b0d3255bfef95601890afd80709 + +# Copy required shared libraries from python-base +COPY --from=python-base /lib/$CHIPSET_ARCH/ld-*so* /lib64/ +COPY --from=python-base /lib/$CHIPSET_ARCH/lib*so* /lib/$CHIPSET_ARCH/ +COPY 
--from=python-base /usr/lib/$CHIPSET_ARCH/libffi* /usr/lib/$CHIPSET_ARCH/ +COPY --from=python-base /usr/local/lib/libpython$PY_VERSION* /usr/local/lib/ + +# Copy minimal commands from python-base needed to execute template +COPY --from=python-base /bin/dash /bin/sh +COPY --from=python-base /usr/bin/which.debianutils /usr/bin/which + +# Copy licenses +COPY --from=python-base /usr/licenses/ /usr/licenses/ + +WORKDIR /template + +ENTRYPOINT ["/opt/google/dataflow/java_template_launcher"] \ No newline at end of file diff --git a/plugins/core-plugin/src/main/resources/Dockerfile-xlang-template b/plugins/core-plugin/src/main/resources/Dockerfile-xlang-template deleted file mode 100644 index d5ceefb049..0000000000 --- a/plugins/core-plugin/src/main/resources/Dockerfile-xlang-template +++ /dev/null @@ -1,51 +0,0 @@ -#========================================================================# -# Create build environment from base Python 3.11 template launcher image # -#========================================================================# -FROM gcr.io/dataflow-templates-base/java11-template-launcher-base-distroless as java-base - - -#============================================================# -# Create Distroless xlang image compatible with YamlTemplate # -#============================================================# -FROM gcr.io/dataflow-templates-base/python311-template-launcher-base:latest - - -ARG REQUIREMENTS_FILE=requirements.txt - -COPY --from=busybox:1.35.0-uclibc /bin/sh /bin/sh -# Copy template files to /template -ARG WORKDIR=/template -RUN mkdir -p $WORKDIR -COPY $REQUIREMENTS_FILE /template -COPY ${containerName}/ /template/${containerName}/ -COPY ${containerName}-generated-metadata.json /template - -WORKDIR $WORKDIR - -# Set environment variables -ENV DATAFLOW_JAVA_COMMAND_SPEC=${commandSpec} -ENV FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE=requirements.txt - -# SHA hash here equates to null and is independent of beam version. This hash is generated based on the values of the -# .withExtraPackages() call made in the PythonExternalTextTransform class. We do not utilize this function meaning that -# this hash will remain constant. 
-ENV SITE_PACKAGES=/root/.apache_beam/cache/venvs/py-3.11-beam-${beamVersion}-da39a3ee5e6b4b0d3255bfef95601890afd80709 -RUN python -m venv $SITE_PACKAGES - -# pip install dependencies and cache wheels -RUN source $SITE_PACKAGES/bin/activate \ - && pip install --no-cache-dir --upgrade pip \ - && pip install --no-cache-dir --upgrade setuptools \ - && pip install --no-cache-dir pyparsing==2.4.2 \ - && pip install --no-cache-dir -U -r $REQUIREMENTS_FILE \ - && pip download --no-cache-dir --dest /tmp/dataflow-requirements-cache -r $REQUIREMENTS_FILE - -COPY --from=java-base /usr/local/jdk-11.0.20+8 /usr/local/jdk-11.0.20+8 -RUN ln -s /usr/local/jdk-11.0.20+8/bin/java /usr/local/bin/java -COPY --from=java-base /opt/google/dataflow/java_template_launcher /opt/google/dataflow/java_template_launcher -COPY --from=java-base /usr/licenses/LICENSE-java11 /usr/licenses/LICENSE-java11 - -WORKDIR /template - -ENV PIP_NO_DEPS=True -ENTRYPOINT ["/opt/google/dataflow/java_template_launcher"] \ No newline at end of file diff --git a/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/XlangDockerfileGeneratorTest.java b/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/XlangDockerfileGeneratorTest.java new file mode 100644 index 0000000000..f5598f7e91 --- /dev/null +++ b/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/XlangDockerfileGeneratorTest.java @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.plugin; + +import static com.google.common.truth.Truth.assertThat; +import static org.junit.Assert.assertTrue; + +import com.google.common.base.Charsets; +import com.google.common.io.Files; +import freemarker.template.TemplateException; +import java.io.File; +import java.io.IOException; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests for class {@link XlangDockerfileGenerator}. 
*/ +@RunWith(JUnit4.class) +public class XlangDockerfileGeneratorTest { + private final File outputFolder = Files.createTempDir().getAbsoluteFile(); + + @Test + public void testGenerateDockerfile() throws IOException, TemplateException { + new File(outputFolder.getAbsolutePath() + "/containerName").mkdirs(); + new File(outputFolder.getAbsolutePath() + "/extra_libs/example").mkdirs(); + File artifactPath = new File(outputFolder.getAbsolutePath() + "/artifactPath"); + artifactPath.mkdirs(); + XlangDockerfileGenerator.generateDockerfile( + "a java container image", + "beam_version", + "py_version", + "containerName", + outputFolder, + artifactPath, + "command_spec"); + File outputFile = + new File(outputFolder.getAbsolutePath() + "/classes/containerName/Dockerfile"); + + assertTrue(outputFile.exists()); + String fileContents = Files.toString(outputFile, Charsets.UTF_8); + assertThat(fileContents).contains("FROM a java container image"); + assertThat(fileContents).contains("=beam_version"); + assertThat(fileContents).contains("=py_version"); + } +} diff --git a/plugins/templates-maven-plugin/src/main/java/com/google/cloud/teleport/plugin/maven/TemplatesStageMojo.java b/plugins/templates-maven-plugin/src/main/java/com/google/cloud/teleport/plugin/maven/TemplatesStageMojo.java index 2fc1244ead..53d1397e41 100644 --- a/plugins/templates-maven-plugin/src/main/java/com/google/cloud/teleport/plugin/maven/TemplatesStageMojo.java +++ b/plugins/templates-maven-plugin/src/main/java/com/google/cloud/teleport/plugin/maven/TemplatesStageMojo.java @@ -30,6 +30,7 @@ import com.google.cloud.teleport.plugin.TemplateDefinitionsParser; import com.google.cloud.teleport.plugin.TemplatePluginUtils; import com.google.cloud.teleport.plugin.TemplateSpecsGenerator; +import com.google.cloud.teleport.plugin.XlangDockerfileGenerator; import com.google.cloud.teleport.plugin.YamlDockerfileGenerator; import com.google.cloud.teleport.plugin.model.ImageSpec; import com.google.cloud.teleport.plugin.model.TemplateDefinitions; @@ -483,15 +484,17 @@ private void stageFlexJavaTemplate( String dockerfileContainer = outputClassesDirectory.getPath() + "/" + containerName; String dockerfilePath = dockerfileContainer + "/Dockerfile"; String xlangCommandSpec = "/template/" + containerName + "/resources/" + commandSpecFileName; + String beamVersion = project.getProperties().getProperty("beam.version"); File dockerfile = new File(dockerfilePath); if (!dockerfile.exists()) { - PythonDockerfileGenerator.generateXlangDockerfile( - basePythonContainerImage, + XlangDockerfileGenerator.generateDockerfile( + baseContainerImage, + beamVersion, + pythonVersion, containerName, targetDirectory, project.getArtifact().getFile(), - xlangCommandSpec, - project.getProperties().getProperty("beam.version")); + xlangCommandSpec); } LOG.info("Staging XLANG image using Dockerfile"); stageXlangUsingDockerfile(imagePath, containerName + "/Dockerfile"); diff --git a/pom.xml b/pom.xml index 0492523cad..aa01852b89 100644 --- a/pom.xml +++ b/pom.xml @@ -358,6 +358,7 @@ **/*AutoValue_* + **/*Exception.* diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/common/JavascriptTextTransformer.java b/v1/src/main/java/com/google/cloud/teleport/templates/common/JavascriptTextTransformer.java index b75dfa47e2..eee28497ae 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/common/JavascriptTextTransformer.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/common/JavascriptTextTransformer.java @@ -105,7 +105,7 @@ void 
setJavascriptTextTransformFunctionName( optional = true, description = "JavaScript UDF auto-reload interval (minutes)", helpText = - "Specifies how frequently to reload the UDF, in minutes. If the value is greater than 0, Dataflow periodically checks the UDF file in Cloud Storage, and reloads the UDF if the file is modified. This parameter allows you to update the UDF while the pipeline is running, without needing to restart the job. If the value is 0, UDF reloading is disabled. The default value is 0.") + "Define the interval that workers may check for JavaScript UDF changes to reload the files.") @Default.Integer(0) ValueProvider getJavascriptTextTransformReloadIntervalMinutes(); diff --git a/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToSpanner.java b/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToSpanner.java index 21a00f7eb8..a29d67b18b 100644 --- a/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToSpanner.java +++ b/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToSpanner.java @@ -424,12 +424,12 @@ private static void validateSourceType(Options options) { options.setDatastreamSourceType(sourceType); } - private static String getSourceType(Options options) { + static String getSourceType(Options options) { if (options.getDatastreamSourceType() != null) { return options.getDatastreamSourceType(); } if (options.getStreamName() == null) { - throw new IllegalArgumentException("Stream name cannot be empty. "); + throw new IllegalArgumentException("Stream name cannot be empty."); } GcpOptions gcpOptions = options.as(GcpOptions.class); DataStreamClient datastreamClient; diff --git a/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/datastream/ChangeEventContextFactory.java b/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/datastream/ChangeEventContextFactory.java index bf29ae000e..4e0e05e529 100644 --- a/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/datastream/ChangeEventContextFactory.java +++ b/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/datastream/ChangeEventContextFactory.java @@ -38,12 +38,17 @@ private static String getSourceType(JsonNode changeEvent) throws InvalidChangeEv public static ChangeEventContext createChangeEventContext( JsonNode changeEvent, Ddl ddl, String shadowTablePrefix, String sourceType) throws ChangeEventConvertorException, InvalidChangeEventException { - - if (!sourceType.equals(getSourceType(changeEvent))) { + String sourceTypeFromChangeEvent; + try { + sourceTypeFromChangeEvent = getSourceType(changeEvent); + } catch (Exception e) { + throw new InvalidChangeEventException(e); + } + if (!sourceType.equals(sourceTypeFromChangeEvent)) { throw new InvalidChangeEventException( "Change event with invalid source. 
" + "Actual(" - + getSourceType(changeEvent) + + sourceTypeFromChangeEvent + "), Expected(" + sourceType + ")"); diff --git a/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/spanner/ProcessInformationSchema.java b/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/spanner/ProcessInformationSchema.java index 8ceac94623..e06796b080 100644 --- a/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/spanner/ProcessInformationSchema.java +++ b/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/spanner/ProcessInformationSchema.java @@ -199,5 +199,19 @@ List getDataTablesWithNoShadowTables(Ddl ddl) { .filter(f -> !existingShadowTables.contains(shadowTablePrefix + f)) .collect(Collectors.toList()); } + + /* + Added for the purpose of unit testing + */ + public void setDialect(Dialect dialect) { + this.dialect = dialect; + } + + /* + Added for the purpose of unit testing + */ + public void setSpannerAccessor(SpannerAccessor spannerAccessor) { + this.spannerAccessor = spannerAccessor; + } } } diff --git a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerDatatypeIT.java b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerDatatypeIT.java index 6c886aadb3..d1997ba376 100644 --- a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerDatatypeIT.java +++ b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerDatatypeIT.java @@ -87,7 +87,7 @@ public void setUp() throws IOException { pubsubResourceManager, new HashMap<>() { { - put("inputFileFormat", "json"); + put("inputFileFormat", "avro"); } }); } @@ -108,7 +108,7 @@ public static void cleanUp() throws IOException { } @Test - public void migrationTestWithAllDatatypeDefaultMapping() { + public void migrationTestWithAllDatatypeConversionMapping() { // Construct a ChainedConditionCheck with 4 stages. // 1. Send initial wave of events // 2. Wait on Spanner to have events @@ -118,8 +118,8 @@ public void migrationTestWithAllDatatypeDefaultMapping() { uploadDataStreamFile( jobInfo, TABLE1, - "backfill.jsonl", - "DataStreamToSpannerDatatypeIT/mysql-backfill-AllDatatypeColumns.jsonl"), + "backfill.avro", + "DataStreamToSpannerDatatypeIT/mysql-backfill-AllDatatypeColumns.avro"), SpannerRowsCheck.builder(spannerResourceManager, TABLE1) .setMinRows(2) .setMaxRows(2) @@ -142,8 +142,13 @@ public void migrationTestWithAllDatatypeDefaultMapping() { uploadDataStreamFile( jobInfo, TABLE1, - "cdc1.jsonl", - "DataStreamToSpannerDatatypeIT/mysql-cdc-AllDatatypeColumns.jsonl"), + "cdc1.avro", + "DataStreamToSpannerDatatypeIT/mysql-cdc1-AllDatatypeColumns.avro"), + uploadDataStreamFile( + jobInfo, + TABLE1, + "cdc2.avro", + "DataStreamToSpannerDatatypeIT/mysql-cdc2-AllDatatypeColumns.avro"), SpannerRowsCheck.builder(spannerResourceManager, TABLE1) .setMinRows(1) .setMaxRows(1) @@ -161,7 +166,7 @@ public void migrationTestWithAllDatatypeDefaultMapping() { } @Test - public void migrationTestWithAllDatatypeConversionMapping() { + public void migrationTestWithAllDatatypeDefaultMapping() { // Construct a ChainedConditionCheck with 4 stages. // 1. Send initial wave of events // 2. 
Wait on Spanner to have events @@ -171,8 +176,8 @@ public void migrationTestWithAllDatatypeConversionMapping() { uploadDataStreamFile( jobInfo, TABLE2, - "backfill.jsonl", - "DataStreamToSpannerDatatypeIT/mysql-backfill-AllDatatypeColumns2.jsonl"), + "backfill.avro", + "DataStreamToSpannerDatatypeIT/mysql-backfill-AllDatatypeColumns2.avro"), SpannerRowsCheck.builder(spannerResourceManager, TABLE2) .setMinRows(2) .setMaxRows(2) @@ -195,8 +200,8 @@ public void migrationTestWithAllDatatypeConversionMapping() { uploadDataStreamFile( jobInfo, TABLE2, - "cdc1.jsonl", - "DataStreamToSpannerDatatypeIT/mysql-cdc-AllDatatypeColumns2.jsonl"), + "cdc1.avro", + "DataStreamToSpannerDatatypeIT/mysql-cdc-AllDatatypeColumns2.avro"), SpannerRowsCheck.builder(spannerResourceManager, TABLE2) .setMinRows(1) .setMaxRows(1) @@ -219,72 +224,83 @@ private void assertAllDatatypeColumnsTableBackfillContents() { Map row = new HashMap<>(); row.put("varchar_column", "value1"); row.put("tinyint_column", "10"); - row.put("text_column", "dGV4dF9kYXRhXzEK"); - row.put("date_column", "2024-02-08T00:00:00.000Z"); + row.put("date_column", "2024-02-08T00:00:00Z"); row.put("smallint_column", "50"); row.put("mediumint_column", "1000"); row.put("int_column", "50000"); row.put("bigint_column", "987654321"); row.put("float_column", "45.67"); row.put("double_column", "123.789"); - row.put("datetime_column", "2024-02-08T08:15:30.000Z"); - row.put("timestamp_column", "2024-02-08T08:15:30.000Z"); + row.put("decimal_column", "456.12"); + row.put("datetime_column", "2024-02-08T08:15:30Z"); + row.put("timestamp_column", "2024-02-08T08:15:30Z"); row.put("time_column", "29730000000"); row.put("year_column", "2022"); - row.put("char_column", "Y2hhcjEK"); + // text, char, tinytext, mediumtext, longtext are BYTE columns + row.put("text_column", "/u/9n58P"); + row.put("char_column", "v58P"); + row.put("tinytext_column", "7+/+7/2fnw8="); + row.put("mediumtext_column", "/+3v79/v2vrx"); + row.put("longtext_column", "/+/v3+/a+vE="); row.put("tinyblob_column", "74696e79626c6f625f646174615f31"); - row.put("tinytext_column", "dGlueXRleHRfZGF0YV8xCg=="); row.put("blob_column", "626c6f625f646174615f31"); row.put("mediumblob_column", "6d656469756d626c6f625f646174615f31"); - row.put("mediumtext_column", "bWVkaXVtdGV4dF9kYXRhXzE="); row.put("longblob_column", "6c6f6e67626c6f625f646174615f31"); - row.put("longtext_column", "bG9uZ3RleHRfZGF0YV8x"); row.put("enum_column", "2"); row.put("bool_column", 0); row.put("other_bool_column", "1"); - row.put("binary_column", "62696e6172795f31"); + // The result which is shown in the matcher does not contain the full 40 characters + // of the binary and the ending characters seem to be getting truncated. + // Have manually verified that the values in spanner and source are identical for all the + // 40 characters. + // TODO: This is likely an issue with the matcher, figure out why this is happening. 
+ row.put("binary_column", "62696e6172795f3100000000000000000..."); row.put("varbinary_column", "76617262696e6172795f646174615f31"); row.put("bit_column", "102"); events.add(row); + row.clear(); row.put("varchar_column", "value2"); row.put("tinyint_column", "5"); - row.put("text_column", "dGV4dF9kYXRhXzIK"); - row.put("date_column", "2024-02-09T00:00:00.000Z"); + row.put("date_column", "2024-02-09T00:00:00Z"); row.put("smallint_column", "25"); row.put("mediumint_column", "500"); row.put("int_column", "25000"); row.put("bigint_column", "987654"); row.put("float_column", "12.34"); row.put("double_column", "56.789"); - row.put("datetime_column", "2024-02-09T15:30:45.000Z"); - row.put("timestamp_column", "2024-02-09T15:30:45.000Z"); + row.put("decimal_column", 123.45); + row.put("datetime_column", "2024-02-09T15:30:45Z"); + row.put("timestamp_column", "2024-02-09T15:30:45Z"); row.put("time_column", "55845000000"); row.put("year_column", "2023"); - row.put("char_column", "Y2hhcjIK"); + // text, char, tinytext, mediumtext, longtext are BYTE columns + row.put("text_column", "/u/9n58f"); + row.put("char_column", "v58f"); + row.put("tinytext_column", "7+/+7/2fnx8="); + row.put("mediumtext_column", "/+3v79/v2vry"); + row.put("longtext_column", "/+/v3+/a+vI="); row.put("tinyblob_column", "74696e79626c6f625f646174615f32"); - row.put("tinytext_column", "dGlueXRleHRfZGF0YV8yCg=="); row.put("blob_column", "626c6f625f646174615f32"); row.put("mediumblob_column", "6d656469756d626c6f625f646174615f32"); - row.put("mediumtext_column", "bWVkaXVtdGV4dF9kYXRhXzI="); row.put("longblob_column", "6c6f6e67626c6f625f646174615f32"); - row.put("longtext_column", "bG9uZ3RleHRfZGF0YV8y"); row.put("enum_column", "3"); row.put("bool_column", 1); row.put("other_bool_column", "0"); - row.put("binary_column", "62696e6172795f32"); + row.put("binary_column", "62696e6172795f3200000000000000000..."); row.put("varbinary_column", "76617262696e6172795f646174615f32"); row.put("bit_column", "25"); events.add(row); SpannerAsserts.assertThatStructs( spannerResourceManager.runQuery( - "select varchar_column, tinyint_column, text_column, date_column" + "select varchar_column, tinyint_column, date_column" + ", smallint_column, mediumint_column, int_column, bigint_column, float_column" - + ", double_column, datetime_column, timestamp_column, time_column, year_column, char_column" - + ", tinyblob_column, tinytext_column, blob_column, mediumblob_column, mediumtext_column, " - + " longblob_column, longtext_column, enum_column, bool_column, other_bool_column, binary_column" - + ", varbinary_column, bit_column from AllDatatypeColumns")) + + ", double_column, datetime_column, timestamp_column, time_column, year_column" + + ", tinyblob_column, blob_column, mediumblob_column" + + ", longblob_column, enum_column, bool_column, other_bool_column" + + ", varbinary_column, bit_column, decimal_column, text_column, binary_column" + + ", char_column, tinytext_column, mediumtext_column, longtext_column from AllDatatypeColumns")) .hasRecordsUnorderedCaseInsensitiveColumns(events); } @@ -294,42 +310,45 @@ private void assertAllDatatypeColumnsTableCdcContents() { Map row = new HashMap<>(); row.put("varchar_column", "value1"); row.put("tinyint_column", "15"); - row.put("text_column", "dGV4dF9kYXRhXzEK"); - row.put("date_column", "2024-02-08T00:00:00.000Z"); + row.put("date_column", "2024-02-08T00:00:00Z"); row.put("smallint_column", "50"); row.put("mediumint_column", "1000"); row.put("int_column", "50000"); row.put("bigint_column", "987654321"); 
row.put("float_column", "45.67"); row.put("double_column", "123.789"); - row.put("datetime_column", "2024-02-08T08:15:30.000Z"); - row.put("timestamp_column", "2024-02-08T08:15:30.000Z"); + row.put("decimal_column", "456.12"); + row.put("datetime_column", "2024-02-08T08:15:30Z"); + row.put("timestamp_column", "2024-02-08T08:15:30Z"); row.put("time_column", "29730000000"); row.put("year_column", "2022"); - row.put("char_column", "Y2hhcjEK"); + // text, char, tinytext, mediumtext, longtext are BYTE columns + row.put("text_column", "/u/9n58P"); + row.put("char_column", "v58P"); + row.put("tinytext_column", "7+/+7/2fnw8="); + row.put("mediumtext_column", "/+3v79/v2vrx"); + row.put("longtext_column", "/+/v3+/a+vE="); row.put("tinyblob_column", "74696e79626c6f625f646174615f31"); - row.put("tinytext_column", "dGlueXRleHRfZGF0YV8xCg=="); row.put("blob_column", "626c6f625f646174615f31"); row.put("mediumblob_column", "6d656469756d626c6f625f646174615f31"); - row.put("mediumtext_column", "bWVkaXVtdGV4dF9kYXRhXzE="); row.put("longblob_column", "6c6f6e67626c6f625f646174615f31"); - row.put("longtext_column", "bG9uZ3RleHRfZGF0YV8x"); row.put("enum_column", "2"); row.put("bool_column", 0); row.put("other_bool_column", "1"); - row.put("binary_column", "62696e6172795f31"); + row.put("binary_column", "62696e6172795f3100000000000000000..."); row.put("varbinary_column", "76617262696e6172795f646174615f31"); row.put("bit_column", "102"); events.add(row); SpannerAsserts.assertThatStructs( spannerResourceManager.runQuery( - "select varchar_column, tinyint_column, text_column, date_column" + "select varchar_column, tinyint_column, date_column" + ", smallint_column, mediumint_column, int_column, bigint_column, float_column" - + ", double_column, datetime_column, timestamp_column, time_column, year_column, char_column" - + ", tinyblob_column, tinytext_column, blob_column, mediumblob_column, mediumtext_column, " - + " longblob_column, longtext_column, enum_column, bool_column, other_bool_column, binary_column" - + ", varbinary_column, bit_column from AllDatatypeColumns")) + + ", double_column, datetime_column, timestamp_column, time_column, year_column" + + ", tinyblob_column, blob_column, mediumblob_column" + + ", longblob_column, enum_column, bool_column, other_bool_column" + + ", varbinary_column, bit_column, decimal_column, text_column, binary_column" + + ", char_column, tinytext_column, mediumtext_column, longtext_column from AllDatatypeColumns")) .hasRecordsUnorderedCaseInsensitiveColumns(events); } @@ -347,6 +366,7 @@ private void assertAllDatatypeColumns2TableBackfillContents() { row.put("bigint_column", 987654321); row.put("float_column", 45.67); row.put("double_column", 123.789); + row.put("decimal_column", 456.12); row.put("datetime_column", "2024-02-08T08:15:30Z"); row.put("timestamp_column", "2024-02-08T08:15:30Z"); row.put("time_column", "29730000000"); @@ -368,6 +388,7 @@ private void assertAllDatatypeColumns2TableBackfillContents() { row.put("bit_column", "AQI="); events.add(row); + row.clear(); row.put("varchar_column", "value2"); row.put("tinyint_column", 5); row.put("text_column", "text2"); @@ -378,6 +399,7 @@ private void assertAllDatatypeColumns2TableBackfillContents() { row.put("bigint_column", 987654); row.put("float_column", 12.34); row.put("double_column", 56.789); + row.put("decimal_column", 123.45); row.put("datetime_column", "2024-02-09T15:30:45Z"); row.put("timestamp_column", "2024-02-09T15:30:45Z"); row.put("time_column", "55845000000"); @@ -402,9 +424,9 @@ private void 
assertAllDatatypeColumns2TableBackfillContents() { "select varchar_column, tinyint_column, text_column, date_column" + ", smallint_column, mediumint_column, int_column, bigint_column, float_column" + ", double_column, datetime_column, timestamp_column, time_column, year_column, char_column" - + ", tinyblob_column, tinytext_column, blob_column, mediumblob_column, mediumtext_column, " - + " longblob_column, longtext_column, enum_column, bool_column, binary_column" - + ", varbinary_column, bit_column from AllDatatypeColumns2")) + + ", tinyblob_column, tinytext_column, blob_column, mediumblob_column, mediumtext_column" + + ", longblob_column, longtext_column, enum_column, bool_column, binary_column" + + ", varbinary_column, bit_column, decimal_column from AllDatatypeColumns2")) .hasRecordsUnorderedCaseInsensitiveColumns(events); } @@ -421,6 +443,7 @@ private void assertAllDatatypeColumns2TableCdcContents() { row.put("bigint_column", 987654321); row.put("float_column", 45.67); row.put("double_column", 123.789); + row.put("decimal_column", 456.12); row.put("datetime_column", "2024-02-08T08:15:30Z"); row.put("timestamp_column", "2024-02-08T08:15:30Z"); row.put("time_column", "29730000000"); @@ -447,9 +470,9 @@ private void assertAllDatatypeColumns2TableCdcContents() { "select varchar_column, tinyint_column, text_column, date_column" + ", smallint_column, mediumint_column, int_column, bigint_column, float_column" + ", double_column, datetime_column, timestamp_column, time_column, year_column, char_column" - + ", tinyblob_column, tinytext_column, blob_column, mediumblob_column, mediumtext_column, " - + " longblob_column, longtext_column, enum_column, bool_column, binary_column" - + ", varbinary_column, bit_column from AllDatatypeColumns2")) + + ", tinyblob_column, tinytext_column, blob_column, mediumblob_column, mediumtext_column" + + ", longblob_column, longtext_column, enum_column, bool_column, binary_column" + + ", varbinary_column, bit_column, decimal_column from AllDatatypeColumns2")) .hasRecordsUnorderedCaseInsensitiveColumns(events); } } diff --git a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerShardedMigrationIT.java b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT.java similarity index 85% rename from v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerShardedMigrationIT.java rename to v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT.java index 5ef80ea572..fcbdfce2d1 100644 --- a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerShardedMigrationIT.java +++ b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT.java @@ -44,30 +44,36 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -/** Sharded data migration Integration test for {@link DataStreamToSpanner} Flex template. */ +/** + * Sharded data migration Integration test with addition of migration_shard_id column in the schema + * for each table in the {@link DataStreamToSpanner} Flex template. 
+ */ @Category({TemplateIntegrationTest.class, SkipDirectRunnerTest.class}) @TemplateIntegrationTest(DataStreamToSpanner.class) @RunWith(JUnit4.class) -public class DataStreamToSpannerShardedMigrationIT extends DataStreamToSpannerITBase { +public class DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT + extends DataStreamToSpannerITBase { private static final Logger LOG = - LoggerFactory.getLogger(DataStreamToSpannerShardedMigrationIT.class); + LoggerFactory.getLogger( + DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT.class); private static final String TABLE = "Users"; private static final String MOVIE_TABLE = "Movie"; private static final String SESSION_FILE_RESOURCE = - "DataStreamToSpannerShardedMigrationIT/mysql-session.json"; + "DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/mysql-session.json"; private static final String TRANSFORMATION_CONTEXT_RESOURCE_SHARD1 = - "DataStreamToSpannerShardedMigrationIT/transformation-context-shard1.json"; + "DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/transformation-context-shard1.json"; private static final String TRANSFORMATION_CONTEXT_RESOURCE_SHARD2 = - "DataStreamToSpannerShardedMigrationIT/transformation-context-shard2.json"; + "DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/transformation-context-shard2.json"; private static final String SPANNER_DDL_RESOURCE = - "DataStreamToSpannerShardedMigrationIT/spanner-schema.sql"; + "DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/spanner-schema.sql"; - private static HashSet testInstances = new HashSet<>(); + private static HashSet + testInstances = new HashSet<>(); private static PipelineLauncher.LaunchInfo jobInfo1; private static PipelineLauncher.LaunchInfo jobInfo2; @@ -83,7 +89,7 @@ public class DataStreamToSpannerShardedMigrationIT extends DataStreamToSpannerIT public void setUp() throws IOException { // Prevent cleaning up of dataflow job after a test method is executed. 
skipBaseCleanup = true; - synchronized (DataStreamToSpannerShardedMigrationIT.class) { + synchronized (DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT.class) { testInstances.add(this); if (spannerResourceManager == null) { spannerResourceManager = setUpSpannerResourceManager(); @@ -132,7 +138,7 @@ public void setUp() throws IOException { */ @AfterClass public static void cleanUp() throws IOException { - for (DataStreamToSpannerShardedMigrationIT instance : testInstances) { + for (DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT instance : testInstances) { instance.tearDownBase(); } ResourceManagerUtils.cleanResources(spannerResourceManager, pubsubResourceManager); @@ -151,17 +157,17 @@ public void multiShardMigration() { jobInfo1, TABLE, "Users-backfill-logical-shard1.jsonl", - "DataStreamToSpannerShardedMigrationIT/Users-backfill-logical-shard1.jsonl"), + "DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-backfill-logical-shard1.jsonl"), uploadDataStreamFile( jobInfo1, TABLE, "Users-backfill-logical-shard2.jsonl", - "DataStreamToSpannerShardedMigrationIT/Users-backfill-logical-shard2.jsonl"), + "DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-backfill-logical-shard2.jsonl"), uploadDataStreamFile( jobInfo1, TABLE, "Users-cdc-shard1.jsonl", - "DataStreamToSpannerShardedMigrationIT/Users-cdc-shard1.jsonl"))) + "DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-cdc-shard1.jsonl"))) .build(); // Wait for conditions @@ -179,17 +185,17 @@ public void multiShardMigration() { jobInfo2, TABLE, "Users-backfill-logical-shard3.jsonl", - "DataStreamToSpannerShardedMigrationIT/Users-backfill-logical-shard3.jsonl"), + "DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-backfill-logical-shard3.jsonl"), uploadDataStreamFile( jobInfo2, TABLE, "Users-backfill-logical-shard4.jsonl", - "DataStreamToSpannerShardedMigrationIT/Users-backfill-logical-shard4.jsonl"), + "DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-backfill-logical-shard4.jsonl"), uploadDataStreamFile( jobInfo2, TABLE, "Users-cdc-shard2.jsonl", - "DataStreamToSpannerShardedMigrationIT/Users-cdc-shard2.jsonl"))) + "DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-cdc-shard2.jsonl"))) .build(); result = @@ -224,12 +230,12 @@ public void pkReorderedMultiShardMigration() { jobInfo1, MOVIE_TABLE, "Movie-shard1.jsonl", - "DataStreamToSpannerShardedMigrationIT/Movie-shard1.jsonl"), + "DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Movie-shard1.jsonl"), uploadDataStreamFile( jobInfo1, MOVIE_TABLE, "Movie-shard2.jsonl", - "DataStreamToSpannerShardedMigrationIT/Movie-shard2.jsonl"))) + "DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Movie-shard2.jsonl"))) .build(); // Wait for conditions diff --git a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT.java b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT.java new file mode 100644 index 0000000000..4d05fc4cdf --- /dev/null +++ b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT.java @@ -0,0 +1,286 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file 
except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.v2.templates; + +import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatResult; + +import com.google.cloud.teleport.metadata.SkipDirectRunnerTest; +import com.google.cloud.teleport.metadata.TemplateIntegrationTest; +import java.io.IOException; +import java.time.Duration; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import org.apache.beam.it.common.PipelineLauncher; +import org.apache.beam.it.common.PipelineOperator; +import org.apache.beam.it.common.utils.ResourceManagerUtils; +import org.apache.beam.it.conditions.ChainedConditionCheck; +import org.apache.beam.it.conditions.ConditionCheck; +import org.apache.beam.it.gcp.pubsub.PubsubResourceManager; +import org.apache.beam.it.gcp.spanner.SpannerResourceManager; +import org.apache.beam.it.gcp.spanner.conditions.SpannerRowsCheck; +import org.apache.beam.it.gcp.spanner.matchers.SpannerAsserts; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Sharded data migration Integration test without any migration_shard_id column transformation for + * the {@link DataStreamToSpanner} Flex template. + */ +@Category({TemplateIntegrationTest.class, SkipDirectRunnerTest.class}) +@TemplateIntegrationTest(DataStreamToSpanner.class) +@RunWith(JUnit4.class) +public class DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT + extends DataStreamToSpannerITBase { + private static final Logger LOG = + LoggerFactory.getLogger( + DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT.class); + + private static final String TABLE = "Users"; + private static final String SPANNER_DDL_RESOURCE = + "DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/spanner-schema.sql"; + + private static HashSet<DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT> + testInstances = new HashSet<>(); + private static PipelineLauncher.LaunchInfo jobInfo1; + private static PipelineLauncher.LaunchInfo jobInfo2; + + public static PubsubResourceManager pubsubResourceManager; + public static SpannerResourceManager spannerResourceManager; + + /** + * Set up resource managers and launch the Dataflow jobs once during the execution of this test class. + * + * @throws IOException + */ + @Before + public void setUp() throws IOException { + // Prevent cleaning up of dataflow job after a test method is executed.
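+ // The Dataflow jobs and the Spanner/Pub/Sub resource managers are static and shared across test methods; they are torn down once in cleanUp().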
+ skipBaseCleanup = true; + synchronized (DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT.class) { + testInstances.add(this); + if (spannerResourceManager == null) { + spannerResourceManager = setUpSpannerResourceManager(); + createSpannerDDL(spannerResourceManager, SPANNER_DDL_RESOURCE); + } + if (pubsubResourceManager == null) { + pubsubResourceManager = setUpPubSubResourceManager(); + } + if (jobInfo1 == null) { + jobInfo1 = + launchDataflowJob( + getClass().getSimpleName() + "shard1", + null, + null, + "shard1", + spannerResourceManager, + pubsubResourceManager, + new HashMap<>() { + { + put("inputFileFormat", "json"); + } + }); + } + if (jobInfo2 == null) { + jobInfo2 = + launchDataflowJob( + getClass().getSimpleName() + "shard2", + null, + null, + "shard2", + spannerResourceManager, + pubsubResourceManager, + new HashMap<>() { + { + put("inputFileFormat", "json"); + } + }); + } + } + } + + /** + * Clean up the Dataflow jobs and all the resources and resource managers. + * + * @throws IOException + */ + @AfterClass + public static void cleanUp() throws IOException { + for (DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT instance : + testInstances) { + instance.tearDownBase(); + } + ResourceManagerUtils.cleanResources(spannerResourceManager, pubsubResourceManager); + } + + @Test + public void multiShardMigration() { + // Two Dataflow jobs are running, corresponding to two physical shards that contain two logical + // shards each. Migrates the Users table from 4 logical shards and asserts that data from all the + // shards reaches Spanner. No transformation context is supplied, so no migration shard id column + // is populated. + ChainedConditionCheck conditionCheck = + ChainedConditionCheck.builder( + List.of( + uploadDataStreamFile( + jobInfo1, + TABLE, + "Users-backfill-logical-shard1.jsonl", + "DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard1.jsonl"), + uploadDataStreamFile( + jobInfo1, + TABLE, + "Users-backfill-logical-shard2.jsonl", + "DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard2.jsonl"), + uploadDataStreamFile( + jobInfo1, + TABLE, + "Users-cdc-shard1.jsonl", + "DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-cdc-shard1.jsonl"))) + .build(); + + // Wait for conditions + PipelineOperator.Result result = + pipelineOperator() + .waitForCondition(createConfig(jobInfo1, Duration.ofMinutes(8)), conditionCheck); + + // Assert Conditions + assertThatResult(result).meetsConditions(); + + conditionCheck = + ChainedConditionCheck.builder( + List.of( + uploadDataStreamFile( + jobInfo2, + TABLE, + "Users-backfill-logical-shard3.jsonl", + "DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard3.jsonl"), + uploadDataStreamFile( + jobInfo2, + TABLE, + "Users-backfill-logical-shard4.jsonl", + "DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard4.jsonl"), + uploadDataStreamFile( + jobInfo2, + TABLE, + "Users-cdc-shard2.jsonl", + "DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-cdc-shard2.jsonl"))) + .build(); + + result = + pipelineOperator() + .waitForCondition(createConfig(jobInfo2, Duration.ofMinutes(8)), conditionCheck); + assertThatResult(result).meetsConditions(); + + ConditionCheck rowsConditionCheck = + SpannerRowsCheck.builder(spannerResourceManager, TABLE) + .setMinRows(12) + .setMaxRows(12) + .build(); + result =
pipelineOperator() + .waitForCondition(createConfig(jobInfo1, Duration.ofMinutes(10)), rowsConditionCheck); + assertThatResult(result).meetsConditions(); + + // Assert specific rows + assertUsersTableContents(); + } + + private void assertUsersTableContents() { + List<Map<String, Object>> events = new ArrayList<>(); + + Map<String, Object> row = new HashMap<>(); + row.put("id", 1); + row.put("name", "Tester1"); + row.put("age", 21); + events.add(row); + + row = new HashMap<>(); + row.put("id", 5); + row.put("name", "Tester5"); + row.put("age", 23); + events.add(row); + + row = new HashMap<>(); + row.put("id", 6); + row.put("name", "Tester6"); + row.put("age", 22); + events.add(row); + + row = new HashMap<>(); + row.put("id", 7); + row.put("name", "Tester7"); + row.put("age", 7); + events.add(row); + + row = new HashMap<>(); + row.put("id", 8); + row.put("name", "Tester8"); + row.put("age", 8); + events.add(row); + + row = new HashMap<>(); + row.put("id", 9); + row.put("name", "Tester9"); + row.put("age", 9); + events.add(row); + + row = new HashMap<>(); + row.put("id", 10); + row.put("name", "Tester10"); + row.put("age", 10); + events.add(row); + + row = new HashMap<>(); + row.put("id", 11); + row.put("name", "Tester11"); + row.put("age", 11); + events.add(row); + + row = new HashMap<>(); + row.put("id", 12); + row.put("name", "Tester12"); + row.put("age", 12); + events.add(row); + + row = new HashMap<>(); + row.put("id", 13); + row.put("name", "Tester13"); + row.put("age", 13); + events.add(row); + + row = new HashMap<>(); + row.put("id", 14); + row.put("name", "Tester14"); + row.put("age", 14); + events.add(row); + + row = new HashMap<>(); + row.put("id", 15); + row.put("name", "Tester15"); + row.put("age", 15); + events.add(row); + + SpannerAsserts.assertThatStructs(spannerResourceManager.runQuery("select * from Users")) + .hasRecordsUnorderedCaseInsensitiveColumns(events); + } +} diff --git a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerTest.java b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerTest.java new file mode 100644 index 0000000000..f471d90afa --- /dev/null +++ b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerTest.java @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License.
+ */ +package com.google.cloud.teleport.v2.templates; + +import static org.junit.Assert.assertEquals; + +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +public class DataStreamToSpannerTest { + + @Rule public ExpectedException expectedEx = ExpectedException.none(); + + @Test + public void testGetSourceTypeWithDatastreamSourceType() { + String[] args = new String[] {"--datastreamSourceType=mysql"}; + DataStreamToSpanner.Options options = + PipelineOptionsFactory.fromArgs(args) + .withValidation() + .as(DataStreamToSpanner.Options.class); + String result = DataStreamToSpanner.getSourceType(options); + + assertEquals("mysql", result); + } + + @Test + public void testGetSourceTypeWithEmptyStreamName() { + expectedEx.expect(IllegalArgumentException.class); + expectedEx.expectMessage("Stream name cannot be empty."); + String[] args = new String[] {""}; + DataStreamToSpanner.Options options = + PipelineOptionsFactory.fromArgs(args) + .withValidation() + .as(DataStreamToSpanner.Options.class); + String result = DataStreamToSpanner.getSourceType(options); + } + + @Test + public void testGetSourceTypeWithGcpCredentialsMissing() { + expectedEx.expect(IllegalArgumentException.class); + expectedEx.expectMessage("Unable to initialize DatastreamClient:"); + String[] args = + new String[] { + "--streamName=projects/sample-project/locations/sample-location/streams/sample-stream" + }; + DataStreamToSpanner.Options options = + PipelineOptionsFactory.fromArgs(args) + .withValidation() + .as(DataStreamToSpanner.Options.class); + String result = DataStreamToSpanner.getSourceType(options); + } +} diff --git a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/datastream/ChangeEventContextFactoryTest.java b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/datastream/ChangeEventContextFactoryTest.java new file mode 100644 index 0000000000..d49453bfd2 --- /dev/null +++ b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/datastream/ChangeEventContextFactoryTest.java @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.teleport.v2.templates.datastream; + +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.cloud.teleport.v2.spanner.ddl.Ddl; +import java.io.IOException; +import org.json.JSONObject; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +public class ChangeEventContextFactoryTest { + + @Rule public ExpectedException expectedEx = ExpectedException.none(); + + private JsonNode getJsonNode(String json) throws IOException { + ObjectMapper mapper = new ObjectMapper(); + mapper.enable(DeserializationFeature.USE_BIG_DECIMAL_FOR_FLOATS); + return mapper.readTree(json); + } + + @Test + public void testCreateChangeEventContextWithEmptySourceType() throws Exception { + expectedEx.expect(InvalidChangeEventException.class); + JSONObject changeEvent = ChangeEventConvertorTest.getTestChangeEvent("Users2"); + // Test Ddl + Ddl ddl = ChangeEventConvertorTest.getTestDdl(); + ChangeEventContext changeEventContext = + ChangeEventContextFactory.createChangeEventContext( + getJsonNode(changeEvent.toString()), + ddl, + "shadow_", + DatastreamConstants.MYSQL_SOURCE_TYPE); + } + + @Test + public void testCreateChangeEventContextWithNonMatchingSourceType() throws Exception { + expectedEx.expect(InvalidChangeEventException.class); + expectedEx.expectMessage("Change event with invalid source"); + JSONObject changeEvent = ChangeEventConvertorTest.getTestChangeEvent("Users2"); + changeEvent.put( + DatastreamConstants.EVENT_SOURCE_TYPE_KEY, DatastreamConstants.ORACLE_SOURCE_TYPE); + // Test Ddl + Ddl ddl = ChangeEventConvertorTest.getTestDdl(); + ChangeEventContext changeEventContext = + ChangeEventContextFactory.createChangeEventContext( + getJsonNode(changeEvent.toString()), + ddl, + "shadow_", + DatastreamConstants.MYSQL_SOURCE_TYPE); + } + + @Test + public void testCreateChangeEventContextWithNotSupportedSource() throws Exception { + expectedEx.expect(InvalidChangeEventException.class); + expectedEx.expectMessage("Unsupported source database"); + JSONObject changeEvent = ChangeEventConvertorTest.getTestChangeEvent("Users2"); + changeEvent.put(DatastreamConstants.EVENT_SOURCE_TYPE_KEY, "xyz"); + // Test Ddl + Ddl ddl = ChangeEventConvertorTest.getTestDdl(); + ChangeEventContext changeEventContext = + ChangeEventContextFactory.createChangeEventContext( + getJsonNode(changeEvent.toString()), ddl, "shadow_", "xyz"); + } +} diff --git a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/spanner/ProcessInformationSchemaTest.java b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/spanner/ProcessInformationSchemaTest.java index 4ca5b67526..03f0c0d26d 100644 --- a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/spanner/ProcessInformationSchemaTest.java +++ b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/spanner/ProcessInformationSchemaTest.java @@ -17,15 +17,28 @@ import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.MatcherAssert.assertThat; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static 
org.mockito.Mockito.when; +import com.google.api.gax.longrunning.OperationFuture; +import com.google.cloud.spanner.DatabaseAdminClient; import com.google.cloud.spanner.Dialect; import com.google.cloud.teleport.v2.spanner.ddl.Ddl; +import com.google.spanner.admin.database.v1.UpdateDatabaseDdlMetadata; import java.util.Arrays; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; +import org.apache.beam.sdk.io.gcp.spanner.SpannerAccessor; import org.apache.beam.sdk.io.gcp.spanner.SpannerConfig; +import org.apache.beam.sdk.options.ValueProvider; import org.junit.Test; /** Unit tests for ProcessInformationSchema class. */ @@ -374,4 +387,54 @@ public void canListDataTablesWithNoShadowTablesInDdl() throws Exception { processInformationSchema.getDataTablesWithNoShadowTables(getTestDdlWithGSqlDialect()); assertThat(dataTablesWithNoShadowTables, is(Arrays.asList("Users_interleaved"))); } + + @Test + public void canCreateShadowTablesInSpanner() throws Exception { + SpannerConfig spannerConfig = mock(SpannerConfig.class); + SpannerAccessor spannerAccessor = mock(SpannerAccessor.class); + DatabaseAdminClient databaseAdminClient = mock(DatabaseAdminClient.class); + OperationFuture operationFuture = mock(OperationFuture.class); + + ProcessInformationSchema.ProcessInformationSchemaFn processInformationSchema = + new ProcessInformationSchema.ProcessInformationSchemaFn( + spannerConfig, /* shouldCreateShadowTables= */ true, "shadow_", "mysql"); + processInformationSchema.setDialect(Dialect.GOOGLE_STANDARD_SQL); + processInformationSchema.setSpannerAccessor(spannerAccessor); + + // Mock method calls + when(databaseAdminClient.updateDatabaseDdl(anyString(), anyString(), any(), any())) + .thenReturn(operationFuture); + when(spannerAccessor.getDatabaseAdminClient()).thenReturn(databaseAdminClient); + when(operationFuture.get(anyLong(), any())).thenReturn(null); + ValueProvider sampleValueProvider = + ValueProvider.StaticValueProvider.of("sample-value"); + when(spannerConfig.getInstanceId()).thenReturn(sampleValueProvider); + when(spannerConfig.getDatabaseId()).thenReturn(sampleValueProvider); + + processInformationSchema.createShadowTablesInSpanner(getTestDdlWithGSqlDialect()); + + List createShadowTableStatements = + Collections.singletonList( + "CREATE TABLE `shadow_Users_interleaved` (\n" + + "\t`first_name` STRING(MAX),\n" + + "\t`last_name` STRING(5),\n" + + "\t`age` INT64,\n" + + "\t`bool_field` BOOL,\n" + + "\t`int64_field` INT64,\n" + + "\t`float64_field` FLOAT64,\n" + + "\t`string_field` STRING(MAX),\n" + + "\t`bytes_field` BYTES(MAX),\n" + + "\t`timestamp_field` TIMESTAMP,\n" + + "\t`date_field` DATE,\n" + + "\t`id` INT64,\n" + + "\t`timestamp` INT64,\n" + + "\t`log_file` STRING(MAX),\n" + + "\t`log_position` INT64,\n" + + ") PRIMARY KEY (`first_name` ASC, `last_name` DESC, `age` ASC, `bool_field` ASC, `int64_field` ASC, `float64_field` ASC, `string_field` ASC, `bytes_field` ASC, `timestamp_field` ASC, `date_field` ASC, `id` ASC)"); + // Verify method calls + verify(databaseAdminClient, times(1)) + .updateDatabaseDdl( + eq("sample-value"), eq("sample-value"), eq(createShadowTableStatements), eq(null)); + verify(operationFuture, times(1)).get(anyLong(), any()); + } } diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/AllDatatypeColumns-statements.sql b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/AllDatatypeColumns-statements.sql new file mode 100644 index 0000000000..f0e2752a42 --- 
/dev/null +++ b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/AllDatatypeColumns-statements.sql @@ -0,0 +1,73 @@ +INSERT INTO AllDatatypeColumns ( + varchar_column, tinyint_column, text_column, date_column, smallint_column, + mediumint_column, int_column, bigint_column, float_column, double_column, + decimal_column, datetime_column, timestamp_column, time_column, year_column, + char_column, tinyblob_column, tinytext_column, blob_column, mediumblob_column, + mediumtext_column, longblob_column, longtext_column, enum_column, bool_column, + other_bool_column, binary_column, varbinary_column, bit_column +) VALUES ( + 'value1', 10, UNHEX('746578745f646174615f310a'), '2024-02-08', 50, + 1000, 50000, 987654321, 45.67, 123.789, 456.12, + '2024-02-08 08:15:30', '2024-02-08 08:15:30', SEC_TO_TIME(29730), 2022, + UNHEX('63686172310a'), UNHEX('74696e79626c6f625f646174615f31'), + UNHEX('74696e79746578745f646174615f310a'), UNHEX('626c6f625f646174615f31'), + UNHEX('6d656469756d626c6f625f646174615f31'), + UNHEX('6d656469756d746578745f646174615f31'), + UNHEX('6c6f6e67626c6f625f646174615f31'), + UNHEX('6c6f6e67746578745f646174615f31'), '2', FALSE, TRUE, + UNHEX('62696e6172795f31'), UNHEX('76617262696e6172795f646174615f31'), b'1100110' + ); + +INSERT INTO AllDatatypeColumns ( + varchar_column, tinyint_column, text_column, date_column, smallint_column, + mediumint_column, int_column, bigint_column, float_column, double_column, + decimal_column, datetime_column, timestamp_column, time_column, year_column, + char_column, tinyblob_column, tinytext_column, blob_column, mediumblob_column, + mediumtext_column, longblob_column, longtext_column, enum_column, bool_column, + other_bool_column, binary_column, varbinary_column, bit_column +) VALUES ( + 'value2', 5, UNHEX('746578745f646174615f320a'), '2024-02-09', 25, + 500, 25000, 987654, 12.34, 56.789, 123.45, + '2024-02-09 15:30:45', '2024-02-09 15:30:45', SEC_TO_TIME(55845), 2023, + UNHEX('63686172320a'), UNHEX('74696e79626c6f625f646174615f32'), + UNHEX('74696e79746578745f646174615f320a'), UNHEX('626c6f625f646174615f32'), + UNHEX('6d656469756d626c6f625f646174615f32'), + UNHEX('6d656469756d746578745f646174615f32'), + UNHEX('6c6f6e67626c6f625f646174615f32'), + UNHEX('6c6f6e67746578745f646174615f32'), '3', TRUE, FALSE, + UNHEX('62696e6172795f32'),UNHEX('76617262696e6172795f646174615f32'), b'11001' + ); + +DELETE FROM AllDatatypeColumns where varchar_column = "value2"; + +UPDATE AllDatatypeColumns +SET + tinyint_column = 15, + text_column = UNHEX('746578745f646174615f310a'), + date_column = '2024-02-08', + smallint_column = 50, + mediumint_column = 1000, + int_column = 50000, + bigint_column = 987654321, + float_column = 45.67, + double_column = 123.789, + decimal_column = 456.12, + datetime_column = '2024-02-08 08:15:30', + timestamp_column = '2024-02-08 08:15:30', + time_column = SEC_TO_TIME(29730), + year_column = 2022, + char_column = UNHEX('63686172310a'), + tinyblob_column = UNHEX('74696e79626c6f625f646174615f31'), + tinytext_column = UNHEX('74696e79746578745f646174615f310a'), + blob_column = UNHEX('626c6f625f646174615f31'), + mediumblob_column = UNHEX('6d656469756d626c6f625f646174615f31'), + mediumtext_column = UNHEX('6d656469756d746578745f646174615f31'), + longblob_column = UNHEX('6c6f6e67626c6f625f646174615f31'), + longtext_column = UNHEX('6c6f6e67746578745f646174615f31'), + enum_column = '2', + bool_column = FALSE, + other_bool_column = TRUE, + binary_column = UNHEX('62696e6172795f31'), + varbinary_column = 
UNHEX('76617262696e6172795f646174615f31'), + bit_column = b'1100110' +WHERE varchar_column = 'value1'; \ No newline at end of file diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/AllDatatypeColumns2-statements.sql b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/AllDatatypeColumns2-statements.sql new file mode 100644 index 0000000000..5e24936526 --- /dev/null +++ b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/AllDatatypeColumns2-statements.sql @@ -0,0 +1,70 @@ +INSERT INTO AllDatatypeColumns2 ( + varchar_column, tinyint_column, text_column, date_column, smallint_column, + mediumint_column, int_column, bigint_column, float_column, double_column, + decimal_column, datetime_column, timestamp_column, time_column, year_column, + char_column, tinyblob_column, tinytext_column, blob_column, mediumblob_column, + mediumtext_column, longblob_column, longtext_column, enum_column, bool_column, + binary_column, varbinary_column, bit_column +) VALUES ( + 'value1', 10, 'text1', '2024-02-08', 50, + 1000, 50000, 987654321, 45.67, 123.789, 456.12, + '2024-02-08 08:15:30', '2024-02-08 08:15:30', SEC_TO_TIME(29730), 2022, + 'char_1', UNHEX('74696e79626c6f625f646174615f31'), 'tinytext_data_1', + UNHEX('626c6f625f646174615f31'), UNHEX('6d656469756d626c6f625f646174615f31'), + 'mediumtext_data_1', UNHEX('6c6f6e67626c6f625f646174615f31'), 'longtext_data_1', + '2', FALSE, + UNHEX('62696e6172795f646174615f3100000000000000'), UNHEX('76617262696e6172795f646174615f31'), + b'1100110' + ); + +INSERT INTO AllDatatypeColumns2 ( + varchar_column, tinyint_column, text_column, date_column, smallint_column, + mediumint_column, int_column, bigint_column, float_column, double_column, + decimal_column, datetime_column, timestamp_column, time_column, year_column, + char_column, tinyblob_column, tinytext_column, blob_column, mediumblob_column, + mediumtext_column, longblob_column, longtext_column, enum_column, bool_column, + binary_column, varbinary_column, bit_column +) VALUES ( + 'value2', 5, 'text2', '2024-02-09', 25, + 500, 25000, 987654, 12.34, 56.789, 123.45, + '2024-02-09 15:30:45', '2024-02-09 15:30:45', SEC_TO_TIME(55845), 2023, + 'char_2', UNHEX('74696e79626c6f625f646174615f32'), 'tinytext_data_2', + UNHEX('626c6f625f646174615f32'), UNHEX('6d656469756d626c6f625f646174615f32'), + 'mediumtext_data_2', UNHEX('6c6f6e67626c6f625f646174615f32'), 'longtext_data_2', + '3', TRUE, + UNHEX('62696e6172795f646174615f3200000000000000'), UNHEX('76617262696e6172795f646174615f32'), + b'11001' + ); + +UPDATE AllDatatypeColumns2 +SET + tinyint_column = 15, + text_column = 'text1', + date_column = '2024-02-08', + smallint_column = 50, + mediumint_column = 1000, + int_column = 50000, + bigint_column = 987654321, + float_column = 45.67, + double_column = 123.789, + decimal_column = 456.12, + datetime_column = '2024-02-08 08:15:30', + timestamp_column = '2024-02-08 08:15:30', + time_column = SEC_TO_TIME(29730), + year_column = 2022, + char_column = 'char_1', + tinyblob_column = UNHEX('74696e79626c6f625f646174615f31'), + tinytext_column = 'tinytext_data_1', + blob_column = UNHEX('626c6f625f646174615f31'), + mediumblob_column = UNHEX('6d656469756d626c6f625f646174615f31'), + mediumtext_column = 'mediumtext_data_1', + longblob_column = UNHEX('6c6f6e67626c6f625f646174615f31'), + longtext_column = 'longtext_data_1', + enum_column = '2', + bool_column = FALSE, + binary_column = UNHEX('62696e6172795f646174615f3100000000000000'), + varbinary_column = 
UNHEX('76617262696e6172795f646174615f31'), + bit_column = b'1100110' +WHERE varchar_column = 'value1'; + +DELETE FROM AllDatatypeColumns2 WHERE varchar_column = 'value2'; \ No newline at end of file diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-backfill-AllDatatypeColumns.avro b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-backfill-AllDatatypeColumns.avro new file mode 100644 index 0000000000000000000000000000000000000000..739dec2fcff919c7771e355d906f3590f19fbaa7 GIT binary patch literal 4398 zcmd^CPiP!f7~i~x6iTI+ARda!;7NA7J1@I4J0VJ{4Jb%0+FOaso0&JsOW({}XWqmv zC9)tC1-+;T4<1VJ;z18Z3LZp{MO&#C6^nvB=}9mk{z373JG--+-JRKz30_PfncesO z`MvM=d*A!!S$k^^-A}edaVl~)sZYMEUe;u%O|@k$q>cz(t*iyaSD(4wI?eeRB8f71 zQt;RhB3SkqnB-j&o6|JZ zApHnUDwR#5T$^^td=hr1=L;jCv-q$SI_0jLA# zoM%NyNwzzZs$RGMGH?@b_`VItf<@}|F6%Wo;&5TWWrg!tSK?B)$;{lv1(se~|52`k zr>TILXt+%{T(2|FO^4?676_C_{2}xun+rQ@r)iVVmcD9jvRHpAh#=`3hZ3aVMwG-0 z;!q(9qM$+S7a&yKq?ztBMb*ep=<%E7N%xU37uuvvC9pMDj<(6PQ#`uy=f z&pzgzSb6i6>Nnq>yziy?w-=u>Nz>LHs#ls#y;i9=EvG`TX;y4P+*<$V>%ZQ;^Rv_a zP5pgz>YKqSzZ1R4!O6J@(-^OKFkiT_-+pJV9SSvBN7Y8XQP-PRqiNL|RW2N&rm(0Q zM-_(#G9+BDCbM%?-HJq@UQ^TrsirR`C&y&UD70mt1nr0 z)3Y0nYuA^Y1(bj-l~Ael$mEO6JlE05{bLhY!te2?&pyBQ+6)lvml~MQF`@6TU zgXze_51)NMhi*QEj;qcH;a1GLVHwo43~V@tXW)is)D0cJ)f?WDjvh|?*8o5{!}5&b z8V&HZ%mzG7kESKPXk4{GOAcUbh2%Q6a01i;GX(^}!v5fymtEQo> z_S^5<x8SG)cHJ>^h3#ko#-eb~RfM8F5-M_gEAYFUXKtXdwY#)r4H* zl!yyyJ{i8|aUpo5Z4Y>iWD-(#xIU%5!DyUir{(O!WAVBy+%6b0;n= zi(*DHj1?xzKv~&H7!qa6W9BJ7dKs%qLP1x@6bzCl4p6$^*(m+p3J)0U=RGF2V4164 z_yL+!D4Rrum>}dH3EPjh$1KJ`5RgsF~BRw{Csq z(skmmfzFg9dsJrVG46TPa+qK->kA}<+foWfeBfn>nSiu~Bz=5F?Hh zE!sJz`KeYlhq)vXv9_pcG*FnB`)%&WxgKCmTP-kW7xQgv2je1M&X=` zJP#Skb~|RO*9~6=PHGL`2XHKSq)zXmJ`YD6E)2M>a1rZDTg{cvx43Nj*5ssguY}8Y3J=UYx4QhSFJ5B*PlvekoJv3aZ++4O5!E+ zP%cVl!TiK4L8!V(bIm8n(dNFIHV%GK!c|+^ht2a_@%Tm#Bg5);W#z+P9{=wAvoBwJ z&i{Gyqchj1|9s*FS|=tWO{ZqlI<3{~hF!BQ)2i9H;m&ve-nxI~-f#51-_`%>_`!bG zYe#R0noZ~?3{#@!!g$~$Uhu(G5K1)|M{`EqsOwG3Xj=2eoRE~LAu5_nq8ep>91@|= zrK4{&w-HHS{mf7Upc=;*28u?L{=I{xA-=a*l*pq`++B}_*r&@rV~9qsIV_2d0d-=0AZ zy89Tq`tA(+X?O3Fqv*tS^Lg}*_Wj>){_*+}nyK8VqMaKb@7}oy(1C{^zPvYuZakY@9n%0)t6{*?cxPJL zLpleq>+V!1Dl;27CMUC#+BHBJs&Wp{kTv9q8`f~fJ-MXsXsKZ;R6ZWvo_ZAd>HlZ> zVRS5&9|rkd{$lSl&=f|$9z}OAA3^Wjx`?g?%cs%q^3A<}oR^l+^t^?3?mW2l{^v@2 z{fEDkMu9?sDoMw7LN(LU`#{6g(P{9)~3gccAg&>sGm+QSHsHp7RJ Cn5eA) literal 0 HcmV?d00001 diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-backfill-AllDatatypeColumns2.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-backfill-AllDatatypeColumns2.jsonl deleted file mode 100644 index ee35c320b7..0000000000 --- a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-backfill-AllDatatypeColumns2.jsonl +++ /dev/null @@ -1,2 +0,0 @@ 
-{"uuid":"2a646066-40f2-44b7-bec0-61d100000000","read_timestamp":"2024-02-08T09:32:05.695Z","source_timestamp":"2024-02-08T09:30:59.000Z","object":"it_AllDatatypeColumns2","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/int-test-shreya","schema_key":"20f68e0ee69f7b864b730a1273b6f1b698ab4c28","sort_keys":[1707384659000,"mysql-bin.000029",9615213],"source_metadata":{"table":"AllDatatypeColumns2","database":"it","primary_keys":["varchar_column"],"log_file":"mysql-bin.000029","log_position":9615213,"change_type":"INSERT","is_deleted":false},"payload":{"varchar_column":"value1","tinyint_column":10,"text_column":"text1","date_column":"2024-02-08T00:00:00.000Z","smallint_column":50,"mediumint_column":1000,"int_column":50000,"bigint_column":987654321,"float_column":45.67,"double_column":123.789,"decimal_column":456.12,"datetime_column":"2024-02-08T08:15:30.000Z","timestamp_column":"2024-02-08T08:15:30.000Z","time_column":29730000000,"year_column":2022,"char_column":"char_1","tinyblob_column":"74696e79626c6f625f646174615f31","tinytext_column":"tinytext_data_1","blob_column":"626c6f625f646174615f31","mediumblob_column":"6d656469756d626c6f625f646174615f31","mediumtext_column":"mediumtext_data_1","longblob_column":"6c6f6e67626c6f625f646174615f31","longtext_column":"longtext_data_1","enum_column":"2","bool_column":0,"binary_column":"62696e6172795f646174615f3100000000000000","varbinary_column":"76617262696e6172795f646174615f31","bit_column":102}} -{"uuid":"2a646066-40f2-44b7-bec0-61d100000001","read_timestamp":"2024-02-08T09:32:05.695Z","source_timestamp":"2024-02-08T09:30:59.000Z","object":"it_AllDatatypeColumns2","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/int-test-shreya","schema_key":"20f68e0ee69f7b864b730a1273b6f1b698ab4c28","sort_keys":[1707384659000,"mysql-bin.000029",9615856],"source_metadata":{"table":"AllDatatypeColumns2","database":"it","primary_keys":["varchar_column"],"log_file":"mysql-bin.000029","log_position":9615856,"change_type":"INSERT","is_deleted":false},"payload":{"varchar_column":"value2","tinyint_column":5,"text_column":"text2","date_column":"2024-02-09T00:00:00.000Z","smallint_column":25,"mediumint_column":500,"int_column":25000,"bigint_column":987654,"float_column":12.34,"double_column":56.789,"decimal_column":123.45,"datetime_column":"2024-02-09T15:30:45.000Z","timestamp_column":"2024-02-09T15:30:45.000Z","time_column":55845000000,"year_column":2023,"char_column":"char_2","tinyblob_column":"74696e79626c6f625f646174615f32","tinytext_column":"tinytext_data_2","blob_column":"626c6f625f646174615f32","mediumblob_column":"6d656469756d626c6f625f646174615f32","mediumtext_column":"mediumtext_data_2","longblob_column":"6c6f6e67626c6f625f646174615f32","longtext_column":"longtext_data_2","enum_column":"3","bool_column":1,"binary_column":"62696e6172795f646174615f3200000000000000","varbinary_column":"76617262696e6172795f646174615f32","bit_column":25}} \ No newline at end of file diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-cdc-AllDatatypeColumns.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-cdc-AllDatatypeColumns.jsonl deleted file mode 100644 index a6c835c373..0000000000 --- a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-cdc-AllDatatypeColumns.jsonl +++ /dev/null @@ -1,2 +0,0 @@ 
-{"uuid":"45c148ba-a57f-4882-879e-831700000001","read_timestamp":"2024-02-09T09:59:32.777Z","source_timestamp":"2024-02-09T09:59:19.000Z","object":"it_AllDatatypeColumns","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/int-test-shreya","schema_key":"0df5741bfd9144e7b58ce8cd018728d345ed6acd","sort_keys":[1707472759000,"mysql-bin.000029",9618920],"source_metadata":{"table":"AllDatatypeColumns","database":"it","primary_keys":["varchar_column"],"log_file":"mysql-bin.000029","log_position":9618920,"change_type":"DELETE","is_deleted":true},"payload":{"varchar_column":"value2","tinyint_column":5,"text_column":"746578745f646174615f320a","date_column":"2024-02-09T00:00:00.000Z","smallint_column":25,"mediumint_column":500,"int_column":25000,"bigint_column":987654,"float_column":12.34,"double_column":56.789,"decimal_column":123.45,"datetime_column":"2024-02-09T15:30:45.000Z","timestamp_column":"2024-02-09T15:30:45.000Z","time_column":55845000000,"year_column":2023,"char_column":"63686172320a","tinyblob_column":"74696e79626c6f625f646174615f32","tinytext_column":"74696e79746578745f646174615f320a","blob_column":"626c6f625f646174615f32","mediumblob_column":"6d656469756d626c6f625f646174615f32","mediumtext_column":"6d656469756d746578745f646174615f32","longblob_column":"6c6f6e67626c6f625f646174615f32","longtext_column":"6c6f6e67746578745f646174615f32","enum_column":"3","bool_column":1,"other_bool_column":0,"binary_column":"62696e6172795f32","varbinary_column":"76617262696e6172795f646174615f32","bit_column":25}} -{"uuid":"45c148ba-a57f-4882-879e-831700000000","read_timestamp":"2024-02-09T09:59:32.777Z","source_timestamp":"2024-02-09T09:58:59.000Z","object":"it_AllDatatypeColumns","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/int-test-shreya","schema_key":"0df5741bfd9144e7b58ce8cd018728d345ed6acd","sort_keys":[1707472739000,"mysql-bin.000029",9618285],"source_metadata":{"table":"AllDatatypeColumns","database":"it","primary_keys":["varchar_column"],"log_file":"mysql-bin.000029","log_position":9618285,"change_type":"UPDATE-INSERT","is_deleted":false},"payload":{"varchar_column":"value1","tinyint_column":15,"text_column":"746578745f646174615f310a","date_column":"2024-02-08T00:00:00.000Z","smallint_column":50,"mediumint_column":1000,"int_column":50000,"bigint_column":987654321,"float_column":45.67,"double_column":123.789,"decimal_column":456.12,"datetime_column":"2024-02-08T08:15:30.000Z","timestamp_column":"2024-02-08T08:15:30.000Z","time_column":29730000000,"year_column":2022,"char_column":"63686172310a","tinyblob_column":"74696e79626c6f625f646174615f31","tinytext_column":"74696e79746578745f646174615f310a","blob_column":"626c6f625f646174615f31","mediumblob_column":"6d656469756d626c6f625f646174615f31","mediumtext_column":"6d656469756d746578745f646174615f31","longblob_column":"6c6f6e67626c6f625f646174615f31","longtext_column":"6c6f6e67746578745f646174615f31","enum_column":"2","bool_column":0,"other_bool_column":1,"binary_column":"62696e6172795f31","varbinary_column":"76617262696e6172795f646174615f31","bit_column":102}} \ No newline at end of file diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-cdc-AllDatatypeColumns2.avro b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-cdc-AllDatatypeColumns2.avro new file mode 100644 index 0000000000000000000000000000000000000000..3a21fccabea252fa6b19b5bec0f0f3e29bb8a29d GIT binary patch literal 
4382 zcmdT{PiP}m98P{)y0{j+EFMHwnS-aK?Yv|rnX-pg8&FuZXgw`8yqS5^PJJ^I=TF(N zRC-zz^yER0wFv$}7b&_RD2nc3#a*p<5oAU8q<9c4h#nMwFOy7@G&5N{MX-lB$qnke=au{IA5e86#)-< zJaXKC;Cstv+zh0-^A~%eQDlq6ZlNIbZFg1LEG6%KM$KkuJ1hvP(@G+f*4)GRlEl4L z+n{_o#*@UC9h-AIkT&}&243VF?09Q>`Z_bh1f7(!4u^$nUgmN!kx{4FnPbLOVg{N| zvvyX`!1F^96C^>&*r@MQk!y#{5q$Izi%H@@S4IpB!BZ0{JzQ*<{_ceP5)ShoQk}D; zRIhy>O`?@fgUpzgZ#&d)-`*ON9(_$vHciX!19Dz+BhC?5m%){x;FcHIq3yW|@+6@< zw*%!GYt&suoe4?ys7#_ql(&PX$v6wyfk2XQ-Sar3F7h(KOpCU;M-So?dWRV~2t3Th z2K5oD-!wYgG#zS5GMPmq9!hg!Mty?`ZMSW^VQK|@C#__pvkU3Dy+Ng?qfl5{>trBf zZNl(9w22IqfkV+sj=oBVnRevlD3Bvnw^y^WW9g<(*g}z|Gt_yT#joi_xYyGtlH%8y z1(<>RoTpVtNY>jS6J9rX7?^Q3e%``uVFzOO&dIa5#qq$v(~4xVF2p5Hld-+?3oJRb z{-a(;Z&OAwF@J+{JYL7{n>Nev7A%xS{4w~F&OJSCrHRR>OJ7)9n9rYbB8YwCSc>Fa zh>&Yc@3c3XE${C@i3s-o0ntxn3bDk0@+jml-slFf3ZVpV3lKUZIV_vX*W-dAtn z@6zat{ryfmc!rk^(nU zc_zN|1~cn{=Zfza;^rrAO?DWyg#Rv4%Bqyr8DmP#kWHgjH!4I{RYO(l2C?c?(G63x zv`L6z7P^2C%(PgAT!qW8P8J>*@ttIX%W&`D4X6Mk)8U>cmlhY68|6nHeWLOBG8AA6 z9WY`toPkq9(=u#reevV3?>#pOChXk>+b>SSt)2aMPQvNS>VxpL^xdD&{`S;7Ocr)a zu(kW<&W$S=+V}ANCpU|5{XUrL*q#h$YgJWKSxr+3F4a%eAC#h+JhH=x+nCV^A2HVJC; z(#QKBpx_AnbQ1P1oq!jvU4YkGjRn|gT-pE2JU0&$vl?vO`1{&R9|>U|JpA$e_6hj1 tCrmO%m}FR(Bt@76#tW0cAz@5=TPSfv5}@6e&)I(==9Or66GVF&$U=lp<4FMrXcA zut^)vGOTr(qKW2Xj#y_B61ul0VE`TzVDzl9RrY5YUQjs8yTT!-#?;*X0!`Xgh-IbB zC}RoE#{Vrw<72@Il+7?^3qW?QH0K;}%^2KU5grN26cedIZWLyDJFVR40H^y zlnEs)Pih#bp&GLND(#|l%QGB9k(DzvJ+0!Gh#c;E8AVocTxoztaL;*}1w(Q;R#f-8 zzXb`UPf&*8k+|;%-_{Of);f9LDRp z_hw8hdJ7n;BK{KkQpiPE7O83S<)yD%o2u8JY9c85#-$jk8Hg_NnmDwHnkZp({f|lY4tl@(xP_kb!M+;FFyPI&uH?ket&Q6lfm_5EI;9` zh(s-j4%+vVjZr4_N-f)->$!G6@cKc=YjY7{y?)7dE?W^zRff6UE|zB5K9nNW&nJ3y z)N2=0`A9eLyFu*qLMP})VQ4!std+!d3GQQ`*xexP$DtRI(CtRoP=T^B0|a1h=pbwt zeSh*w^vA2{*N0coqv!Y0SEIpg^yT2$$xCwQ7P{IA(B$WToLP?1=q|_uaDeLc0u6OuMysFbR?pW`%koP>I;$o7FC!5E Ax&QzG literal 0 HcmV?d00001 diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-cdc2-AllDatatypeColumns.avro b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-cdc2-AllDatatypeColumns.avro new file mode 100644 index 0000000000000000000000000000000000000000..2fc7656943b335499bc87fdb77622f8d745681e3 GIT binary patch literal 3791 zcmcgvO=u)V6rL0^!m{8+R7BR$c#`Rz^vrb6blAhLWBY)xGaNh8sqPqdzKmwsI_XPG!SWMHLL{3`Zss_*EnCp3TuHU1<&F{!g0hX z52dwwV{amvWlZah?MRifWXsxbnDdn4u&-E5rNZ&RNE**vfPBN^Vv7ZsZ{%={yd5*n znY8w&EJ~530X<)vg6z;h8FW@b95GefqBLAkq&V)6(pZv}oPhpIbXew7id1PCo%uY$ zCT%!PvDRgZ#+r`_Vx37y=;nfi0eBn&qbH5cvp>%8l)_2g6(*b-Qxo@7G-+1^%Tk#^ z%3_=j|6h#8$CMK&n|{QmfZVW>jB~&>W^i*(cpxNGOe6-mQ5fg#xN?JSoNPgz>5`n} z-$*Yo&tyNOoGN-;AV!=B!6{CFmlOp9TFKd? 
zjA_WSxPpNSs)TKoY3HR|oZ$$HteBzEX&Jwv$l$IQQDo)Er3PpK_na45FeH!MAKc&VZ5Gt zZ-%s_w}7EC;?JQk5iUqkq^8Lim%eUos#<@lh#>D9=VGK{AiBgW;?O24qM%6@S0GeR z(n9r9%2B9on)a($AS}`Q%GpOP4C~4^2rF*pi$?)09h>{LKi*m1`=a^rgWrAj-r+~L zZoQ4Zz3^1qSs`xZx9xV+>)9Q*M{GhJ*KT)RvNC@Ce)Rf}zXfmp`zpLYzVOLlIUdTF zxgCUo4begSersuv3cXUxrrU8lPS1CH{)*e=BEWk6lFdxo0Zmkjxzo&-X4%}4BGJz$ zdUe!m7gO0lH}G{oYIRA=?*)W7tq9i2u+s@~4|}20_DL@yZV-}AJE)@^Wo--yz+Bcr zI30B7pk90I9Qs5C8t)UUdJ5-V^8>>xX|{`Rm22s9w9>KnJ(q zKm6)b5S{w+^XES;p`%C9McqBY+?8&}^F7-2y_Oevk=Jq~uj4uJuj58-2Q8Z(Hv!Nn zkX-XZ&jnq->%zCi)~vi=O$CImjNDSMTcC2ubcGxMLM_M+LhB$8zyWHj>uac!f|~zC SSG%5;nw6gk@@Q$kdjA8V2MCz} literal 0 HcmV?d00001 diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Movie-shard1.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Movie-shard1.jsonl similarity index 100% rename from v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Movie-shard1.jsonl rename to v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Movie-shard1.jsonl diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Movie-shard2.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Movie-shard2.jsonl similarity index 100% rename from v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Movie-shard2.jsonl rename to v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Movie-shard2.jsonl diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Users-backfill-logical-shard1.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-backfill-logical-shard1.jsonl similarity index 100% rename from v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Users-backfill-logical-shard1.jsonl rename to v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-backfill-logical-shard1.jsonl diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Users-backfill-logical-shard2.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-backfill-logical-shard2.jsonl similarity index 100% rename from v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Users-backfill-logical-shard2.jsonl rename to v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-backfill-logical-shard2.jsonl diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Users-backfill-logical-shard3.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-backfill-logical-shard3.jsonl similarity index 100% rename from v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Users-backfill-logical-shard3.jsonl rename to v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-backfill-logical-shard3.jsonl diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Users-backfill-logical-shard4.jsonl 
b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-backfill-logical-shard4.jsonl similarity index 100% rename from v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Users-backfill-logical-shard4.jsonl rename to v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-backfill-logical-shard4.jsonl diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Users-cdc-shard1.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-cdc-shard1.jsonl similarity index 100% rename from v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Users-cdc-shard1.jsonl rename to v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-cdc-shard1.jsonl diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Users-cdc-shard2.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-cdc-shard2.jsonl similarity index 100% rename from v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Users-cdc-shard2.jsonl rename to v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-cdc-shard2.jsonl diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/mysql-schema.sql b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/mysql-schema.sql similarity index 100% rename from v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/mysql-schema.sql rename to v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/mysql-schema.sql diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/mysql-session.json b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/mysql-session.json similarity index 100% rename from v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/mysql-session.json rename to v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/mysql-session.json diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/spanner-schema.sql b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/spanner-schema.sql similarity index 100% rename from v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/spanner-schema.sql rename to v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/spanner-schema.sql diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/transformation-context-shard1.json b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/transformation-context-shard1.json similarity index 100% rename from v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/transformation-context-shard1.json rename to 
v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/transformation-context-shard1.json diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/transformation-context-shard2.json b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/transformation-context-shard2.json similarity index 100% rename from v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/transformation-context-shard2.json rename to v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/transformation-context-shard2.json diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard1.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard1.jsonl new file mode 100644 index 0000000000..52919efdfe --- /dev/null +++ b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard1.jsonl @@ -0,0 +1,3 @@ +{"uuid":"3a1cd9d6-33e5-4d24-bedf-6bbc00000000","read_timestamp":"2024-01-29T11:42:44.860Z","source_timestamp":"2024-01-29T11:42:44.000Z","object":"S1L1_Users","read_method":"mysql-backfill-fulldump","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"fb57c3235f48c2f937c1652c56d3d961cdb20c1a","sort_keys":[1706528564000,"",0],"source_metadata":{"table":"Users","database":"S1L1","primary_keys":["id"],"log_file":"","log_position":0,"change_type":"INSERT","is_deleted":false},"payload":{"id":1,"name":"Tester1","age":1}} +{"uuid":"3a1cd9d6-33e5-4d24-bedf-6bbc00000001","read_timestamp":"2024-01-29T11:42:44.861Z","source_timestamp":"2024-01-29T11:42:44.000Z","object":"S1L1_Users","read_method":"mysql-backfill-fulldump","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"fb57c3235f48c2f937c1652c56d3d961cdb20c1a","sort_keys":[1706528564000,"",0],"source_metadata":{"table":"Users","database":"S1L1","primary_keys":["id"],"log_file":"","log_position":0,"change_type":"INSERT","is_deleted":false},"payload":{"id":2,"name":"Tester2","age":2}} +{"uuid":"3a1cd9d6-33e5-4d24-bedf-6bbc00000010","read_timestamp":"2024-01-29T11:42:44.861Z","source_timestamp":"2024-01-29T11:42:44.000Z","object":"S1L1_Users","read_method":"mysql-backfill-fulldump","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"fb57c3235f48c2f937c1652c56d3d961cdb20c1a","sort_keys":[1706528564000,"",0],"source_metadata":{"table":"Users","database":"S1L1","primary_keys":["id"],"log_file":"","log_position":0,"change_type":"INSERT","is_deleted":false},"payload":{"id":3,"name":"Tester3","age":3}} \ No newline at end of file diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard2.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard2.jsonl new file mode 100644 index 0000000000..a2fabc868c --- /dev/null +++ b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard2.jsonl @@ -0,0 
+1,3 @@ +{"uuid":"dfcc3068-25f2-4b97-854f-e88600000000","read_timestamp":"2024-01-29T11:42:45.075Z","source_timestamp":"2024-01-29T11:42:44.000Z","object":"S1L2_Users","read_method":"mysql-backfill-fulldump","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"13975412a0becf3862a77ff073f06f6ea9ed484e","sort_keys":[1706528564000,"",0],"source_metadata":{"table":"Users","database":"S1L2","primary_keys":["id"],"log_file":"","log_position":0,"change_type":"INSERT","is_deleted":false},"payload":{"id":4,"name":"Tester4","age":4}} +{"uuid":"dfcc3068-25f2-4b97-854f-e88600000001","read_timestamp":"2024-01-29T11:42:45.075Z","source_timestamp":"2024-01-29T11:42:44.000Z","object":"S1L2_Users","read_method":"mysql-backfill-fulldump","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"13975412a0becf3862a77ff073f06f6ea9ed484e","sort_keys":[1706528564000,"",0],"source_metadata":{"table":"Users","database":"S1L2","primary_keys":["id"],"log_file":"","log_position":0,"change_type":"INSERT","is_deleted":false},"payload":{"id":5,"name":"Tester5","age":5}} +{"uuid":"dfcc3068-25f2-4b97-854f-e88600000010","read_timestamp":"2024-01-29T11:42:45.075Z","source_timestamp":"2024-01-29T11:42:44.000Z","object":"S1L2_Users","read_method":"mysql-backfill-fulldump","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"13975412a0becf3862a77ff073f06f6ea9ed484e","sort_keys":[1706528564000,"",0],"source_metadata":{"table":"Users","database":"S1L2","primary_keys":["id"],"log_file":"","log_position":0,"change_type":"INSERT","is_deleted":false},"payload":{"id":6,"name":"Tester6","age":6}} \ No newline at end of file diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard3.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard3.jsonl new file mode 100644 index 0000000000..0db2ef9105 --- /dev/null +++ b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard3.jsonl @@ -0,0 +1,3 @@ +{"uuid":"51d0ca59-733b-4603-8532-eaaf00000000","read_timestamp":"2024-01-29T11:44:45.293Z","source_timestamp":"2024-01-29T11:43:57.000Z","object":"S2L1_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"0bbf67ee21c003b35317d1759a865323a4a2d2cc","sort_keys":[1706528637000,"mysql-bin.000012",84440344],"source_metadata":{"table":"Users","database":"S2L1","primary_keys":["id"],"log_file":"mysql-bin.000012","log_position":84440344,"change_type":"INSERT","is_deleted":false},"payload":{"id":7,"name":"Tester7","age":7}} +{"uuid":"51d0ca59-733b-4603-8532-eaaf00000001","read_timestamp":"2024-01-29T11:44:45.293Z","source_timestamp":"2024-01-29T11:43:57.000Z","object":"S2L1_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"0bbf67ee21c003b35317d1759a865323a4a2d2cc","sort_keys":[1706528637000,"mysql-bin.000012",84440646],"source_metadata":{"table":"Users","database":"S2L1","primary_keys":["id"],"log_file":"mysql-bin.000012","log_position":84440646,"change_type":"INSERT","is_deleted":false},"payload":{"id":8,"name":"Tester8","age":8}} 
+{"uuid":"51d0ca59-733b-4603-8532-eaaf00000010","read_timestamp":"2024-01-29T11:44:45.293Z","source_timestamp":"2024-01-29T11:43:57.000Z","object":"S2L1_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"0bbf67ee21c003b35317d1759a865323a4a2d2cc","sort_keys":[1706528637000,"mysql-bin.000012",84440948],"source_metadata":{"table":"Users","database":"S2L1","primary_keys":["id"],"log_file":"mysql-bin.000012","log_position":84440948,"change_type":"INSERT","is_deleted":false},"payload":{"id":9,"name":"Tester9","age":9}} \ No newline at end of file diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard4.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard4.jsonl new file mode 100644 index 0000000000..719f782299 --- /dev/null +++ b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard4.jsonl @@ -0,0 +1,3 @@ +{"uuid":"51d0ca59-733b-4603-8532-eaaf00000011","read_timestamp":"2024-01-29T11:44:45.293Z","source_timestamp":"2024-01-29T11:44:42.000Z","object":"S2L2_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"28072440e6e327bb20c726f7b475fdaf6f867e07","sort_keys":[1706528682000,"mysql-bin.000012",84441250],"source_metadata":{"table":"Users","database":"S2L2","primary_keys":["id"],"log_file":"mysql-bin.000012","log_position":84441250,"change_type":"INSERT","is_deleted":false},"payload":{"id":10,"name":"Tester10","age":10}} +{"uuid":"51d0ca59-733b-4603-8532-eaaf00000100","read_timestamp":"2024-01-29T11:44:45.293Z","source_timestamp":"2024-01-29T11:44:42.000Z","object":"S2L2_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"28072440e6e327bb20c726f7b475fdaf6f867e07","sort_keys":[1706528682000,"mysql-bin.000012",84441552],"source_metadata":{"table":"Users","database":"S2L2","primary_keys":["id"],"log_file":"mysql-bin.000012","log_position":84441552,"change_type":"INSERT","is_deleted":false},"payload":{"id":11,"name":"Tester11","age":11}} +{"uuid":"51d0ca59-733b-4603-8532-eaaf00000101","read_timestamp":"2024-01-29T11:44:45.293Z","source_timestamp":"2024-01-29T11:44:42.000Z","object":"S2L2_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"28072440e6e327bb20c726f7b475fdaf6f867e07","sort_keys":[1706528682000,"mysql-bin.000012",84441854],"source_metadata":{"table":"Users","database":"S2L2","primary_keys":["id"],"log_file":"mysql-bin.000012","log_position":84441854,"change_type":"INSERT","is_deleted":false},"payload":{"id":12,"name":"Tester12","age":12}} \ No newline at end of file diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-cdc-shard1.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-cdc-shard1.jsonl new file mode 100644 index 0000000000..cb50a3986d --- /dev/null +++ b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-cdc-shard1.jsonl @@ 
-0,0 +1,6 @@ +{"uuid":"1448336d-45a7-4d31-b4ab-9d0500000000","read_timestamp":"2024-02-06T08:17:59.481Z","source_timestamp":"2024-02-06T08:16:58.000Z","object":"S1L1_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"fb57c3235f48c2f937c1652c56d3d961cdb20c1a","sort_keys":[1707207418000,"mysql-bin.000013",6185380],"source_metadata":{"table":"Users","database":"S1L1","primary_keys":["id"],"log_file":"mysql-bin.000013","log_position":6185380,"change_type":"UPDATE-INSERT","is_deleted":false},"payload":{"id":1,"name":"Tester1","age":20}} +{"uuid":"1448336d-45a7-4d31-b4ab-9d0500000001","read_timestamp":"2024-02-06T08:17:59.481Z","source_timestamp":"2024-02-06T08:16:58.000Z","object":"S1L1_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"fb57c3235f48c2f937c1652c56d3d961cdb20c1a","sort_keys":[1707207418000,"mysql-bin.000013",6185683],"source_metadata":{"table":"Users","database":"S1L1","primary_keys":["id"],"log_file":"mysql-bin.000013","log_position":6185683,"change_type":"INSERT","is_deleted":false},"payload":{"id":13,"name":"Tester13","age":13}} +{"uuid":"1448336d-45a7-4d31-b4ab-9d0500000010","read_timestamp":"2024-02-06T08:17:59.481Z","source_timestamp":"2024-02-06T08:16:58.000Z","object":"S1L1_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"fb57c3235f48c2f937c1652c56d3d961cdb20c1a","sort_keys":[1707207418000,"mysql-bin.000013",6185985],"source_metadata":{"table":"Users","database":"S1L1","primary_keys":["id"],"log_file":"mysql-bin.000013","log_position":6185985,"change_type":"DELETE","is_deleted":true},"payload":{"id":3,"name":"Tester3","age":3}} +{"uuid":"caa80302-374f-40d8-b98b-c9f900000000","read_timestamp":"2024-02-06T08:19:06.299Z","source_timestamp":"2024-02-06T08:18:28.000Z","object":"S1L2_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"13975412a0becf3862a77ff073f06f6ea9ed484e","sort_keys":[1707207508000,"mysql-bin.000013",6186956],"source_metadata":{"table":"Users","database":"S1L2","primary_keys":["id"],"log_file":"mysql-bin.000013","log_position":6186956,"change_type":"UPDATE-INSERT","is_deleted":false},"payload":{"id":1,"name":"Tester1","age":21}} +{"uuid":"caa80302-374f-40d8-b98b-c9f900000010","read_timestamp":"2024-02-06T08:19:06.299Z","source_timestamp":"2024-02-06T08:18:28.000Z","object":"S1L2_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"13975412a0becf3862a77ff073f06f6ea9ed484e","sort_keys":[1707207508000,"mysql-bin.000013",6187561],"source_metadata":{"table":"Users","database":"S1L2","primary_keys":["id"],"log_file":"mysql-bin.000013","log_position":6187561,"change_type":"DELETE","is_deleted":true},"payload":{"id":2,"name":"Tester2","age":2}} 
+{"uuid":"caa80302-374f-40d8-b98b-c9f900000001","read_timestamp":"2024-02-06T08:19:06.299Z","source_timestamp":"2024-02-06T08:18:28.000Z","object":"S1L2_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"13975412a0becf3862a77ff073f06f6ea9ed484e","sort_keys":[1707207508000,"mysql-bin.000013",6187259],"source_metadata":{"table":"Users","database":"S1L2","primary_keys":["id"],"log_file":"mysql-bin.000013","log_position":6187259,"change_type":"INSERT","is_deleted":false},"payload":{"id":14,"name":"Tester14","age":14}} diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-cdc-shard2.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-cdc-shard2.jsonl new file mode 100644 index 0000000000..190ea30fa2 --- /dev/null +++ b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-cdc-shard2.jsonl @@ -0,0 +1,6 @@ +{"uuid":"d65ee5e2-bc34-420f-b926-261700000000","read_timestamp":"2024-02-06T08:20:16.811Z","source_timestamp":"2024-02-06T08:19:09.000Z","object":"S2L1_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"0bbf67ee21c003b35317d1759a865323a4a2d2cc","sort_keys":[1707207549000,"mysql-bin.000013",6187895],"source_metadata":{"table":"Users","database":"S2L1","primary_keys":["id"],"log_file":"mysql-bin.000013","log_position":6187895,"change_type":"UPDATE-INSERT","is_deleted":false},"payload":{"id":6,"name":"Tester6","age":22}} +{"uuid":"d65ee5e2-bc34-420f-b926-261700000001","read_timestamp":"2024-02-06T08:20:16.811Z","source_timestamp":"2024-02-06T08:19:09.000Z","object":"S2L1_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"0bbf67ee21c003b35317d1759a865323a4a2d2cc","sort_keys":[1707207549000,"mysql-bin.000013",6188198],"source_metadata":{"table":"Users","database":"S2L1","primary_keys":["id"],"log_file":"mysql-bin.000013","log_position":6188198,"change_type":"INSERT","is_deleted":false},"payload":{"id":15,"name":"Tester15","age":15}} +{"uuid":"d65ee5e2-bc34-420f-b926-261700000010","read_timestamp":"2024-02-06T08:20:16.811Z","source_timestamp":"2024-02-06T08:19:09.000Z","object":"S2L1_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"0bbf67ee21c003b35317d1759a865323a4a2d2cc","sort_keys":[1707207549000,"mysql-bin.000013",6188500],"source_metadata":{"table":"Users","database":"S2L1","primary_keys":["id"],"log_file":"mysql-bin.000013","log_position":6188500,"change_type":"DELETE","is_deleted":true},"payload":{"id":4,"name":"Tester4","age":4}} 
+{"uuid":"6dd400e4-2772-42cc-b6b5-4bf700000000","read_timestamp":"2024-02-06T08:21:27.083Z","source_timestamp":"2024-02-06T08:20:38.000Z","object":"S2L2_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"28072440e6e327bb20c726f7b475fdaf6f867e07","sort_keys":[1707207638000,"mysql-bin.000013",6188834],"source_metadata":{"table":"Users","database":"S2L2","primary_keys":["id"],"log_file":"mysql-bin.000013","log_position":6188834,"change_type":"UPDATE-INSERT","is_deleted":false},"payload":{"id":5,"name":"Tester5","age":23}} +{"uuid":"6dd400e4-2772-42cc-b6b5-4bf700000001","read_timestamp":"2024-02-06T08:21:27.083Z","source_timestamp":"2024-02-06T08:20:38.000Z","object":"S2L2_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"28072440e6e327bb20c726f7b475fdaf6f867e07","sort_keys":[1707207638000,"mysql-bin.000013",6189137],"source_metadata":{"table":"Users","database":"S2L2","primary_keys":["id"],"log_file":"mysql-bin.000013","log_position":6189137,"change_type":"INSERT","is_deleted":false},"payload":{"id":16,"name":"Tester16","age":16}} +{"uuid":"6dd400e4-2772-42cc-b6b5-4bf700000010","read_timestamp":"2024-02-06T08:21:27.083Z","source_timestamp":"2024-02-06T08:20:38.000Z","object":"S2L2_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"28072440e6e327bb20c726f7b475fdaf6f867e07","sort_keys":[1707207638000,"mysql-bin.000013",6189439],"source_metadata":{"table":"Users","database":"S2L2","primary_keys":["id"],"log_file":"mysql-bin.000013","log_position":6189439,"change_type":"DELETE","is_deleted":true},"payload":{"id":16,"name":"Tester16","age":16}} diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/mysql-schema.sql b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/mysql-schema.sql new file mode 100644 index 0000000000..54d3d18d50 --- /dev/null +++ b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/mysql-schema.sql @@ -0,0 +1,6 @@ +CREATE TABLE `Users` ( + `id` int NOT NULL, -- To: id INT64 + `name` varchar(200), -- To: name STRING(200) + `age` bigint, -- To: age_spanner INT64 Column name renamed + PRIMARY KEY (`id`) +); diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/spanner-schema.sql b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/spanner-schema.sql new file mode 100644 index 0000000000..ceeeb2114e --- /dev/null +++ b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/spanner-schema.sql @@ -0,0 +1,5 @@ +CREATE TABLE IF NOT EXISTS Users ( + id INT64 NOT NULL, + name STRING(200), + age INT64, +) PRIMARY KEY (id); diff --git a/v2/googlecloud-to-elasticsearch/src/main/resources/requirements.txt b/v2/googlecloud-to-elasticsearch/src/main/resources/requirements.txt deleted file mode 100644 index 3258e093fd..0000000000 --- a/v2/googlecloud-to-elasticsearch/src/main/resources/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -apache-beam[gcp,aws, azure, dataframe]==2.56.0 \ No newline at end of file diff --git 
a/v2/googlecloud-to-googlecloud/src/main/resources/requirements.txt b/v2/googlecloud-to-googlecloud/src/main/resources/requirements.txt deleted file mode 100644 index 3258e093fd..0000000000 --- a/v2/googlecloud-to-googlecloud/src/main/resources/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -apache-beam[gcp,aws, azure, dataframe]==2.56.0 \ No newline at end of file diff --git a/v2/googlecloud-to-neo4j/src/test/java/com/google/cloud/teleport/v2/neo4j/templates/ConstraintsIndicesIT.java b/v2/googlecloud-to-neo4j/src/test/java/com/google/cloud/teleport/v2/neo4j/templates/ConstraintsIndicesIT.java index 68f088913e..f2b8d82597 100644 --- a/v2/googlecloud-to-neo4j/src/test/java/com/google/cloud/teleport/v2/neo4j/templates/ConstraintsIndicesIT.java +++ b/v2/googlecloud-to-neo4j/src/test/java/com/google/cloud/teleport/v2/neo4j/templates/ConstraintsIndicesIT.java @@ -38,6 +38,7 @@ import org.apache.beam.it.neo4j.conditions.Neo4jQueryCheck; import org.junit.After; import org.junit.Before; +import org.junit.Ignore; import org.junit.Test; import org.junit.experimental.categories.Category; import org.junit.runner.RunWith; @@ -241,6 +242,7 @@ public void canResetDatabase() throws Exception { @Category(TemplateIntegrationTest.class) @TemplateIntegrationTest(GoogleCloudToNeo4j.class) @RunWith(JUnit4.class) + @Ignore("Has known issues to be fixed in Beam 2.57") public static class Neo4j5EnterpriseIT extends ConstraintsIndicesIT { @Override diff --git a/v2/googlecloud-to-splunk/src/main/java/com/google/cloud/teleport/v2/templates/GCSToSplunk.java b/v2/googlecloud-to-splunk/src/main/java/com/google/cloud/teleport/v2/templates/GCSToSplunk.java index febfc46e3c..81ef282f1d 100644 --- a/v2/googlecloud-to-splunk/src/main/java/com/google/cloud/teleport/v2/templates/GCSToSplunk.java +++ b/v2/googlecloud-to-splunk/src/main/java/com/google/cloud/teleport/v2/templates/GCSToSplunk.java @@ -17,6 +17,7 @@ import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; +import com.google.cloud.teleport.metadata.MultiTemplate; import com.google.cloud.teleport.metadata.Template; import com.google.cloud.teleport.metadata.TemplateCategory; import com.google.cloud.teleport.metadata.TemplateParameter; @@ -28,7 +29,7 @@ import com.google.cloud.teleport.v2.transforms.CsvConverters.LineToFailsafeJson; import com.google.cloud.teleport.v2.transforms.CsvConverters.ReadCsv; import com.google.cloud.teleport.v2.transforms.ErrorConverters.LogErrors; -import com.google.cloud.teleport.v2.transforms.JavascriptTextTransformer.JavascriptTextTransformerOptions; +import com.google.cloud.teleport.v2.transforms.PythonExternalTextTransformer.PythonExternalTextTransformerOptions; import com.google.cloud.teleport.v2.transforms.SplunkConverters; import com.google.cloud.teleport.v2.transforms.SplunkConverters.FailsafeStringToSplunkEvent; import com.google.cloud.teleport.v2.transforms.SplunkConverters.SplunkOptions; @@ -37,6 +38,7 @@ import com.google.cloud.teleport.v2.values.FailsafeElement; import com.google.cloud.teleport.v2.values.SplunkTokenSource; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Strings; import org.apache.beam.repackaged.core.org.apache.commons.lang3.EnumUtils; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.PipelineResult; @@ -70,21 +72,47 @@ * href="https://github.com/GoogleCloudPlatform/DataflowTemplates/blob/main/v2/googlecloud-to-splunk/README_GCS_To_Splunk.md">README * for instructions on how to use or modify this template. 
*/ -@Template( - name = "GCS_To_Splunk", - category = TemplateCategory.BATCH, - displayName = "Cloud Storage To Splunk", - description = { - "A pipeline that reads a set of Text (CSV) files in Cloud Storage and writes to Splunk's" - + " HTTP Event Collector (HEC).", - "The template creates the Splunk payload as a JSON element using either CSV headers (default), JSON schema or JavaScript UDF. " - + "If a Javascript UDF and JSON schema are both inputted as parameters, only the Javascript UDF will be executed." - }, - optionsClass = GCSToSplunkOptions.class, - skipOptions = {"javascriptTextTransformReloadIntervalMinutes"}, - flexContainerName = "gcs-to-splunk", - contactInformation = "https://cloud.google.com/support", - hidden = true) +@MultiTemplate({ + @Template( + name = "GCS_To_Splunk", + category = TemplateCategory.BATCH, + displayName = "Cloud Storage To Splunk", + description = { + "A pipeline that reads a set of Text (CSV) files in Cloud Storage and writes to Splunk's" + + " HTTP Event Collector (HEC).", + "The template creates the Splunk payload as a JSON element using either CSV headers (default), JSON schema or JavaScript UDF. " + + "If a JavaScript UDF and JSON schema are both provided as parameters, only the JavaScript UDF is executed." + }, + optionsClass = GCSToSplunkOptions.class, + skipOptions = { + "javascriptTextTransformReloadIntervalMinutes", + "pythonExternalTextTransformGcsPath", + "pythonExternalTextTransformFunctionName" + }, + flexContainerName = "gcs-to-splunk", + contactInformation = "https://cloud.google.com/support", + hidden = true), + @Template( + name = "GCS_To_Splunk_Xlang", + category = TemplateCategory.BATCH, + displayName = "Cloud Storage To Splunk with Python UDFs", + type = Template.TemplateType.XLANG, + description = { + "A pipeline that reads a set of Text (CSV) files in Cloud Storage and writes to Splunk's" + + " HTTP Event Collector (HEC).", + "The template creates the Splunk payload as a JSON element using either CSV headers (default), JSON schema or Python UDF. " + + "If a Python UDF and JSON schema are both provided as parameters, only the Python UDF is executed." + }, + optionsClass = GCSToSplunkOptions.class, + skipOptions = { + "javascriptTextTransformGcsPath", + "javascriptTextTransformFunctionName", + "javascriptTextTransformReloadIntervalMinutes" + }, + flexContainerName = "gcs-to-splunk-xlang", + contactInformation = "https://cloud.google.com/support", + hidden = true) +}) public final class GCSToSplunk { /** String/String Coder for FailsafeElement. */ @@ -127,7 +155,9 @@ public final class GCSToSplunk { * executor at the command-line.
*/ public interface GCSToSplunkOptions - extends CsvConverters.CsvPipelineOptions, SplunkOptions, JavascriptTextTransformerOptions { + extends CsvConverters.CsvPipelineOptions, + SplunkOptions, + PythonExternalTextTransformerOptions { @TemplateParameter.GcsWriteFolder( order = 1, @@ -208,16 +238,32 @@ static ReadCsv readFromCsv(GCSToSplunkOptions options) { } static LineToFailsafeJson convertToFailsafeAndMaybeApplyUdf(GCSToSplunkOptions options) { - return CsvConverters.LineToFailsafeJson.newBuilder() - .setDelimiter(options.getDelimiter()) - .setJavascriptUdfFileSystemPath(options.getJavascriptTextTransformGcsPath()) - .setJavascriptUdfFunctionName(options.getJavascriptTextTransformFunctionName()) - .setJsonSchemaPath(options.getJsonSchemaPath()) - .setHeaderTag(CSV_HEADERS) - .setLineTag(CSV_LINES) - .setUdfOutputTag(UDF_OUT) - .setUdfDeadletterTag(UDF_ERROR_OUT) - .build(); + + boolean usePythonUdf = !Strings.isNullOrEmpty(options.getPythonExternalTextTransformGcsPath()); + boolean useJavascriptUdf = !Strings.isNullOrEmpty(options.getJavascriptTextTransformGcsPath()); + + if (usePythonUdf && useJavascriptUdf) { + throw new IllegalArgumentException( + "Either javascript or Python gcs path must be provided, but not both."); + } + CsvConverters.LineToFailsafeJson.Builder lineToFailsafeJsonBuilder = + CsvConverters.LineToFailsafeJson.newBuilder() + .setDelimiter(options.getDelimiter()) + .setJsonSchemaPath(options.getJsonSchemaPath()) + .setHeaderTag(CSV_HEADERS) + .setLineTag(CSV_LINES) + .setUdfOutputTag(UDF_OUT) + .setUdfDeadletterTag(UDF_ERROR_OUT); + if (usePythonUdf) { + lineToFailsafeJsonBuilder + .setPythonUdfFileSystemPath(options.getPythonExternalTextTransformGcsPath()) + .setPythonUdfFunctionName(options.getPythonExternalTextTransformFunctionName()); + } else { + lineToFailsafeJsonBuilder + .setJavascriptUdfFileSystemPath(options.getJavascriptTextTransformGcsPath()) + .setJavascriptUdfFunctionName(options.getJavascriptTextTransformFunctionName()); + } + return lineToFailsafeJsonBuilder.build(); } static FailsafeStringToSplunkEvent convertToSplunkEvent() { diff --git a/v2/kafka-common/src/main/java/com/google/cloud/teleport/v2/kafka/transforms/BinaryAvroDeserializer.java b/v2/kafka-common/src/main/java/com/google/cloud/teleport/v2/kafka/transforms/BinaryAvroDeserializer.java index 5cb62962fb..251cdbd7f6 100644 --- a/v2/kafka-common/src/main/java/com/google/cloud/teleport/v2/kafka/transforms/BinaryAvroDeserializer.java +++ b/v2/kafka-common/src/main/java/com/google/cloud/teleport/v2/kafka/transforms/BinaryAvroDeserializer.java @@ -15,7 +15,6 @@ */ package com.google.cloud.teleport.v2.kafka.transforms; -import io.confluent.kafka.serializers.KafkaAvroDeserializer; import java.io.IOException; import org.apache.avro.Schema; import org.apache.avro.generic.GenericDatumReader; @@ -25,8 +24,9 @@ import org.apache.avro.io.DecoderFactory; import org.apache.kafka.common.errors.SerializationException; import org.apache.kafka.common.header.Headers; +import org.apache.kafka.common.serialization.Deserializer; -public class BinaryAvroDeserializer extends KafkaAvroDeserializer { +public class BinaryAvroDeserializer implements Deserializer { private Schema schema; public BinaryAvroDeserializer() {} @@ -35,7 +35,13 @@ public BinaryAvroDeserializer(Schema schema) { this.schema = schema; } + @Override public GenericRecord deserialize(String topic, Headers header, byte[] bytes) { + return deserialize(topic, bytes); + } + + @Override + public GenericRecord deserialize(String topic, byte[] bytes) { try { 
Decoder decoder = DecoderFactory.get().binaryDecoder(bytes, null); DatumReader reader = new GenericDatumReader(this.schema); diff --git a/v2/kafka-common/src/main/java/com/google/cloud/teleport/v2/kafka/transforms/BinaryAvroSerializer.java b/v2/kafka-common/src/main/java/com/google/cloud/teleport/v2/kafka/transforms/BinaryAvroSerializer.java index 307e5f1363..44924e5e49 100644 --- a/v2/kafka-common/src/main/java/com/google/cloud/teleport/v2/kafka/transforms/BinaryAvroSerializer.java +++ b/v2/kafka-common/src/main/java/com/google/cloud/teleport/v2/kafka/transforms/BinaryAvroSerializer.java @@ -15,7 +15,6 @@ */ package com.google.cloud.teleport.v2.kafka.transforms; -import io.confluent.kafka.serializers.KafkaAvroSerializer; import java.io.ByteArrayOutputStream; import java.io.IOException; import org.apache.avro.Schema; @@ -25,8 +24,10 @@ import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; import org.apache.kafka.common.errors.SerializationException; +import org.apache.kafka.common.header.Headers; +import org.apache.kafka.common.serialization.Serializer; -public class BinaryAvroSerializer extends KafkaAvroSerializer { +public class BinaryAvroSerializer implements Serializer { private Schema schema; public BinaryAvroSerializer() {} @@ -35,7 +36,13 @@ public BinaryAvroSerializer(Schema schema) { this.schema = schema; } - public byte[] serialize(String subject, GenericRecord record) { + @Override + public byte[] serialize(String subject, Headers headers, GenericRecord record) { + return serialize(subject, record); + } + + @Override + public byte[] serialize(String topic, GenericRecord record) { try { ByteArrayOutputStream out = new ByteArrayOutputStream(); Encoder encoder = EncoderFactory.get().binaryEncoder(out, null); diff --git a/v2/kafka-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/KafkaToBigQueryFlex.java b/v2/kafka-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/KafkaToBigQueryFlex.java index d6bbd251b5..bcc2fccda0 100644 --- a/v2/kafka-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/KafkaToBigQueryFlex.java +++ b/v2/kafka-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/KafkaToBigQueryFlex.java @@ -122,9 +122,6 @@ public class KafkaToBigQueryFlex { public static final TupleTag, String>> TRANSFORM_DEADLETTER_OUT = new TupleTag, String>>() {}; - /** The default suffix for error tables if dead letter table is not specified. */ - private static final String DEFAULT_DEADLETTER_TABLE_SUFFIX = "_error_records"; - /** String/String Coder for FailsafeElement. 
*/ private static final FailsafeElementCoder FAILSAFE_ELEMENT_CODER = FailsafeElementCoder.of( @@ -253,8 +250,7 @@ public static PipelineResult runAvroPipeline( if (options.getAvroFormat().equals("NON_WIRE_FORMAT") && options.getAvroSchemaPath() != null) { - throw new UnsupportedOperationException("Only Confluent Wire Format is supported"); - // writeResult = kafkaRecords.apply(AvroTransform.of(options)); + writeResult = kafkaRecords.apply(AvroTransform.of(options)); } else { @@ -382,10 +378,7 @@ public static PipelineResult runJsonPipeline( failedInserts.apply( "WriteInsertionFailedRecords", ErrorConverters.WriteStringMessageErrors.newBuilder() - .setErrorRecordsTable( - ObjectUtils.firstNonNull( - options.getOutputDeadletterTable(), - options.getOutputTableSpec() + DEFAULT_DEADLETTER_TABLE_SUFFIX)) + .setErrorRecordsTable(ObjectUtils.firstNonNull(options.getOutputDeadletterTable())) .setErrorRecordsTableSchema(SchemaUtils.DEADLETTER_SCHEMA) .build()); } else { diff --git a/v2/kafka-to-bigquery/src/main/java/com/google/cloud/teleport/v2/transforms/AvroTransform.java b/v2/kafka-to-bigquery/src/main/java/com/google/cloud/teleport/v2/transforms/AvroTransform.java index 43bd63273a..1f6752f353 100644 --- a/v2/kafka-to-bigquery/src/main/java/com/google/cloud/teleport/v2/transforms/AvroTransform.java +++ b/v2/kafka-to-bigquery/src/main/java/com/google/cloud/teleport/v2/transforms/AvroTransform.java @@ -119,7 +119,8 @@ private static class KafkaRecordToGenericRecordFailsafeElementFn KafkaRecord, FailsafeElement, GenericRecord>> implements Serializable { - private transient KafkaAvroDeserializer deserializer; + private transient KafkaAvroDeserializer kafkaDeserializer; + private transient BinaryAvroDeserializer binaryDeserializer; private transient SchemaRegistryClient schemaRegistryClient; private Schema schema = null; private String topicName; @@ -135,12 +136,12 @@ private static class KafkaRecordToGenericRecordFailsafeElementFn @Setup public void setup() throws IOException, RestClientException { if (this.schema != null && this.useConfluentWireFormat.equals("NON_WIRE_FORMAT")) { - this.deserializer = new BinaryAvroDeserializer(this.schema); + this.binaryDeserializer = new BinaryAvroDeserializer(this.schema); } else if (this.schema != null && this.useConfluentWireFormat.equals("CONFLUENT_WIRE_FORMAT")) { this.schemaRegistryClient = new MockSchemaRegistryClient(); this.schemaRegistryClient.register(this.topicName, this.schema, 1, 1); - this.deserializer = new KafkaAvroDeserializer(schemaRegistryClient); + this.kafkaDeserializer = new KafkaAvroDeserializer(schemaRegistryClient); } else { throw new IllegalArgumentException( "An Avro schema is needed in order to deserialize values."); @@ -153,10 +154,16 @@ public void processElement(ProcessContext context) { GenericRecord result = null; try { // Serialize to Generic Record - result = - (GenericRecord) - this.deserializer.deserialize( - element.getTopic(), element.getHeaders(), element.getKV().getValue()); + if (this.useConfluentWireFormat.equals("NON_WIRE_FORMAT")) { + result = + this.binaryDeserializer.deserialize( + element.getTopic(), element.getHeaders(), element.getKV().getValue()); + } else { + result = + (GenericRecord) + this.kafkaDeserializer.deserialize( + element.getTopic(), element.getHeaders(), element.getKV().getValue()); + } } catch (Exception e) { LOG.error("Failed during deserialization: " + e.toString()); } diff --git a/v2/kafka-to-gcs/pom.xml b/v2/kafka-to-gcs/pom.xml index c460feb5fb..956cafb7aa 100644 --- 
a/v2/kafka-to-gcs/pom.xml +++ b/v2/kafka-to-gcs/pom.xml @@ -30,6 +30,17 @@ kafka-common ${project.version} + + com.google.cloud.teleport + it-google-cloud-platform + ${project.version} + test + + + org.apache.beam + beam-it-kafka + test + diff --git a/v2/kafka-to-gcs/src/main/java/com/google/cloud/teleport/v2/transforms/AvroWriteTransform.java b/v2/kafka-to-gcs/src/main/java/com/google/cloud/teleport/v2/transforms/AvroWriteTransform.java index 4130cabb26..30e4a33710 100644 --- a/v2/kafka-to-gcs/src/main/java/com/google/cloud/teleport/v2/transforms/AvroWriteTransform.java +++ b/v2/kafka-to-gcs/src/main/java/com/google/cloud/teleport/v2/transforms/AvroWriteTransform.java @@ -266,6 +266,8 @@ public String getFilename( String subDir = avroDestination.name; return subDir + "/" + + outputFilenamePrefix() + + "_" + defaultNaming.getFilename(window, pane, numShards, shardIndex, compression); } } diff --git a/v2/kafka-to-gcs/src/test/java/com/google/cloud/teleport/v2/templates/KafkaToGcsIT.java b/v2/kafka-to-gcs/src/test/java/com/google/cloud/teleport/v2/templates/KafkaToGcsIT.java new file mode 100644 index 0000000000..f507c87eca --- /dev/null +++ b/v2/kafka-to-gcs/src/test/java/com/google/cloud/teleport/v2/templates/KafkaToGcsIT.java @@ -0,0 +1,178 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.teleport.v2.templates; + +import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatPipeline; +import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatResult; + +import com.google.cloud.teleport.metadata.TemplateIntegrationTest; +import com.google.common.io.Resources; +import io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient; +import io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException; +import io.confluent.kafka.serializers.KafkaAvroSerializer; +import java.io.IOException; +import java.net.URL; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Function; +import java.util.regex.Pattern; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.GenericRecordBuilder; +import org.apache.beam.it.common.PipelineLauncher.LaunchConfig; +import org.apache.beam.it.common.PipelineLauncher.LaunchInfo; +import org.apache.beam.it.common.PipelineOperator.Result; +import org.apache.beam.it.common.TestProperties; +import org.apache.beam.it.common.utils.ResourceManagerUtils; +import org.apache.beam.it.gcp.TemplateTestBase; +import org.apache.beam.it.gcp.artifacts.Artifact; +import org.apache.beam.it.kafka.KafkaResourceManager; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.clients.producer.RecordMetadata; +import org.apache.kafka.common.serialization.StringSerializer; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** Integration test for {@link KafkaToGcs2} (Kafka_to_GCS_2). 
*/ +@Category(TemplateIntegrationTest.class) +@TemplateIntegrationTest(KafkaToGcs2.class) +@RunWith(JUnit4.class) +public class KafkaToGcsIT extends TemplateTestBase { + + private static final Logger LOG = LoggerFactory.getLogger(KafkaToGcsIT.class); + + private KafkaResourceManager kafkaResourceManager; + private Schema avroSchema; + + @Before + public void setup() throws IOException { + kafkaResourceManager = + KafkaResourceManager.builder(testName).setHost(TestProperties.hostIp()).build(); + + URL avroschemaResource = Resources.getResource("KafkaToGcsIT/avro_schema.avsc"); + gcsClient.uploadArtifact("avro_schema.avsc", avroschemaResource.getPath()); + avroSchema = new Schema.Parser().parse(avroschemaResource.openStream()); + } + + @After + public void tearDown() { + ResourceManagerUtils.cleanResources(kafkaResourceManager); + } + + @Test + public void testKafkaToGcsText() throws IOException, RestClientException { + baseKafkaToGcs(b -> b.addParameter("outputFileFormat", "TEXT")); + } + + @Test + public void testKafkaToGcsAvro() throws IOException, RestClientException { + baseKafkaToGcs(b -> b.addParameter("outputFileFormat", "AVRO")); + } + + private void baseKafkaToGcs(Function paramsAdder) + throws IOException, RestClientException { + + // Arrange + String topicName = kafkaResourceManager.createTopic(testName, 5); + + LaunchConfig.Builder options = + paramsAdder.apply( + LaunchConfig.builder(testName, specPath) + .addParameter( + "bootstrapServers", + kafkaResourceManager.getBootstrapServers().replace("PLAINTEXT://", "")) + .addParameter("inputTopics", topicName) + .addParameter("windowDuration", "10s") + .addParameter("schemaPath", getGcsPath("avro_schema.avsc")) + .addParameter("offset", "earliest") + .addParameter("outputDirectory", getGcsPath(testName)) + .addParameter("outputFilenamePrefix", testName + "-") + .addParameter("numShards", "2")); + + // Act + LaunchInfo info = launchTemplate(options); + assertThatPipeline(info).isRunning(); + + MockSchemaRegistryClient registryClient = new MockSchemaRegistryClient(); + registryClient.register(topicName + "-value", avroSchema, 1, 1); + + KafkaProducer kafkaProducer = + kafkaResourceManager.buildProducer( + new StringSerializer(), new KafkaAvroSerializer(registryClient)); + + for (int i = 1; i <= 10; i++) { + GenericRecord dataflow = createRecord(Integer.valueOf(i + "1"), "Dataflow", 0); + publish(kafkaProducer, topicName, i + "1", dataflow); + + GenericRecord pubsub = createRecord(Integer.valueOf(i + "2"), "Pub/Sub", 0); + publish(kafkaProducer, topicName, i + "2", pubsub); + + GenericRecord invalid = createRecord(Integer.valueOf(i + "3"), "InvalidNameTooLong", 0); + publish(kafkaProducer, topicName, i + "3", invalid); + + try { + TimeUnit.SECONDS.sleep(3); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + + AtomicReference> artifacts = new AtomicReference<>(); + Pattern expectedFilePattern = Pattern.compile(".*" + testName + "-.*"); + + Result result = + pipelineOperator() + .waitForConditionAndFinish( + createConfig(info), + () -> { + artifacts.set(gcsClient.listArtifacts(testName, expectedFilePattern)); + return !artifacts.get().isEmpty(); + }); + + // Assert + assertThatResult(result).meetsConditions(); + } + + private void publish( + KafkaProducer producer, String topicName, String key, GenericRecord value) { + try { + RecordMetadata recordMetadata = + producer.send(new ProducerRecord<>(topicName, key, value)).get(); + LOG.info( + "Published record {}, partition {} - offset: {}", + 
recordMetadata.topic(), + recordMetadata.partition(), + recordMetadata.offset()); + } catch (Exception e) { + throw new RuntimeException("Error publishing record to Kafka", e); + } + } + + private GenericRecord createRecord(int id, String productName, double value) { + return new GenericRecordBuilder(avroSchema) + .set("productId", id) + .set("productName", productName) + .build(); + } +} diff --git a/v2/kafka-to-gcs/src/test/resources/KafkaToGcsIT/avro_schema.avsc b/v2/kafka-to-gcs/src/test/resources/KafkaToGcsIT/avro_schema.avsc new file mode 100644 index 0000000000..d907a7f17a --- /dev/null +++ b/v2/kafka-to-gcs/src/test/resources/KafkaToGcsIT/avro_schema.avsc @@ -0,0 +1,15 @@ +{ + "type": "record", + "namespace": "org.example.avro", + "name": "AvroProductKafkaRecord", + "fields": [ + { + "name": "productId", + "type": "int" + }, + { + "name": "productName", + "type": "string" + } + ] +} \ No newline at end of file diff --git a/v2/pubsub-binary-to-bigquery/src/main/resources/requirements.txt b/v2/pubsub-binary-to-bigquery/src/main/resources/requirements.txt deleted file mode 100644 index 3258e093fd..0000000000 --- a/v2/pubsub-binary-to-bigquery/src/main/resources/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -apache-beam[gcp,aws, azure, dataframe]==2.56.0 \ No newline at end of file diff --git a/v2/pubsub-to-mongodb/src/main/resources/requirements.txt b/v2/pubsub-to-mongodb/src/main/resources/requirements.txt deleted file mode 100644 index d761175ee9..0000000000 --- a/v2/pubsub-to-mongodb/src/main/resources/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -apache-beam[gcp,dataframe,azure,aws]==2.55.1 \ No newline at end of file diff --git a/v2/sourcedb-to-spanner/pom.xml b/v2/sourcedb-to-spanner/pom.xml index c22f3029d5..2fa55834ea 100644 --- a/v2/sourcedb-to-spanner/pom.xml +++ b/v2/sourcedb-to-spanner/pom.xml @@ -48,6 +48,17 @@ com.google.cloud google-cloud-core + + org.syncope.identityconnectors + framework + 0.4.3 + + + org.syncope.identityconnectors + framework-internal + 0.4.3 + runtime + diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/options/OptionsToConfigBuilder.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/options/OptionsToConfigBuilder.java new file mode 100644 index 0000000000..5d9b457775 --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/options/OptionsToConfigBuilder.java @@ -0,0 +1,98 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.teleport.v2.options; + +import static com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.config.JdbcIOWrapperConfig.builderWithMySqlDefaults; + +import com.google.cloud.teleport.v2.source.reader.auth.dbauth.LocalCredentialsProvider; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.config.JdbcIOWrapperConfig; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.config.TableConfig; +import com.google.cloud.teleport.v2.source.reader.io.schema.SourceSchemaReference; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; + +public final class OptionsToConfigBuilder { + + public static final class MySql { + + public static JdbcIOWrapperConfig configWithMySqlDefaultsFromOptions( + SourceDbToSpannerOptions options) { + JdbcIOWrapperConfig.Builder builder = builderWithMySqlDefaults(); + builder = + builder + .setSourceHost(options.getSourceHost()) + .setSourcePort(options.getSourcePort()) + .setSourceSchemaReference( + SourceSchemaReference.builder().setDbName(options.getSourceDB()).build()) + .setDbAuth( + LocalCredentialsProvider.builder() + .setUserName(options.getUsername()) + .setPassword(options.getPassword()) + .build()) + .setJdbcDriverClassName(options.getJdbcDriverClassName()) + .setJdbcDriverJars(options.getJdbcDriverJars()) + .setShardID("Unsupported"); /*TODO: Support Sharded Migration */ + if (!options.getSourceConnectionProperties().isEmpty()) { + builder = builder.setConnectionProperties(options.getSourceConnectionProperties()); + } + if (options.getMaxConnections() != 0) { + builder.setMaxConnections((long) options.getMaxConnections()); + } + if (options.getReconnectsEnabled()) { + builder.setAutoReconnect(true); + if (options.getReconnectAttempts() != 0) { + builder.setReconnectAttempts((long) options.getReconnectAttempts()); + } + } + ImmutableMap<String, String> tablesWithPartitionColumns = + getTablesWithPartitionColumn(options); + ImmutableList<TableConfig> tableConfigs = + tablesWithPartitionColumns.entrySet().stream() + .map( + entry -> { + TableConfig.Builder configBuilder = + TableConfig.builder(entry.getKey()).withPartitionColum(entry.getValue()); + if (options.getNumPartitions() != 0) { + configBuilder = configBuilder.setMaxPartitions(options.getNumPartitions()); + } + if (options.getFetchSize() != 0) { + configBuilder = configBuilder.setMaxFetchSize(options.getFetchSize()); + } + return configBuilder.build(); + }) + .collect(ImmutableList.toImmutableList()); + builder = builder.setTableConfigs(tableConfigs); + return builder.build(); + } + } + + private static ImmutableMap<String, String> getTablesWithPartitionColumn( + SourceDbToSpannerOptions options) { + String[] tables = options.getTables().split(","); + String[] partitionColumns = options.getPartitionColumns().split(","); + if (tables.length != partitionColumns.length) { + throw new RuntimeException( + "Invalid configuration. 
Partition column count does not match " + "table count."); + } + ImmutableMap.Builder<String, String> tableWithPartitionColumnBuilder = ImmutableMap.builder(); + for (int i = 0; i < tables.length; i++) { + tableWithPartitionColumnBuilder.put(tables[i], partitionColumns[i]); + } + return tableWithPartitionColumnBuilder.build(); + } + + private OptionsToConfigBuilder() {} +} diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/options/SourceDbToSpannerOptions.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/options/SourceDbToSpannerOptions.java index b2b24e4506..f348eafc80 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/options/SourceDbToSpannerOptions.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/options/SourceDbToSpannerOptions.java @@ -29,6 +29,7 @@ public interface SourceDbToSpannerOptions extends CommonTemplateOptions { description = "Comma-separated Cloud Storage path(s) of the JDBC driver(s)", helpText = "The comma-separated list of driver JAR files.", example = "gs://your-bucket/driver_jar1.jar,gs://your-bucket/driver_jar2.jar") + @Default.String("") String getJdbcDriverJars(); void setJdbcDriverJars(String driverJar); @@ -47,23 +48,39 @@ public interface SourceDbToSpannerOptions extends CommonTemplateOptions { @TemplateParameter.Text( order = 3, - optional = true, - regexes = { - "(^jdbc:[a-zA-Z0-9/:@.?_+!*=&-;]+$)|(^([A-Za-z0-9+/]{4}){1,}([A-Za-z0-9+/]{0,3})={0,3})" - }, + regexes = {"(^jdbc:[a-zA-Z0-9/:@.]+$)"}, groupName = "Source", - description = "Connection URL to connect to the source database.", - helpText = - "The JDBC connection URL string. For example, `jdbc:mysql://some-host:3306/sampledb`. Can" - + " be passed in as a string that's Base64-encoded and then encrypted with a Cloud" - + " KMS key. Currently supported sources: MySQL", - example = "jdbc:mysql://some-host:3306/sampledb") - String getSourceConnectionURL(); + description = + "Connection URL to connect to the source database host. Port number and connection properties must be supplied separately.", + helpText = "The JDBC connection URL string. For example, `jdbc:mysql://some-host`.") + String getSourceHost(); - void setSourceConnectionURL(String connectionURL); + void setSourceHost(String host); @TemplateParameter.Text( order = 4, + optional = false, + regexes = {"(^[0-9]+$)"}, + groupName = "Source", + description = "Port number of the source database.", + helpText = "Port number of the source database. For example, `3306`.") + String getSourcePort(); + + void setSourcePort(String port); + + /* TODO: (support Sharding, PG namespaces) */ + @TemplateParameter.Text( + order = 5, + regexes = {"(^[a-zA-Z0-9]+$)"}, + groupName = "Source", + description = "Source database name.", + helpText = "Name of the source database. For example, `person9`.") + String getSourceDB(); + + void setSourceDB(String db); + + @TemplateParameter.Text( + order = 6, optional = true, regexes = {"^[a-zA-Z0-9_;!*&=@#-:\\/]+$"}, groupName = "Source", @@ -72,12 +89,13 @@ public interface SourceDbToSpannerOptions extends CommonTemplateOptions { "Properties string to use for the JDBC connection. 
Format of the string must be" + " [propertyName=property;]*.", example = "unicode=true;characterEncoding=UTF-8") + @Default.String("") String getSourceConnectionProperties(); void setSourceConnectionProperties(String connectionProperties); @TemplateParameter.Text( - order = 5, + order = 7, optional = true, regexes = {"^.+$"}, groupName = "Source", @@ -85,24 +103,26 @@ public interface SourceDbToSpannerOptions extends CommonTemplateOptions { helpText = "The username to be used for the JDBC connection. Can be passed in as a Base64-encoded" + " string encrypted with a Cloud KMS key.") + @Default.String("") String getUsername(); void setUsername(String username); @TemplateParameter.Password( - order = 6, + order = 8, optional = true, groupName = "Source", description = "JDBC connection password.", helpText = "The password to be used for the JDBC connection. Can be passed in as a Base64-encoded" + " string encrypted with a Cloud KMS key.") + @Default.String("") String getPassword(); void setPassword(String password); @TemplateParameter.Text( - order = 7, + order = 9, optional = true, groupName = "Source Parameters", description = "The name of a column of numeric type that will be used for partitioning.", @@ -116,7 +136,7 @@ public interface SourceDbToSpannerOptions extends CommonTemplateOptions { void setPartitionColumns(String partitionColumns); @TemplateParameter.Text( - order = 8, + order = 10, optional = true, groupName = "Source Parameters", description = "Comma-separated names of the tables in the source database.", @@ -125,8 +145,9 @@ public interface SourceDbToSpannerOptions extends CommonTemplateOptions { void setTables(String table); + /* TODO(pipelineController) allow per table NumPartitions. */ @TemplateParameter.Integer( - order = 9, + order = 11, optional = true, groupName = "Source", description = "The number of partitions.", @@ -134,12 +155,25 @@ public interface SourceDbToSpannerOptions extends CommonTemplateOptions { "The number of partitions. This, along with the lower and upper bound, form partitions" + " strides for generated WHERE clause expressions used to split the partition column" + " evenly. When the input is less than 1, the number is set to 1.") + @Default.Integer(0) /* Use Auto Inference */ Integer getNumPartitions(); - void setNumPartitions(Integer numPartitions); + void setNumPartitions(Integer value); + + /* TODO(pipelineController) allow per table FetchSize. 
*/ + @TemplateParameter.Integer( + order = 12, + optional = true, + groupName = "Source", + description = "Table read fetch size.", + helpText = "The fetch size of a single table read.") + @Default.Integer(0) /* Use Beam Default */ + Integer getFetchSize(); + + void setFetchSize(Integer fetchSize); @TemplateParameter.Text( - order = 10, + order = 13, description = "Cloud Spanner Instance Id.", helpText = "The destination Cloud Spanner instance.") String getInstanceId(); @@ -147,7 +181,7 @@ void setInstanceId(String value); @TemplateParameter.Text( - order = 11, + order = 14, description = "Cloud Spanner Database Id.", helpText = "The destination Cloud Spanner database.") String getDatabaseId(); @@ -155,7 +189,7 @@ void setDatabaseId(String value); @TemplateParameter.ProjectId( - order = 12, + order = 15, description = "Cloud Spanner Project Id.", helpText = "This is the name of the Cloud Spanner project.") String getProjectId(); @@ -163,7 +197,7 @@ void setProjectId(String projectId); @TemplateParameter.Text( - order = 13, + order = 16, optional = true, description = "Cloud Spanner Endpoint to call", helpText = "The Cloud Spanner endpoint to call in the template.", @@ -174,7 +208,7 @@ void setSpannerHost(String value); @TemplateParameter.Text( - order = 14, + order = 17, optional = true, description = "Source database columns to ignore", helpText = @@ -185,14 +219,48 @@ void setIgnoreColumns(String value); @TemplateParameter.Text( - order = 15, + order = 18, optional = true, description = "Maximum number of connections to Source database per worker", helpText = - "Configures the JDBC connection pool on each worker with maximum number of connections. Use a negative number for no limit. Default value is 100.", + "Configures the JDBC connection pool on each worker with maximum number of connections. Use a negative number for no limit.", example = "-1") - @Default.Integer(100) + @Default.Integer(0) // Take Dialect Specific default in the wrapper Integer getMaxConnections(); void setMaxConnections(Integer value); + + @TemplateParameter.Text( + order = 19, + optional = true, + description = "Enable connection reconnects", + helpText = "Enables the JDBC connection reconnects.", + example = "true") + @Default.Boolean(true) // Take Dialect Specific default in the wrapper. + Boolean getReconnectsEnabled(); + + void setReconnectsEnabled(Boolean value); + + @TemplateParameter.Text( + order = 20, + optional = true, + description = "Maximum number of connection reconnect attempts, if reconnects are enabled", + helpText = "Configures the JDBC connection reconnect attempts.", + example = "10") + @Default.Integer(0) // Take Dialect Specific default in the wrapper. 
+ Integer getReconnectAttempts(); + + void setReconnectAttempts(Integer value); + + @TemplateParameter.GcsReadFile( + order = 21, + optional = true, + description = + "Session File Path in Cloud Storage, to provide mapping information in the form of a session file", + helpText = + "Session file path in Cloud Storage that contains mapping information from" + + " Spanner Migration Tool") + String getSessionFilePath(); + + void setSessionFilePath(String value); } diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/DataSourceProvider.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/DataSourceProvider.java deleted file mode 100644 index 177da15ce5..0000000000 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/DataSourceProvider.java +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (C) 2023 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package com.google.cloud.teleport.v2.source; - -import static com.google.cloud.teleport.v2.utils.KMSUtils.maybeDecrypt; - -import com.google.cloud.teleport.v2.options.SourceDbToSpannerOptions; -import java.lang.invoke.MethodHandles; -import java.lang.invoke.VarHandle; -import javax.sql.DataSource; -import org.apache.beam.sdk.io.jdbc.JdbcIO; -import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class DataSourceProvider - implements org.apache.beam.sdk.transforms.SerializableFunction { - - private static final Logger LOG = LoggerFactory.getLogger(DataSourceProvider.class); - private static volatile DataSource dataSource = null; - // VarHandle provides a strongly typed reference to @dataSource - // and supports various atomic access modes like the acquire-release semantics. - private static final VarHandle DATA_SOURCE; - private final JdbcIO.DataSourceConfiguration config; - - // Bind DATA_SOURCE to dataSource - static { - try { - MethodHandles.Lookup lookup = MethodHandles.lookup(); - DATA_SOURCE = - lookup - .in(DataSourceProvider.class) - .findStaticVarHandle(DataSourceProvider.class, "dataSource", DataSource.class); - } catch (ReflectiveOperationException e) { - // Logger might not be initialized in static scope! - System.err.println("Error while binding VarHandle: " + e.toString()); - throw new ExceptionInInitializerError(e); - } - } - - /** - * Constructs an instance of DataSourceProvider. - * - * @param options Pipeline options. - */ - public DataSourceProvider(SourceDbToSpannerOptions options) { - config = getDataSourceConfiguration(options); - } - - /** - * Returns a Singleton {@link DataSource} after initializing it if necessary. - * - * @see Idomatic - * implementation of Double checked locking pattern. 
- */ - @Override - public DataSource apply(Void input) { - DataSource localRef = getDataSourceAcquire(); - if (localRef == null) { - synchronized (this) { - localRef = getDataSourceAcquire(); - if (localRef == null) { - localRef = JdbcIO.PoolableDataSourceProvider.of(config).apply(null); - setDataSourceRelease(localRef); - LOG.debug("initialize DataSource dataSource {}", localRef); - } - } - } - return localRef; - } - - /** - * Access {@link DataSourceProvider#dataSource} with memory_order_acquire memory - * ordering semantics. - * - *

<p>The load operation of {@link DataSourceProvider#dataSource} to the returned value - * guarantees: - * - * <ol> - *   <li>No reads or writes in the current thread can be reordered before this load. - *   <li>All writes in other threads that release the same variable are visible in the current - *       thread. - * </ol> - * - * @see VarHandle - * @see - * Release-Acquire_ordering - */ - private DataSource getDataSourceAcquire() { - return (DataSource) DATA_SOURCE.getAcquire(); - } - - /** - * Set {@link DataSourceProvider#dataSource} with memory_order_release memory - * ordering semantics. - * - * <p>The store operation to {@link DataSourceProvider#dataSource} with this memory ordering - * guarantees: - * - * <ol> - *   <li>No reads or writes in the current thread can be reordered after this store. - *   <li>All writes in the current thread are visible in other threads that acquire the same - *       variable. - * </ol>
- * - * @see VarHandle - * @see - * Release-Acquire_ordering - */ - private void setDataSourceRelease(DataSource value) { - DATA_SOURCE.setRelease(value); - } - - private static JdbcIO.DataSourceConfiguration getDataSourceConfiguration( - SourceDbToSpannerOptions options) { - var config = - JdbcIO.DataSourceConfiguration.create( - StaticValueProvider.of(options.getJdbcDriverClassName()), - maybeDecrypt(options.getSourceConnectionURL(), null)) - .withUsername(maybeDecrypt(options.getUsername(), null)) - .withPassword(maybeDecrypt(options.getPassword(), null)) - .withMaxConnections(options.getMaxConnections()); - - if (options.getSourceConnectionProperties() != null) { - config = config.withConnectionProperties(options.getSourceConnectionProperties()); - } - if (options.getJdbcDriverJars() != null) { - config = config.withDriverJars(options.getJdbcDriverJars()); - } - return config; - } -} diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/ReaderImpl.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/ReaderImpl.java new file mode 100644 index 0000000000..174d08c287 --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/ReaderImpl.java @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.teleport.v2.source.reader; + +import com.google.auto.value.AutoValue; +import com.google.cloud.teleport.v2.source.reader.io.IoWrapper; +import com.google.cloud.teleport.v2.source.reader.io.schema.SourceSchema; +import com.google.cloud.teleport.v2.source.reader.io.transform.ReaderTransform; +import java.io.Serializable; + +@AutoValue +public abstract class ReaderImpl implements Reader, Serializable { + + abstract SourceSchema sourceSchema(); + + abstract ReaderTransform readerTransform(); + + public static ReaderImpl of(IoWrapper ioWrapper) { + SourceSchema sourceSchema = ioWrapper.discoverTableSchema(); + ReaderTransform.Builder readerTransformBuilder = ReaderTransform.builder(); + ioWrapper + .getTableReaders() + .entrySet() + .forEach(entry -> readerTransformBuilder.withTableReader(entry.getKey(), entry.getValue())); + + return ReaderImpl.create(sourceSchema, readerTransformBuilder.build()); + } + + @Override + public SourceSchema getSourceSchema() { + return this.sourceSchema(); + } + + @Override + public ReaderTransform getReaderTransform() { + return this.readerTransform(); + } + + static ReaderImpl create(SourceSchema sourceSchema, ReaderTransform readerTransform) { + return new AutoValue_ReaderImpl(sourceSchema, readerTransform); + } +} diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/DbAuth.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/DbAuth.java new file mode 100644 index 0000000000..3c45ac0b3d --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/DbAuth.java @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.v2.source.reader.auth.dbauth; + +import java.io.Serializable; +import org.apache.beam.sdk.options.ValueProvider; + +/** + * Interface for Various ways of providing DB credentials to be provided to {@link + * org.apache.beam.sdk.io.jdbc.JdbcIO JdbcIO}. + */ +public interface DbAuth extends Serializable { + + /** + * Get Value provider for DB userName. + * + * @return the username to be provided to {@link org.apache.beam.sdk.io.jdbc.JdbcIO JdbcIO} + */ + ValueProvider getUserName(); + + /** + * Get Value provider for DB password. 
+   *
+   * @return the password to be provided to {@link org.apache.beam.sdk.io.jdbc.JdbcIO JdbcIO}
+   */
+  ValueProvider<String> getPassword();
+}
diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/GuardedStringValueProvider.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/GuardedStringValueProvider.java
new file mode 100644
index 0000000000..3071ec7e36
--- /dev/null
+++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/GuardedStringValueProvider.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.cloud.teleport.v2.source.reader.auth.dbauth;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.concurrent.atomic.AtomicReference;
+import org.apache.beam.sdk.options.ValueProvider;
+import org.checkerframework.checker.initialization.qual.Initialized;
+import org.checkerframework.checker.nullness.qual.NonNull;
+import org.checkerframework.checker.nullness.qual.UnknownKeyFor;
+import org.identityconnectors.common.security.GuardedString;
+
+/**
+ * Utility class that wraps the password in a {@link GuardedString}. Wrapping the password in a
+ * {@link GuardedString} helps prevent accidental logging of the password by the reader code.
+ * {@link GuardedString} also zeroes the string before it is freed.
+ */
+public final class GuardedStringValueProvider implements ValueProvider<String>, Serializable {
+  private GuardedString guardedString;
+
+  /**
+   * Creates a new instance of {@link GuardedStringValueProvider}.
+   *
+   * @param value value to guard
+   * @return created instance.
+   */
+  public static GuardedStringValueProvider create(String value) {
+    return new GuardedStringValueProvider(new GuardedString(value.toCharArray()));
+  }
+
+  /**
+   * Implementation of {@link ValueProvider#get()}.
+   *
+   * @return the wrapped string.
+   */
+  @Override
+  public String get() {
+    AtomicReference<String> ret = new AtomicReference<>("");
+    this.guardedString().access((clearChars) -> ret.set(new String(clearChars)));
+    return ret.get();
+  }
+
+  private GuardedString guardedString() {
+    return this.guardedString;
+  }
+
+  private GuardedStringValueProvider(GuardedString guardedString) {
+    this.guardedString = guardedString;
+  }
+
+  @Override
+  public @UnknownKeyFor @NonNull @Initialized boolean isAccessible() {
+    return true;
+  }
+
+  private void writeObject(java.io.ObjectOutputStream out) throws IOException {
+    // TODO: work on an encrypted version of this.
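+    // Custom serialization: GuardedString itself is not Serializable, so the cleartext value is
+    // written out here and re-wrapped into a new GuardedString by readObject() below. Note that
+    // the serialized form therefore still carries the password in cleartext.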
+    out.writeObject(this.get());
+  }
+
+  private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
+    this.guardedString = new GuardedString(((String) in.readObject()).toCharArray());
+  }
+
+  private void readObjectNoData() {}
+}
diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/LocalCredentialsProvider.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/LocalCredentialsProvider.java
new file mode 100644
index 0000000000..e1aeff804a
--- /dev/null
+++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/LocalCredentialsProvider.java
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.cloud.teleport.v2.source.reader.auth.dbauth;
+
+import com.google.auto.value.AutoValue;
+import org.apache.beam.sdk.options.ValueProvider;
+import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
+
+/**
+ * Implements the {@link DbAuth} interface for credentials that are passed to the Dataflow job
+ * locally, for example via pipeline input parameters, as opposed to through a Secret Manager URL.
+ */
+@AutoValue
+public abstract class LocalCredentialsProvider implements DbAuth {
+  abstract String userName();
+
+  abstract GuardedStringValueProvider password();
+
+  @Override
+  public ValueProvider<String> getUserName() {
+    return StaticValueProvider.of(this.userName());
+  }
+
+  @Override
+  public ValueProvider<String> getPassword() {
+    return password();
+  }
+
+  public static Builder builder() {
+    return new AutoValue_LocalCredentialsProvider.Builder();
+  }
+
+  @AutoValue.Builder
+  public abstract static class Builder {
+
+    public abstract Builder setUserName(String value);
+
+    abstract Builder setPassword(GuardedStringValueProvider value);
+
+    public Builder setPassword(String password) {
+      return this.setPassword(GuardedStringValueProvider.create(password));
+    }
+
+    public abstract LocalCredentialsProvider build();
+  }
+}
diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/package-info.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/package-info.java
new file mode 100644
index 0000000000..60e0709a3e
--- /dev/null
+++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/package-info.java
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2024 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the + * License for the specific language governing permissions and limitations under + * the License. + */ +/** DB Authentication for Reader. */ +package com.google.cloud.teleport.v2.source.reader.auth.dbauth; diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/package-info.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/package-info.java new file mode 100644 index 0000000000..d71c1afb94 --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/package-info.java @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2024 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** DB and Instance Authentication for Reader. */ +package com.google.cloud.teleport.v2.source.reader.auth; diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/IoWrapper.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/IoWrapper.java new file mode 100644 index 0000000000..a4d90671d3 --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/IoWrapper.java @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.teleport.v2.source.reader.io; + +import com.google.cloud.teleport.v2.source.reader.io.row.SourceRow; +import com.google.cloud.teleport.v2.source.reader.io.schema.SourceSchema; +import com.google.cloud.teleport.v2.source.reader.io.schema.SourceTableReference; +import com.google.common.collect.ImmutableMap; +import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.values.PBegin; +import org.apache.beam.sdk.values.PCollection; + +public interface IoWrapper { + ImmutableMap>> getTableReaders(); + + SourceSchema discoverTableSchema(); +} diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/dialectadapter/mysql/MysqlDialectAdapter.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/dialectadapter/mysql/MysqlDialectAdapter.java index ff25a85d0b..8c714bf7ae 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/dialectadapter/mysql/MysqlDialectAdapter.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/dialectadapter/mysql/MysqlDialectAdapter.java @@ -108,7 +108,7 @@ protected static String getSchemaDiscoveryQuery(SourceSchemaReference sourceSche .append(String.join(",", InformationSchemaCols.colList())) .append( String.format( - " FROM INFORMATION_SCHEMA.Columns WHERE TABLE_SCHEMA = %s AND", + " FROM INFORMATION_SCHEMA.Columns WHERE TABLE_SCHEMA = '%s' AND", sourceSchemaReference.dbName())) .append(" TABLE_NAME = ?") .toString(); diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/JdbcIoWrapper.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/JdbcIoWrapper.java new file mode 100644 index 0000000000..df6587977b --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/JdbcIoWrapper.java @@ -0,0 +1,201 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper; + +import com.google.cloud.teleport.v2.source.reader.io.IoWrapper; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.config.JdbcIOWrapperConfig; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.config.TableConfig; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.rowmapper.JdbcSourceRowMapper; +import com.google.cloud.teleport.v2.source.reader.io.row.SourceRow; +import com.google.cloud.teleport.v2.source.reader.io.schema.SchemaDiscovery; +import com.google.cloud.teleport.v2.source.reader.io.schema.SchemaDiscoveryImpl; +import com.google.cloud.teleport.v2.source.reader.io.schema.SourceSchema; +import com.google.cloud.teleport.v2.source.reader.io.schema.SourceTableReference; +import com.google.cloud.teleport.v2.source.reader.io.schema.SourceTableSchema; +import com.google.cloud.teleport.v2.spanner.migrations.schema.SourceColumnType; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import javax.sql.DataSource; +import org.apache.beam.sdk.io.jdbc.JdbcIO; +import org.apache.beam.sdk.io.jdbc.JdbcIO.DataSourceConfiguration; +import org.apache.beam.sdk.io.jdbc.JdbcIO.ReadWithPartitions; +import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider; +import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.values.PBegin; +import org.apache.beam.sdk.values.PCollection; +import org.checkerframework.checker.initialization.qual.Initialized; +import org.checkerframework.checker.nullness.qual.NonNull; +import org.checkerframework.checker.nullness.qual.UnknownKeyFor; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public final class JdbcIoWrapper implements IoWrapper { + private final ImmutableMap>> + tableReaders; + private final SourceSchema sourceSchema; + + private static final Logger logger = LoggerFactory.getLogger(JdbcIoWrapper.class); + + public static JdbcIoWrapper of(JdbcIOWrapperConfig config) { + DataSourceConfiguration dataSourceConfiguration = getDataSourceConfiguration(config); + + SourceSchema sourceSchema = getSourceSchema(config, dataSourceConfiguration); + ImmutableMap>> tableReaders = + buildTableReaders(config, dataSourceConfiguration, sourceSchema); + return new JdbcIoWrapper(tableReaders, sourceSchema); + } + + @Override + public ImmutableMap>> + getTableReaders() { + return this.tableReaders; + } + + @Override + public SourceSchema discoverTableSchema() { + return this.sourceSchema; + } + + static ImmutableMap>> + buildTableReaders( + JdbcIOWrapperConfig config, + DataSourceConfiguration dataSourceConfiguration, + SourceSchema sourceSchema) { + return config.tableConfigs().stream() + .map( + tableConfig -> { + SourceTableSchema sourceTableSchema = + findSourceTableSchema(sourceSchema, tableConfig); + return Map.entry( + SourceTableReference.builder() + .setSourceSchemaReference(sourceSchema.schemaReference()) + .setSourceTableName(sourceTableSchema.tableName()) + .setSourceTableSchemaUUID(sourceTableSchema.tableSchemaUUID()) + .build(), + getJdbcIO(config, dataSourceConfiguration, tableConfig, sourceTableSchema)); + }) + .collect(ImmutableMap.toImmutableMap(Map.Entry::getKey, Map.Entry::getValue)); + } + + static SourceTableSchema findSourceTableSchema( + SourceSchema sourceSchema, TableConfig tableConfig) { + return sourceSchema.tableSchemas().stream() + .filter(schema -> schema.tableName().equals(tableConfig.tableName())) + .findFirst() + 
.orElseThrow(); + } + + static SourceSchema getSourceSchema( + JdbcIOWrapperConfig config, DataSourceConfiguration dataSourceConfiguration) { + SchemaDiscovery schemaDiscovery = + new SchemaDiscoveryImpl(config.dialectAdapter(), config.schemaDiscoveryBackOff()); + SourceSchema.Builder sourceSchemaBuilder = + SourceSchema.builder().setSchemaReference(config.sourceSchemaReference()); + DataSource dataSource = dataSourceConfiguration.buildDatasource(); + ImmutableList tables = + config.tableConfigs().stream() + .map(TableConfig::tableName) + .collect(ImmutableList.toImmutableList()); + ImmutableMap> tableSchemas = + schemaDiscovery.discoverTableSchema(dataSource, config.sourceSchemaReference(), tables); + tableSchemas.entrySet().stream() + .map( + tableEntry -> { + SourceTableSchema.Builder sourceTableSchemaBuilder = + SourceTableSchema.builder().setTableName(tableEntry.getKey()); + tableEntry + .getValue() + .entrySet() + .forEach( + colEntry -> + sourceTableSchemaBuilder.addSourceColumnNameToSourceColumnType( + colEntry.getKey(), colEntry.getValue())); + return sourceTableSchemaBuilder.build(); + }) + .forEach(sourceSchemaBuilder::addTableSchema); + return sourceSchemaBuilder.build(); + } + + private static PTransform> getJdbcIO( + JdbcIOWrapperConfig config, + DataSourceConfiguration dataSourceConfiguration, + TableConfig tableConfig, + SourceTableSchema sourceTableSchema) { + ReadWithPartitions jdbcIO = + JdbcIO.readWithPartitions() + .withTable(tableConfig.tableName()) + .withPartitionColumn(tableConfig.partitionColumns().get(0)) + .withDataSourceProviderFn(JdbcIO.PoolableDataSourceProvider.of(dataSourceConfiguration)) + .withRowMapper( + new JdbcSourceRowMapper(config.valueMappingsProvider(), sourceTableSchema)); + if (tableConfig.maxFetchSize() != null) { + jdbcIO = jdbcIO.withFetchSize(tableConfig.maxFetchSize()); + } + if (tableConfig.maxPartitions() != null) { + jdbcIO = jdbcIO.withNumPartitions(tableConfig.maxPartitions()); + } + return jdbcIO; + } + + private static DataSourceConfiguration getDataSourceConfiguration(JdbcIOWrapperConfig config) { + + DataSourceConfiguration dataSourceConfig = + JdbcIO.DataSourceConfiguration.create( + StaticValueProvider.of(config.jdbcDriverClassName()), + StaticValueProvider.of(getUrl(config))) + .withDriverJars(config.jdbcDriverJars()) + .withMaxConnections(Math.toIntExact(config.maxConnections())); + + if (!config.dbAuth().getUserName().get().isBlank()) { + dataSourceConfig = dataSourceConfig.withUsername(config.dbAuth().getUserName().get()); + } + if (!config.dbAuth().getPassword().get().isBlank()) { + dataSourceConfig = dataSourceConfig.withPassword(config.dbAuth().getPassword().get()); + } + return dataSourceConfig; + } + + private static String getUrl(JdbcIOWrapperConfig config) { + StringBuffer urlBuilder = + new StringBuffer() + .append(config.sourceHost()) + .append(":") + .append(config.sourcePort()) + .append("/") + .append(config.sourceSchemaReference().dbName()); + /* TODO: Handle PG Namespace */ + ImmutableList.Builder attributesBuilder = new ImmutableList.Builder<>(); + if (config.autoReconnect()) { + attributesBuilder + .add("autoReconnect=true") + .add("maxReconnects=" + config.reconnectAttempts()); + } + String attributes = String.join("&", attributesBuilder.build()); + if (!attributes.isBlank()) { + urlBuilder.append("?").append(attributes); + } + logger.debug("connection url is" + urlBuilder.toString()); + return urlBuilder.toString(); + } + + private JdbcIoWrapper( + ImmutableMap>> tableReaders, + SourceSchema sourceSchema) { 
+ this.tableReaders = tableReaders; + this.sourceSchema = sourceSchema; + } +} diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/JdbcIOWrapperConfig.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/JdbcIOWrapperConfig.java new file mode 100644 index 0000000000..e6291af82d --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/JdbcIOWrapperConfig.java @@ -0,0 +1,149 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.config; + +import com.google.auto.value.AutoValue; +import com.google.cloud.teleport.v2.source.reader.auth.dbauth.DbAuth; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.dialectadapter.DialectAdapter; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.config.defaults.MySqlConfigDefaults; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.rowmapper.JdbcValueMappingsProvider; +import com.google.cloud.teleport.v2.source.reader.io.schema.SourceSchemaReference; +import com.google.cloud.teleport.v2.source.reader.io.schema.typemapping.UnifiedTypeMapper.MapperType; +import com.google.common.collect.ImmutableList; +import org.apache.beam.sdk.util.FluentBackoff; + +/** + * Configuration for {@link + * com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.JdbcIoWrapper JdbcIoWrapper}. + */ +@AutoValue +public abstract class JdbcIOWrapperConfig { + + /** Source Endpoint. */ + public abstract String sourceHost(); + + /** Source Port. */ + public abstract String sourcePort(); + + /** {@link SourceSchemaReference}. */ + public abstract SourceSchemaReference sourceSchemaReference(); + + /** Table Configurations. */ + public abstract ImmutableList tableConfigs(); + + /** Shard ID. */ + public abstract String shardID(); + + /** DB credentials. */ + public abstract DbAuth dbAuth(); + + /* + * A comma-separated list of driver JAR files. (Example: + * "gs://bucket/driver_jar1.jar,gs://bucket/driver_jar2.jar") + */ + public abstract String jdbcDriverJars(); + + /* Name of the JDbc Driver Class. */ + public abstract String jdbcDriverClassName(); + + /** Schema Mapper Type, defaults to MySQl. */ + public abstract MapperType schemaMapperType(); + + /** Dialect Adapter. */ + public abstract DialectAdapter dialectAdapter(); + + /** Source Row Mapping Provider. */ + public abstract JdbcValueMappingsProvider valueMappingsProvider(); + + /* + * Properties string to use for the JDBC connection. + * Format of the string must be [propertyName=property;] + * Defaults to a vetted configuration based on benchmarking results. 
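+   * The complete default value lives in MySqlConfigDefaults.DEFAULT_MYSQL_CONNECTION_PROPERTIES.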
+ * Example: + * "maxTotal=160;maxpoolsize=160;maxIdle=160;minIdle=160" + * + ";wait_timeout=57600" + * + ";interactive_timeout=57600" + * + ";idletimeout=3600" + * + ";maxwaittime=600_000" + * + ";maxWaitMillis=600_000" + * + ";maxConnLifetimeMillis=600_000" + * + + * ";testOnCreate=true;testOnBorrow=true;testOnReturn=true;testWhileIdle=true" + */ + public abstract String connectionProperties(); + + /** Auto Reconnect for dropped connections. */ + public abstract Boolean autoReconnect(); + + /** Reconnect Attempts for Auto Reconnect default 10. */ + public abstract Long reconnectAttempts(); + + /** Max Number of connections. */ + public abstract Long maxConnections(); + + /** BackOff Strategy for Schema Discovery retries. Defaults to {@link FluentBackoff#DEFAULT}. */ + public abstract FluentBackoff schemaDiscoveryBackOff(); + + public static Builder builderWithMySqlDefaults() { + return new AutoValue_JdbcIOWrapperConfig.Builder() + .setSchemaMapperType(MySqlConfigDefaults.DEFAULT_MYSQL_SCHEMA_MAPPER_TYPE) + .setDialectAdapter(MySqlConfigDefaults.DEFAULT_MYSQL_DIALECT_ADAPTER) + .setValueMappingsProvider(MySqlConfigDefaults.DEFAULT_MYSQL_VALUE_MAPPING_PROVIDER) + .setAutoReconnect(MySqlConfigDefaults.DEFAULT_MYSQL_AUTO_RECONNECT) + .setReconnectAttempts(MySqlConfigDefaults.DEFAULT_MYSQL_RECONNECT_ATTEMPTS) + .setConnectionProperties(MySqlConfigDefaults.DEFAULT_MYSQL_CONNECTION_PROPERTIES) + .setMaxConnections(MySqlConfigDefaults.DEFAULT_MYSQL_MAX_CONNECTIONS) + .setSchemaDiscoveryBackOff(MySqlConfigDefaults.DEFAULT_MYSQL_SCHEMA_DISCOVERY_BACKOFF); + } + + @AutoValue.Builder + public abstract static class Builder { + + public abstract Builder setSourceHost(String value); + + public abstract Builder setSourcePort(String value); + + public abstract Builder setSourceSchemaReference(SourceSchemaReference value); + + public abstract Builder setTableConfigs(ImmutableList value); + + public abstract Builder setShardID(String value); + + public abstract Builder setDbAuth(DbAuth value); + + public abstract Builder setSchemaMapperType(MapperType value); + + public abstract Builder setDialectAdapter(DialectAdapter value); + + public abstract Builder setValueMappingsProvider(JdbcValueMappingsProvider value); + + public abstract Builder setJdbcDriverJars(String value); + + public abstract Builder setJdbcDriverClassName(String value); + + public abstract Builder setConnectionProperties(String value); + + public abstract Builder setReconnectAttempts(Long value); + + public abstract Builder setAutoReconnect(Boolean value); + + public abstract Builder setSchemaDiscoveryBackOff(FluentBackoff value); + + public abstract Builder setMaxConnections(Long value); + + public abstract JdbcIOWrapperConfig build(); + } +} diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/TableConfig.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/TableConfig.java new file mode 100644 index 0000000000..aa3f025fd1 --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/TableConfig.java @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.config; + +import com.google.auto.value.AutoValue; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import javax.annotation.Nullable; + +/** Table Configuration. */ +@AutoValue +public abstract class TableConfig { + + /** Name of the table. */ + public abstract String tableName(); + + /** + * Max number of read partitions. If not-null uses the user supplied maxPartitions, instead of + * auto-inference. defaults to null. + */ + @Nullable + public abstract Integer maxPartitions(); + + /** + * Configures the size of data read in db, per db read call. Defaults to beam's DEFAULT_FETCH_SIZE + * of 50_000. For manually fine-tuning this, take into account the read ahead buffer pool settings + * (innodb_read_ahead_threshold) and the worker memory. + */ + @Nullable + public abstract Integer maxFetchSize(); + + /** Partition Column. As of now only a single partition column is supported */ + public abstract ImmutableList partitionColumns(); + + public static Builder builder(String tableName) { + return new AutoValue_TableConfig.Builder() + .setTableName(tableName) + .setMaxPartitions(null) + .setMaxFetchSize(null); + } + + @AutoValue.Builder + public abstract static class Builder { + + abstract Builder setTableName(String value); + + public abstract Builder setMaxPartitions(Integer value); + + public abstract Builder setMaxFetchSize(Integer value); + + abstract ImmutableList.Builder partitionColumnsBuilder(); + + public Builder withPartitionColum(String column) { + this.partitionColumnsBuilder().add(column); + return this; + } + + abstract TableConfig autoBuild(); + + public TableConfig build() { + TableConfig tableConfig = this.autoBuild(); + Preconditions.checkState( + tableConfig.partitionColumns().size() == 1, + "A single partition column is required. Currently Partition Columns are not auto inferred and composite partition columns are not supported."); + return tableConfig; + } + } +} diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/defaults/MySqlConfigDefaults.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/defaults/MySqlConfigDefaults.java new file mode 100644 index 0000000000..d61cd88cf5 --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/defaults/MySqlConfigDefaults.java @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.config.defaults; + +import com.google.cloud.teleport.v2.source.reader.io.jdbc.dialectadapter.DialectAdapter; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.dialectadapter.mysql.MysqlDialectAdapter; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.dialectadapter.mysql.MysqlDialectAdapter.MySqlVersion; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.rowmapper.JdbcValueMappingsProvider; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.rowmapper.provider.MysqlJdbcValueMappings; +import com.google.cloud.teleport.v2.source.reader.io.schema.typemapping.UnifiedTypeMapper.MapperType; +import org.apache.beam.sdk.util.FluentBackoff; + +// TODO: Fine-tune the defaults based on benchmarking. + +/** + * MySql Default Configuration for {@link + * com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.JdbcIoWrapper JdbcIoWrapper}. + */ +public class MySqlConfigDefaults { + + public static final MapperType DEFAULT_MYSQL_SCHEMA_MAPPER_TYPE = MapperType.MYSQL; + public static final DialectAdapter DEFAULT_MYSQL_DIALECT_ADAPTER = + new MysqlDialectAdapter(MySqlVersion.DEFAULT); + public static final JdbcValueMappingsProvider DEFAULT_MYSQL_VALUE_MAPPING_PROVIDER = + new MysqlJdbcValueMappings(); + + public static final String DEFAULT_MYSQL_CONNECTION_PROPERTIES = + "maxTotal=160;maxpoolsize=160;maxIdle=160;minIdle=160" + + ";wait_timeout=57600" + + ";interactive_timeout=57600" + + ";idletimeout=3600" + + ";maxwaittime=600_000" + + ";maxWaitMillis=600_000" + + ";maxConnLifetimeMillis=600_000" + + ";testOnCreate=true;testOnBorrow=true;testOnReturn=true;testWhileIdle=true"; + + public static final Long DEFAULT_MYSQL_MAX_CONNECTIONS = 160L; + + public static final boolean DEFAULT_MYSQL_AUTO_RECONNECT = true; + + public static final long DEFAULT_MYSQL_RECONNECT_ATTEMPTS = 10L; + public static final FluentBackoff DEFAULT_MYSQL_SCHEMA_DISCOVERY_BACKOFF = FluentBackoff.DEFAULT; + + private MySqlConfigDefaults() {} +} diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/defaults/package-info.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/defaults/package-info.java new file mode 100644 index 0000000000..a4583e01c2 --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/defaults/package-info.java @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2024 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** Config Defaults for jdbc sources. 
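+ * These values seed JdbcIOWrapperConfig#builderWithMySqlDefaults() and, per the class-level TODO
+ * in MySqlConfigDefaults, are expected to be fine-tuned based on benchmarking.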
*/ +package com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.config.defaults; diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/package-info.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/package-info.java new file mode 100644 index 0000000000..b598a31c7b --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/package-info.java @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2024 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** Config for JdbcIoWrapper. */ +package com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.config; diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/package-info.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/package-info.java new file mode 100644 index 0000000000..473e81c656 --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/package-info.java @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2024 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** IoWrapper for jdbc sources. 
*/ +package com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper; diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/JdbcValueMapper.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/JdbcValueMapper.java index 766007fd26..f568f2d325 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/JdbcValueMapper.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/JdbcValueMapper.java @@ -15,6 +15,7 @@ */ package com.google.cloud.teleport.v2.source.reader.io.jdbc.rowmapper; +import java.io.Serializable; import java.sql.ResultSet; import java.sql.SQLException; import org.apache.avro.Schema; @@ -25,7 +26,7 @@ * * @param */ -public class JdbcValueMapper { +public class JdbcValueMapper implements Serializable { private ResultSetValueExtractor valueExtractor; private ResultSetValueMapper valueMapper; diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/JdbcValueMappingsProvider.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/JdbcValueMappingsProvider.java index eee89df914..8dbf46b0b5 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/JdbcValueMappingsProvider.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/JdbcValueMappingsProvider.java @@ -16,12 +16,13 @@ package com.google.cloud.teleport.v2.source.reader.io.jdbc.rowmapper; import com.google.common.collect.ImmutableMap; +import java.io.Serializable; /** * An interface to be implemented for various jdbc source types to get the {@link JdbcValueMapper} * for various source types. */ -public interface JdbcValueMappingsProvider { +public interface JdbcValueMappingsProvider extends Serializable { /** * Get Mapping of source types to {@link JdbcValueMapper}. diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/ResultSetValueExtractor.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/ResultSetValueExtractor.java index 82fc9d6e7e..99c7771887 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/ResultSetValueExtractor.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/ResultSetValueExtractor.java @@ -15,6 +15,7 @@ */ package com.google.cloud.teleport.v2.source.reader.io.jdbc.rowmapper; +import java.io.Serializable; import java.sql.ResultSet; import java.sql.SQLException; import javax.annotation.Nullable; @@ -24,7 +25,7 @@ * * @param type of the value extracted. */ -public interface ResultSetValueExtractor { +public interface ResultSetValueExtractor extends Serializable { /** * Extract the requested field from the result set. 
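Taken together, the reader pieces introduced above are assembled roughly as follows. This is an illustrative sketch rather than code from this change: the host, port, credentials, driver class, and table/partition-column values are made-up placeholders, and only builder methods that appear elsewhere in this patch are used. The Serializable additions just above matter because these extractors and mappers travel inside the JdbcSourceRowMapper that Beam serializes and ships to Dataflow workers.

// Illustrative assembly of the new reader API (hypothetical values).
TableConfig tableConfig = TableConfig.builder("people").withPartitionColum("id").build();
JdbcIOWrapperConfig config =
    JdbcIOWrapperConfig.builderWithMySqlDefaults()
        .setSourceHost("10.128.0.2") // placeholder host
        .setSourcePort("3306") // placeholder port
        .setSourceSchemaReference(SourceSchemaReference.builder().setDbName("testDB").build())
        .setTableConfigs(ImmutableList.of(tableConfig))
        .setShardID("shard-0") // placeholder shard id
        .setDbAuth(
            LocalCredentialsProvider.builder().setUserName("user").setPassword("secret").build())
        .setJdbcDriverJars("") // no extra driver jars in this sketch
        .setJdbcDriverClassName("com.mysql.jdbc.Driver") // placeholder driver class
        .build();
// The wrapper discovers table schemas and exposes per-table read transforms; ReaderImpl
// bundles them into a single ReaderTransform for the pipeline.
ReaderImpl reader = ReaderImpl.of(JdbcIoWrapper.of(config));
ReaderTransform readerTransform = reader.getReaderTransform();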
diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/ResultSetValueMapper.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/ResultSetValueMapper.java index fe93b58997..95b6c342db 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/ResultSetValueMapper.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/ResultSetValueMapper.java @@ -15,6 +15,7 @@ */ package com.google.cloud.teleport.v2.source.reader.io.jdbc.rowmapper; +import java.io.Serializable; import java.sql.ResultSet; import java.sql.SQLException; import org.apache.avro.Schema; @@ -27,7 +28,7 @@ * * @param Type of the filed extracted from {@link ResultSet}. */ -public interface ResultSetValueMapper { +public interface ResultSetValueMapper extends Serializable { /** * Map the extracted value to an object accepted by {@link diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/schema/SourceTableSchema.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/schema/SourceTableSchema.java index 4096af05dc..a977c0f97c 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/schema/SourceTableSchema.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/schema/SourceTableSchema.java @@ -66,7 +66,11 @@ public Schema getAvroPayload() { */ public static Builder builder() { - var builder = new AutoValue_SourceTableSchema.Builder(); + return builder(MapperType.MYSQL); + } + + public static Builder builder(MapperType mapperType) { + var builder = new AutoValue_SourceTableSchema.Builder().initialize(mapperType); builder.setTableSchemaUUID(UUID.randomUUID().toString()); return builder; } @@ -77,6 +81,8 @@ public abstract static class Builder { public abstract Builder setTableName(String value); + private UnifiedTypeMapper.MapperType mapperType; + abstract ImmutableMap.Builder sourceColumnNameToSourceColumnTypeBuilder(); @@ -88,7 +94,7 @@ public final Builder addSourceColumnNameToSourceColumnType( this.payloadFieldAssembler = this.payloadFieldAssembler .name(sourceColumnName) - .type(new UnifiedTypeMapper(MapperType.MYSQL).getSchema(sourceColumnType)) + .type(new UnifiedTypeMapper(this.mapperType).getSchema(sourceColumnType)) .noDefault(); return this; } @@ -119,6 +125,11 @@ public Builder() { abstract SourceTableSchema autoBuild(); + public Builder initialize(UnifiedTypeMapper.MapperType mapperType) { + this.mapperType = mapperType; + return this; + } + public SourceTableSchema build() { this.setAvroSchema(this.payloadFieldAssembler.endRecord().noDefault().endRecord()); SourceTableSchema sourceTableSchema = autoBuild(); diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/transform/AccumulatingTableReader.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/transform/AccumulatingTableReader.java index 05e64228af..c365b55879 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/transform/AccumulatingTableReader.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/transform/AccumulatingTableReader.java @@ -39,12 +39,12 @@ */ @AutoValue abstract class AccumulatingTableReader extends PTransform { - abstract 
ImmutableMap>> + public abstract ImmutableMap>> tableTransforms(); - abstract TupleTag sourceRowTag(); + public abstract TupleTag sourceRowTag(); - abstract TupleTag sourceTableReferenceTag(); + public abstract TupleTag sourceTableReferenceTag(); @Override public PCollectionTuple expand(PBegin input) { diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/transform/ReaderTransform.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/transform/ReaderTransform.java index b00174fe8a..a02b700f0f 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/transform/ReaderTransform.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/transform/ReaderTransform.java @@ -18,6 +18,7 @@ import com.google.auto.value.AutoValue; import com.google.cloud.teleport.v2.source.reader.io.row.SourceRow; import com.google.cloud.teleport.v2.source.reader.io.schema.SourceTableReference; +import java.io.Serializable; import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.values.PBegin; import org.apache.beam.sdk.values.PCollection; @@ -56,12 +57,12 @@ * table separately in the `PCollectionTuple`. */ @AutoValue -public abstract class ReaderTransform { - abstract TupleTag sourceRowTag(); +public abstract class ReaderTransform implements Serializable { + public abstract TupleTag sourceRowTag(); - abstract TupleTag sourceTableReferenceTag(); + public abstract TupleTag sourceTableReferenceTag(); - abstract PTransform readTransform(); + public abstract PTransform readTransform(); public static Builder builder() { TupleTag sourceRowTupleTag = new TupleTag<>(); diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/SourceDbToSpanner.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/SourceDbToSpanner.java index 6d171a493a..202f605af5 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/SourceDbToSpanner.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/SourceDbToSpanner.java @@ -15,27 +15,44 @@ */ package com.google.cloud.teleport.v2.templates; +import com.google.cloud.spanner.BatchClient; +import com.google.cloud.spanner.BatchReadOnlyTransaction; +import com.google.cloud.spanner.DatabaseAdminClient; +import com.google.cloud.spanner.Dialect; import com.google.cloud.spanner.Mutation; +import com.google.cloud.spanner.TimestampBound; import com.google.cloud.teleport.metadata.Template; import com.google.cloud.teleport.metadata.TemplateCategory; import com.google.cloud.teleport.v2.common.UncaughtExceptionLogger; +import com.google.cloud.teleport.v2.options.OptionsToConfigBuilder; import com.google.cloud.teleport.v2.options.SourceDbToSpannerOptions; -import com.google.cloud.teleport.v2.source.DataSourceProvider; -import com.google.cloud.teleport.v2.spanner.ResultSetToMutation; +import com.google.cloud.teleport.v2.source.reader.ReaderImpl; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.JdbcIoWrapper; +import com.google.cloud.teleport.v2.source.reader.io.row.SourceRow; +import com.google.cloud.teleport.v2.source.reader.io.schema.SourceSchema; +import com.google.cloud.teleport.v2.source.reader.io.schema.SourceTableReference; +import com.google.cloud.teleport.v2.source.reader.io.schema.SourceTableSchema; +import com.google.cloud.teleport.v2.source.reader.io.transform.ReaderTransform; +import 
com.google.cloud.teleport.v2.spanner.ddl.Ddl; +import com.google.cloud.teleport.v2.spanner.ddl.InformationSchemaScanner; +import com.google.cloud.teleport.v2.spanner.migrations.schema.ISchemaMapper; +import com.google.cloud.teleport.v2.spanner.migrations.schema.IdentityMapper; +import com.google.cloud.teleport.v2.spanner.migrations.schema.SessionBasedMapper; +import com.google.cloud.teleport.v2.transformer.SourceRowToMutationDoFn; import com.google.common.annotations.VisibleForTesting; -import java.util.Arrays; -import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; import java.util.Map; -import java.util.Set; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.PipelineResult; +import org.apache.beam.sdk.io.gcp.spanner.SpannerAccessor; +import org.apache.beam.sdk.io.gcp.spanner.SpannerConfig; import org.apache.beam.sdk.io.gcp.spanner.SpannerIO; import org.apache.beam.sdk.io.gcp.spanner.SpannerIO.Write; -import org.apache.beam.sdk.io.jdbc.JdbcIO; import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.options.ValueProvider; +import org.apache.beam.sdk.transforms.ParDo; import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.PCollectionTuple; /** * A template that copies data from a relational database using JDBC to an existing Spanner @@ -101,63 +118,70 @@ public static void main(String[] args) { @VisibleForTesting static PipelineResult run(SourceDbToSpannerOptions options) { Pipeline pipeline = Pipeline.create(options); - Map> columnsToIgnore = getColumnsToIgnore(options); - Map tableVsPartitionMap = getTablesVsPartitionColumn(options); - for (String table : getTablesVsPartitionColumn(options).keySet()) { - PCollection rows = - pipeline.apply( - "ReadPartitions_" + table, - getJdbcReader( - table, tableVsPartitionMap.get(table), columnsToIgnore.get(table), options)); - rows.apply("Write_" + table, getSpannerWrite(options)); - } + + ReaderImpl reader = + ReaderImpl.of( + JdbcIoWrapper.of( + OptionsToConfigBuilder.MySql.configWithMySqlDefaultsFromOptions(options))); + SourceSchema srcSchema = reader.getSourceSchema(); + ReaderTransform readerTransform = reader.getReaderTransform(); + + PCollectionTuple rowsAndTables = pipeline.apply("Read rows", readerTransform.readTransform()); + PCollection sourceRows = rowsAndTables.get(readerTransform.sourceRowTag()); + + SourceRowToMutationDoFn transformDoFn = + SourceRowToMutationDoFn.create(getSchemaMapper(options), getTableIDToRefMap(srcSchema)); + PCollection mutations = sourceRows.apply("Transform", ParDo.of(transformDoFn)); + mutations.apply("Write", getSpannerWrite(options)); + return pipeline.run(); } - private static Map getTablesVsPartitionColumn(SourceDbToSpannerOptions options) { - String[] tables = options.getTables().split(","); - String[] partitionColumns = options.getPartitionColumns().split(","); - if (tables.length != partitionColumns.length) { - throw new RuntimeException( - "invalid configuration. 
Partition column count does not match " + "tables count."); + private static ISchemaMapper getSchemaMapper(SourceDbToSpannerOptions options) { + SpannerConfig spannerConfig = + SpannerConfig.create() + .withProjectId(ValueProvider.StaticValueProvider.of(options.getProjectId())) + .withHost(ValueProvider.StaticValueProvider.of(options.getSpannerHost())) + .withInstanceId(ValueProvider.StaticValueProvider.of(options.getInstanceId())) + .withDatabaseId(ValueProvider.StaticValueProvider.of(options.getDatabaseId())); + Ddl ddl = getInformationSchemaAsDdl(spannerConfig); + ISchemaMapper schemaMapper = new IdentityMapper(ddl); + if (options.getSessionFilePath() != null && !options.getSessionFilePath().equals("")) { + schemaMapper = new SessionBasedMapper(options.getSessionFilePath(), ddl); } - Map tableVsPartitionColumn = new HashMap(); - for (int i = 0; i < tables.length; i++) { - tableVsPartitionColumn.put(tables[i], partitionColumns[i]); - } - return tableVsPartitionColumn; + return schemaMapper; } - private static Map> getColumnsToIgnore(SourceDbToSpannerOptions options) { - String ignoreStr = options.getIgnoreColumns(); - if (ignoreStr == null || ignoreStr.isEmpty()) { - return Collections.emptyMap(); - } - Map> ignore = new HashMap<>(); - for (String tableColumns : ignoreStr.split(",")) { - int tableNameIndex = tableColumns.indexOf(':'); - if (tableNameIndex == -1) { - continue; - } - String table = tableColumns.substring(0, tableNameIndex); - String columnStr = tableColumns.substring(tableNameIndex + 1); - Set columns = new HashSet<>(Arrays.asList(columnStr.split(";"))); - ignore.put(table, columns); - } - return ignore; + // TODO: SpannerInfoschema scanner code is duplicated across live, bulk and reverse replication + // templates. We should refactor everything to spanner-common. 
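+  // Reads the target database's dialect and scans its information schema over a
+  // strong-timestamp batch read-only transaction; the resulting Ddl backs the
+  // IdentityMapper/SessionBasedMapper chosen in getSchemaMapper() above.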
+ private static Ddl getInformationSchemaAsDdl(SpannerConfig spannerConfig) { + SpannerAccessor spannerAccessor = SpannerAccessor.getOrCreate(spannerConfig); + DatabaseAdminClient databaseAdminClient = spannerAccessor.getDatabaseAdminClient(); + Dialect dialect = + databaseAdminClient + .getDatabase(spannerConfig.getInstanceId().get(), spannerConfig.getDatabaseId().get()) + .getDialect(); + BatchClient batchClient = spannerAccessor.getBatchClient(); + BatchReadOnlyTransaction context = + batchClient.batchReadOnlyTransaction(TimestampBound.strong()); + InformationSchemaScanner scanner = new InformationSchemaScanner(context, dialect); + Ddl ddl = scanner.scan(); + spannerAccessor.close(); + return ddl; } - private static JdbcIO.ReadWithPartitions getJdbcReader( - String table, - String partitionColumn, - Set columnsToIgnore, - SourceDbToSpannerOptions options) { - return JdbcIO.readWithPartitions() - .withDataSourceProviderFn(new DataSourceProvider(options)) - .withTable(table) - .withPartitionColumn(partitionColumn) - .withRowMapper(ResultSetToMutation.create(table, columnsToIgnore)) - .withNumPartitions(options.getNumPartitions()); + private static Map getTableIDToRefMap(SourceSchema srcSchema) { + Map tableIdMapper = new HashMap<>(); + for (SourceTableSchema srcTableSchema : srcSchema.tableSchemas()) { + tableIdMapper.put( + srcTableSchema.tableSchemaUUID(), + SourceTableReference.builder() + .setSourceSchemaReference(srcSchema.schemaReference()) + .setSourceTableName(srcTableSchema.tableName()) + .setSourceTableSchemaUUID(srcTableSchema.tableSchemaUUID()) + .build()); + } + return tableIdMapper; } private static Write getSpannerWrite(SourceDbToSpannerOptions options) { diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/transformer/SourceRowToMutationDoFn.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/transformer/SourceRowToMutationDoFn.java index c45f396f63..1f9375c15c 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/transformer/SourceRowToMutationDoFn.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/transformer/SourceRowToMutationDoFn.java @@ -51,6 +51,8 @@ public static SourceRowToMutationDoFn create( @ProcessElement public void processElement(ProcessContext c) { SourceRow sourceRow = c.element(); + LOG.debug("Starting transformation for Source Row {}", sourceRow); + if (!tableIdMapper().containsKey(sourceRow.tableSchemaUUID())) { // TODO: Remove LOG statements from processElement once counters and DLQ is supported. LOG.error( @@ -71,10 +73,7 @@ public void processElement(ProcessContext c) { c.output(mutation); } catch (Exception e) { // TODO: Add DLQ integration once supported. - LOG.error( - "Unable to transform source row to spanner mutation: {} {}", - e.getMessage(), - e.fillInStackTrace()); + LOG.error("Unable to transform source row to spanner mutation: {}", e.getMessage()); } } diff --git a/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/options/OptionsToConfigBuilderTest.java b/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/options/OptionsToConfigBuilderTest.java new file mode 100644 index 0000000000..7aa4112705 --- /dev/null +++ b/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/options/OptionsToConfigBuilderTest.java @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.v2.options; + +import static com.google.common.truth.Truth.assertThat; + +import com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.config.JdbcIOWrapperConfig; +import com.google.common.collect.ImmutableList; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.junit.MockitoJUnitRunner; + +/** Test class for {@link OptionsToConfigBuilder}. */ +@RunWith(MockitoJUnitRunner.class) +public class OptionsToConfigBuilderTest { + + @Test + public void testConfigWithMySqlDefaultsFromOptions() { + final String testdriverClassName = "org.apache.derby.jdbc.EmbeddedDriver"; + final String testHost = "localHost"; + final String testPort = "3306"; + final String testuser = "user"; + final String testpassword = "password"; + SourceDbToSpannerOptions sourceDbToSpannerOptions = + PipelineOptionsFactory.as(SourceDbToSpannerOptions.class); + sourceDbToSpannerOptions.setSourceHost(testHost); + sourceDbToSpannerOptions.setSourcePort(testPort); + sourceDbToSpannerOptions.setJdbcDriverClassName(testdriverClassName); + sourceDbToSpannerOptions.setSourceConnectionProperties( + "maxTotal=160;maxpoolsize=160;maxIdle=160;minIdle=160" + ";wait_timeout=57600"); + sourceDbToSpannerOptions.setFetchSize(50000); + sourceDbToSpannerOptions.setMaxConnections(150); + sourceDbToSpannerOptions.setNumPartitions(4000); + sourceDbToSpannerOptions.setUsername(testuser); + sourceDbToSpannerOptions.setPassword(testpassword); + sourceDbToSpannerOptions.setReconnectsEnabled(true); + sourceDbToSpannerOptions.setReconnectAttempts(10); + sourceDbToSpannerOptions.setSourceDB("testDB"); + sourceDbToSpannerOptions.setTables("table1,table2"); + sourceDbToSpannerOptions.setPartitionColumns("col1,col2"); + JdbcIOWrapperConfig config = + OptionsToConfigBuilder.MySql.configWithMySqlDefaultsFromOptions(sourceDbToSpannerOptions); + assertThat(config.autoReconnect()).isTrue(); + assertThat(config.jdbcDriverClassName()).isEqualTo(testdriverClassName); + assertThat(config.sourceHost()).isEqualTo(testHost); + assertThat(config.sourcePort()).isEqualTo(testPort); + assertThat( + ImmutableList.of( + config.tableConfigs().get(0).tableName(), config.tableConfigs().get(1).tableName())) + .containsExactlyElementsIn(ImmutableList.of("table1", "table2")); + assertThat(config.dbAuth().getUserName().get()).isEqualTo(testuser); + assertThat(config.dbAuth().getPassword().get()).isEqualTo(testpassword); + } +} diff --git a/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/DataSourceProviderTest.java b/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/DataSourceProviderTest.java deleted file mode 100644 index 10bbad040c..0000000000 --- a/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/DataSourceProviderTest.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (C) 2024 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. 
You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package com.google.cloud.teleport.v2.source; - -import com.google.cloud.teleport.v2.options.SourceDbToSpannerOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.mockito.junit.MockitoJUnitRunner; - -/** Test class for {@link DataSourceProvider}. */ -@RunWith(MockitoJUnitRunner.class) -public class DataSourceProviderTest { - - @BeforeClass - public static void beforeClass() throws Exception { - // by default, derby uses a lock timeout of 60 seconds. In order to speed up the test - // and detect the lock faster, we decrease this timeout - System.setProperty("derby.locks.waitTimeout", "2"); - System.setProperty("derby.stream.error.file", "build/derby.log"); - } - - @Before - @Test - public void testDataSourceProvider() { - SourceDbToSpannerOptions sourceDbToSpannerOptions = - PipelineOptionsFactory.as(SourceDbToSpannerOptions.class); - sourceDbToSpannerOptions.setJdbcDriverClassName("org.apache.derby.jdbc.EmbeddedDriver"); - sourceDbToSpannerOptions.setSourceConnectionURL("jdbc:derby:memory:testDB;create=true"); - sourceDbToSpannerOptions.setSourceConnectionProperties(""); - var firstSource = new DataSourceProvider(sourceDbToSpannerOptions).apply(null); - var secondSource = new DataSourceProvider(sourceDbToSpannerOptions).apply(null); - // To verify singleton behavior, check that the references are equal. - assert (firstSource == secondSource); - } -} diff --git a/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/GuardedStringValueProviderTest.java b/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/GuardedStringValueProviderTest.java new file mode 100644 index 0000000000..314a6d828b --- /dev/null +++ b/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/GuardedStringValueProviderTest.java @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.v2.source.reader.auth.dbauth; + +import static com.google.common.truth.Truth.assertThat; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.junit.MockitoJUnitRunner; + +/** Test class for {@link GuardedStringValueProvider}. 
*/ +@RunWith(MockitoJUnitRunner.class) +public class GuardedStringValueProviderTest { + @Test + public void testGuardedStringValueProvider() { + final String testPassword = "testPassword"; + GuardedStringValueProvider provider = GuardedStringValueProvider.create(testPassword); + assertThat(provider.isAccessible()).isTrue(); + assertThat(provider.get()).isEqualTo(testPassword); + } +} diff --git a/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/LocalCredentialsProviderTest.java b/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/LocalCredentialsProviderTest.java new file mode 100644 index 0000000000..5f43addb94 --- /dev/null +++ b/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/LocalCredentialsProviderTest.java @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.v2.source.reader.auth.dbauth; + +import static com.google.common.truth.Truth.assertThat; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.junit.MockitoJUnitRunner; + +/** Test class for {@link LocalCredentialsProvider}. */ +@RunWith(MockitoJUnitRunner.class) +public class LocalCredentialsProviderTest { + @Test + public void testLocalCredentialsProvider() { + final String testUserName = "testUserName"; + final String testPassword = "testPassword"; + LocalCredentialsProvider provider = + LocalCredentialsProvider.builder() + .setUserName(testUserName) + .setPassword(testPassword) + .build(); + assertThat(provider.getUserName().get()).isEqualTo(testUserName); + assertThat(provider.getPassword().get()).isEqualTo(testPassword); + } +} diff --git a/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/dialectadapter/mysql/MysqlDialectAdapterTest.java b/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/dialectadapter/mysql/MysqlDialectAdapterTest.java index 8fcf0057b7..4d79dbb2e3 100644 --- a/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/dialectadapter/mysql/MysqlDialectAdapterTest.java +++ b/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/dialectadapter/mysql/MysqlDialectAdapterTest.java @@ -180,7 +180,7 @@ public void getSchemaDiscoveryQuery() { MysqlDialectAdapter.getSchemaDiscoveryQuery( SourceSchemaReference.builder().setDbName("testDB").build())) .isEqualTo( - "SELECT COLUMN_NAME,DATA_TYPE,CHARACTER_MAXIMUM_LENGTH,NUMERIC_PRECISION,NUMERIC_SCALE FROM INFORMATION_SCHEMA.Columns WHERE TABLE_SCHEMA = testDB AND TABLE_NAME = ?"); + "SELECT COLUMN_NAME,DATA_TYPE,CHARACTER_MAXIMUM_LENGTH,NUMERIC_PRECISION,NUMERIC_SCALE FROM INFORMATION_SCHEMA.Columns WHERE TABLE_SCHEMA = 'testDB' AND TABLE_NAME = ?"); } private static ResultSet getMockInfoSchemaRs() throws SQLException { diff --git 
a/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/TableConfigTest.java b/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/TableConfigTest.java new file mode 100644 index 0000000000..d50e9fd314 --- /dev/null +++ b/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/TableConfigTest.java @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.config; + +import static com.google.common.truth.Truth.assertThat; +import static org.junit.Assert.assertThrows; + +import com.google.common.collect.ImmutableList; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.junit.MockitoJUnitRunner; + +/** Test class for {@link TableConfig}. */ +@RunWith(MockitoJUnitRunner.class) +public class TableConfigTest { + @Test + public void testTableConfigBuildsWithDefaults() { + final String testTable = "testTable"; + final String partitionColumn = "col_1"; + + TableConfig tableConfig = + TableConfig.builder(testTable).withPartitionColum(partitionColumn).build(); + assertThat(tableConfig.tableName()).isEqualTo(testTable); + assertThat(tableConfig.maxFetchSize()).isNull(); + assertThat(tableConfig.maxPartitions()).isNull(); + assertThat(tableConfig.partitionColumns()).isEqualTo(ImmutableList.of(partitionColumn)); + } + + @Test + public void testTableConfigBuilds() { + final String testTable = "testTable"; + final String partitionColumn = "col_1"; + final int maxFetchSize = 100; + final int maxPartitions = 100; + + TableConfig tableConfig = + TableConfig.builder(testTable) + .withPartitionColum(partitionColumn) + .setMaxFetchSize(maxFetchSize) + .setMaxPartitions(maxPartitions) + .build(); + assertThat(tableConfig.tableName()).isEqualTo(testTable); + assertThat(tableConfig.maxFetchSize()).isEqualTo(maxFetchSize); + assertThat(tableConfig.maxPartitions()).isEqualTo(maxPartitions); + assertThat(tableConfig.partitionColumns()).isEqualTo(ImmutableList.of(partitionColumn)); + } + + @Test + public void testTableConfigPreconditions() { + final String testTable = "testTable"; + + assertThrows(IllegalStateException.class, () -> TableConfig.builder(testTable).build()); + assertThrows( + IllegalStateException.class, + () -> + TableConfig.builder(testTable) + .withPartitionColum("col_1") + .withPartitionColum("col_2") + .build()); + } +} diff --git a/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertor.java b/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertor.java index 3ec2b54e00..9822f23276 100644 --- a/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertor.java +++ 
b/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertor.java @@ -59,7 +59,7 @@ public GenericRecordTypeConvertor(ISchemaMapper schemaMapper, String namespace) /** * This method takes in a generic record and returns a map between the Spanner column name and the * corresponding Spanner column value. This handles the data conversion logic from a GenericRecord - * field to a spanner Value. + * field to a Map of Spanner column name to spanner Value. */ public Map transformChangeEvent(GenericRecord record, String srcTableName) { Map result = new HashMap<>(); @@ -70,31 +70,80 @@ public Map transformChangeEvent(GenericRecord record, String srcT * TODO: Handle columns that will not exist at source - synth id - shard id - multi-column * transformations - auto-gen keys - Default columns - generated columns */ - String srcColName = - schemaMapper.getSourceColumnName(namespace, spannerTableName, spannerColName); - Type spannerColumnType = - schemaMapper.getSpannerColumnType(namespace, spannerTableName, spannerColName); - Value value = - getSpannerValue( - record.get(srcColName), - record.getSchema().getField(srcColName).schema(), - srcColName, - spannerColumnType); - result.put(spannerColName, value); + try { + String srcColName = + schemaMapper.getSourceColumnName(namespace, spannerTableName, spannerColName); + Type spannerColumnType = + schemaMapper.getSpannerColumnType(namespace, spannerTableName, spannerColName); + LOG.debug( + "Transformer processing srcCol: {} spannerColumnType:{}", + srcColName, + spannerColumnType); + + Value value = + getSpannerValue( + record.get(srcColName), + record.getSchema().getField(srcColName).schema(), + srcColName, + spannerColumnType); + result.put(spannerColName, value); + } catch (NullPointerException e) { + LOG.info("Unable to transform change event: {}", e.getMessage()); + throw e; + } catch (IllegalArgumentException e) { + LOG.info("Unable to transform change event: {}", e.getMessage()); + throw e; + } catch (Exception e) { + LOG.info( + "Unable to convert spanner value for spanner col: {}. {}", + spannerColName, + e.getMessage()); + throw new RuntimeException( + String.format( + "Unable to convert spanner value for spanner col: %s. %s", + spannerColName, + e.getMessage())); + } } return result; } /** Extract the field value from Generic Record and try to convert it to @spannerType. */ - Value getSpannerValue( + public Value getSpannerValue( Object recordValue, Schema fieldSchema, String recordColName, Type spannerType) { // Logical and record types should be converted to string. + LOG.debug( + "gettingSpannerValue for recordValue: {}, fieldSchema: {}, recordColName: {}, spannerType: {}", + recordValue, + fieldSchema, + recordColName, + spannerType); + if (fieldSchema.getType().equals(Schema.Type.UNION)) { + List types = fieldSchema.getTypes(); + LOG.debug("found union type: {}", types); + // Schema types can only union with Type NULL. Any other UNION is unsupported. + if (types.size() == 2 && types.stream().anyMatch(s -> s.getType().equals(Schema.Type.NULL))) { + if (recordValue == null) { + return null; + } + fieldSchema = + types.stream().filter(s -> !s.getType().equals(Schema.Type.NULL)).findFirst().get(); + } else { + throw new IllegalArgumentException( + String.format( + "Unknown schema field type %s for field %s with value %s.", + fieldSchema, + recordColName, + recordValue)); + } + } if (fieldSchema.getLogicalType() != null) { recordValue = handleLogicalFieldType(recordColName, recordValue, fieldSchema); } else if (fieldSchema.getType().equals(Schema.Type.RECORD)) { // Get the avro field of type record from the whole record. recordValue = handleRecordFieldType(recordColName, (GenericRecord) recordValue, fieldSchema); } + LOG.debug("Updated record value is {} for recordColName {}", recordValue, recordColName); Dialect dialect = schemaMapper.getDialect(); if (dialect == null) { throw new NullPointerException("schemaMapper returned null spanner dialect."); @@ -121,6 +170,7 @@ static class CustomAvroTypes { /** Avro logical types are converted to an equivalent string type. */ static String handleLogicalFieldType(String fieldName, Object recordValue, Schema fieldSchema) { + LOG.debug("found logical type for col {} with schema {}", fieldName, fieldSchema); if (recordValue == null) { return null; } @@ -152,14 +202,17 @@ static String handleLogicalFieldType(String fieldName, Object recordValue, Schem } else if (fieldSchema.getLogicalType() instanceof LogicalTypes.TimestampMillis) { Instant timestamp = Instant.ofEpochMilli(Long.valueOf(recordValue.toString())); return timestamp.atOffset(ZoneOffset.UTC).format(DateTimeFormatter.ISO_OFFSET_DATE_TIME); - } // TODO: add support for custom logical types VARCHAR, JSON and NUMBER once format is - // finalised. - else { - LOG.error( - "Unknown field type {} for field {} in {}. Ignoring it.", - fieldSchema, - fieldName, - recordValue); + } else if (fieldSchema.getLogicalType() != null + && fieldSchema.getLogicalType().getName().equals(CustomAvroTypes.JSON)) { + return recordValue.toString(); + } else if (fieldSchema.getLogicalType() != null + && fieldSchema.getLogicalType().getName().equals(CustomAvroTypes.NUMBER)) { + return recordValue.toString(); + } else if (fieldSchema.getLogicalType() != null + && fieldSchema.getLogicalType().getName().equals(CustomAvroTypes.VARCHAR)) { + return recordValue.toString(); + } else { + LOG.error("Unknown field type {} for field {} in {}.", fieldSchema, fieldName, recordValue); throw new UnsupportedOperationException( String.format( "Unknown field type %s for field %s in %s.", fieldSchema, fieldName, recordValue)); @@ -168,6 +221,7 @@ static String handleLogicalFieldType(String fieldName, Object recordValue, Schem /** Record field types are converted to an equivalent string type. 
*/ static String handleRecordFieldType(String fieldName, GenericRecord element, Schema fieldSchema) { + LOG.debug("found record type for col {} with schema: {}", fieldName, fieldSchema); if (element == null) { return null; } diff --git a/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertorTest.java b/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertorTest.java index 8941c77598..2b713445dd 100644 --- a/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertorTest.java +++ b/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertorTest.java @@ -31,13 +31,12 @@ import com.google.cloud.teleport.v2.spanner.migrations.schema.IdentityMapper; import com.google.cloud.teleport.v2.spanner.type.Type; import com.google.cloud.teleport.v2.utils.SchemaUtils; -import java.io.IOException; import java.math.BigDecimal; import java.nio.ByteBuffer; -import java.nio.file.Files; -import java.nio.file.Paths; import java.util.List; import java.util.Map; +import org.apache.avro.LogicalType; +import org.apache.avro.LogicalTypes; import org.apache.avro.Schema; import org.apache.avro.SchemaBuilder; import org.apache.avro.generic.GenericData; @@ -47,11 +46,105 @@ public class GenericRecordTypeConvertorTest { + public Schema getLogicalTypesSchema() { + // Create schema types with LogicalTypes + Schema dateType = LogicalTypes.date().addToSchema(Schema.create(Schema.Type.INT)); + Schema decimalType = LogicalTypes.decimal(4, 2).addToSchema(Schema.create(Schema.Type.BYTES)); + Schema timeMicrosType = LogicalTypes.timeMicros().addToSchema(Schema.create(Schema.Type.LONG)); + Schema timeMillisType = LogicalTypes.timeMillis().addToSchema(Schema.create(Schema.Type.INT)); + Schema timestampMicrosType = + LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG)); + Schema timestampMillisType = + LogicalTypes.timestampMillis().addToSchema(Schema.create(Schema.Type.LONG)); + Schema jsonType = + new LogicalType(GenericRecordTypeConvertor.CustomAvroTypes.JSON) + .addToSchema(SchemaBuilder.builder().stringType()); + Schema numberType = + new LogicalType(GenericRecordTypeConvertor.CustomAvroTypes.NUMBER) + .addToSchema(SchemaBuilder.builder().stringType()); + Schema varcharType = + new LogicalType(GenericRecordTypeConvertor.CustomAvroTypes.VARCHAR) + .addToSchema(SchemaBuilder.builder().stringType()); + + // Build the schema using the created types + return SchemaBuilder.record("logicalTypes") + .namespace("com.test.schema") + .fields() + .name("date_col") + .type(dateType) + .noDefault() + .name("decimal_col") + .type(decimalType) + .noDefault() + .name("time_micros_col") + .type(timeMicrosType) + .noDefault() + .name("time_millis_col") + .type(timeMillisType) + .noDefault() + .name("timestamp_micros_col") + .type(timestampMicrosType) + .noDefault() + .name("timestamp_millis_col") + .type(timestampMillisType) + .noDefault() + .name("json_col") + .type(jsonType) + .noDefault() + .name("number_col") + .type(numberType) + .noDefault() + .name("varchar_col") + .type(varcharType) + .noDefault() + .endRecord(); + } + + public Schema unionNullType(Schema schema) { + return SchemaBuilder.builder().unionOf().nullType().and().type(schema).endUnion(); + } + + public Schema getAllSpannerTypesSchema() { + Schema decimalType = + unionNullType(LogicalTypes.decimal(5, 
2).addToSchema(Schema.create(Schema.Type.BYTES))); + Schema dateType = + unionNullType(LogicalTypes.date().addToSchema(Schema.create(Schema.Type.INT))); + Schema timestampType = + unionNullType(LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG))); + return SchemaBuilder.record("all_types") + .namespace("com.test.schema") + .fields() + .name("bool_col") + .type(unionNullType(Schema.create(Schema.Type.BOOLEAN))) + .noDefault() + .name("int_col") + .type(unionNullType(Schema.create(Schema.Type.LONG))) + .noDefault() + .name("float_col") + .type(unionNullType(Schema.create(Schema.Type.DOUBLE))) + .noDefault() + .name("string_col") + .type(unionNullType(Schema.create(Schema.Type.STRING))) + .noDefault() + .name("numeric_col") + .type(decimalType) + .noDefault() + .name("bytes_col") + .type(unionNullType(Schema.create(Schema.Type.BYTES))) + .noDefault() + .name("timestamp_col") + .type(timestampType) + .noDefault() + .name("date_col") + .type(dateType) + .noDefault() + .endRecord(); + } + @Test - public void testHandleLogicalFieldType() throws IOException { - Schema avroSchema = - SchemaUtils.parseAvroSchema( - Files.readString(Paths.get("src/test/resources/avro/logical-types-schema.avsc"))); + public void testHandleLogicalFieldType() { + Schema avroSchema = getLogicalTypesSchema(); + GenericRecord genericRecord = new GenericData.Record(avroSchema); genericRecord.put("date_col", 738991); genericRecord.put( @@ -60,6 +153,9 @@ public void testHandleLogicalFieldType() throws IOException { genericRecord.put("time_millis_col", 48035000); genericRecord.put("timestamp_micros_col", 1602599400056483L); genericRecord.put("timestamp_millis_col", 1602599400056L); + genericRecord.put("json_col", "{\"k1\":\"v1\"}"); + genericRecord.put("number_col", "289452"); + genericRecord.put("varchar_col", "Hellogcds"); String col = "date_col"; String result = @@ -96,6 +192,24 @@ public void testHandleLogicalFieldType() throws IOException { GenericRecordTypeConvertor.handleLogicalFieldType( col, genericRecord.get(col), genericRecord.getSchema().getField(col).schema()); assertEquals("Test timestamp_millis_col conversion: ", "2020-10-13T14:30:00.056Z", result); + + col = "json_col"; + result = + GenericRecordTypeConvertor.handleLogicalFieldType( + col, genericRecord.get(col), genericRecord.getSchema().getField(col).schema()); + assertEquals("Test json_col conversion: ", "{\"k1\":\"v1\"}", result); + + col = "number_col"; + result = + GenericRecordTypeConvertor.handleLogicalFieldType( + col, genericRecord.get(col), genericRecord.getSchema().getField(col).schema()); + assertEquals("Test number_col conversion: ", "289452", result); + + col = "varchar_col"; + result = + GenericRecordTypeConvertor.handleLogicalFieldType( + col, genericRecord.get(col), genericRecord.getSchema().getField(col).schema()); + assertEquals("Test varchar_col conversion: ", "Hellogcds", result); } @Test @@ -235,11 +349,8 @@ static Ddl getIdentityDdl() { } @Test - public void transformChangeEventTest_identityMapper() throws IOException { - GenericRecord genericRecord = - new GenericData.Record( - SchemaUtils.parseAvroSchema( - Files.readString(Paths.get("src/test/resources/avro/all-spanner-types.avsc")))); + public void transformChangeEventTest_identityMapper() { + GenericRecord genericRecord = new GenericData.Record(getAllSpannerTypesSchema()); genericRecord.put("bool_col", true); genericRecord.put("int_col", 10); genericRecord.put("float_col", 10.34); @@ -247,8 +358,7 @@ public void transformChangeEventTest_identityMapper() 
throws IOException { genericRecord.put( "numeric_col", ByteBuffer.wrap(new BigDecimal("12.34").unscaledValue().toByteArray())); genericRecord.put("bytes_col", new byte[] {10, 20, 30}); - genericRecord.put( - "timestamp_col", AvroTestingHelper.createTimestampTzRecord(1602599400056483L, 3600000)); + genericRecord.put("timestamp_col", 1602599400056483L); genericRecord.put("date_col", 738991); GenericRecordTypeConvertor genericRecordTypeConvertor = new GenericRecordTypeConvertor(new IdentityMapper(getIdentityDdl()), ""); @@ -268,8 +378,41 @@ public void transformChangeEventTest_identityMapper() throws IOException { assertEquals(expected, actual); } + @Test + public void transformChangeEventTest_illegalUnionType() { + GenericRecordTypeConvertor genericRecordTypeConvertor = + new GenericRecordTypeConvertor(new IdentityMapper(getIdentityDdl()), ""); + Schema schema = + SchemaBuilder.builder() + .unionOf() + .nullType() + .and() + .type(Schema.create(Schema.Type.BOOLEAN)) + .and() + .type(Schema.create(Schema.Type.STRING)) + .endUnion(); + assertThrows( + IllegalArgumentException.class, + () -> genericRecordTypeConvertor.getSpannerValue(null, schema, "union_col", Type.string())); + } + + @Test + public void transformChangeEventTest_nullType() { + GenericRecordTypeConvertor genericRecordTypeConvertor = + new GenericRecordTypeConvertor(new IdentityMapper(getIdentityDdl()), ""); + Schema schema = + SchemaBuilder.builder() + .unionOf() + .nullType() + .and() + .type(Schema.create(Schema.Type.BOOLEAN)) + .endUnion(); + assertNull( + genericRecordTypeConvertor.getSpannerValue(null, schema, "union_col", Type.string())); + } + @Test(expected = IllegalArgumentException.class) - public void transformChangeEventTest_incorrectSpannerType() throws IOException { + public void transformChangeEventTest_incorrectSpannerType() { ISchemaMapper mockSchemaMapper = mock(ISchemaMapper.class); when(mockSchemaMapper.getDialect()).thenReturn(Dialect.GOOGLE_STANDARD_SQL); @@ -281,10 +424,7 @@ public void transformChangeEventTest_incorrectSpannerType() throws IOException { when(mockSchemaMapper.getSpannerColumnType(anyString(), anyString(), anyString())) .thenReturn(Type.array(Type.bool())); - GenericRecord genericRecord = - new GenericData.Record( - SchemaUtils.parseAvroSchema( - Files.readString(Paths.get("src/test/resources/avro/all-spanner-types.avsc")))); + GenericRecord genericRecord = new GenericData.Record(getAllSpannerTypesSchema()); genericRecord.put("bool_col", true); GenericRecordTypeConvertor genericRecordTypeConvertor = new GenericRecordTypeConvertor(mockSchemaMapper, ""); @@ -293,7 +433,7 @@ public void transformChangeEventTest_incorrectSpannerType() throws IOException { } @Test - public void transformChangeEventTest_nullDialect() throws IOException { + public void transformChangeEventTest_nullDialect() { ISchemaMapper mockSchemaMapper = mock(ISchemaMapper.class); when(mockSchemaMapper.getDialect()).thenReturn(null); when(mockSchemaMapper.getSpannerTableName(anyString(), anyString())).thenReturn("test"); @@ -304,10 +444,7 @@ public void transformChangeEventTest_nullDialect() throws IOException { when(mockSchemaMapper.getSpannerColumnType(anyString(), anyString(), anyString())) .thenReturn(Type.array(Type.bool())); - GenericRecord genericRecord = - new GenericData.Record( - SchemaUtils.parseAvroSchema( - Files.readString(Paths.get("src/test/resources/avro/all-spanner-types.avsc")))); + GenericRecord genericRecord = new GenericData.Record(getAllSpannerTypesSchema()); genericRecord.put("bool_col", true); 
GenericRecordTypeConvertor genericRecordTypeConvertor = new GenericRecordTypeConvertor(mockSchemaMapper, ""); @@ -318,4 +455,22 @@ public void transformChangeEventTest_nullDialect() throws IOException { // Verify that the mock method was called. Mockito.verify(mockSchemaMapper).getDialect(); } + + @Test + public void transformChangeEventTest_catchAllException() { + ISchemaMapper mockSchemaMapper = mock(ISchemaMapper.class); + when(mockSchemaMapper.getSpannerTableName(anyString(), anyString())).thenReturn("test"); + when(mockSchemaMapper.getSpannerColumns(anyString(), anyString())) + .thenReturn(List.of("bool_col")); + when(mockSchemaMapper.getSourceColumnName(anyString(), anyString(), anyString())) + .thenThrow(new RuntimeException()); + + GenericRecordTypeConvertor genericRecordTypeConvertor = + new GenericRecordTypeConvertor(mockSchemaMapper, ""); + assertThrows( + RuntimeException.class, + () -> genericRecordTypeConvertor.transformChangeEvent(null, "all_types")); + // Verify that the mock method was called. + Mockito.verify(mockSchemaMapper).getSourceColumnName(anyString(), anyString(), anyString()); + } } diff --git a/v2/spanner-common/src/test/resources/avro/all-spanner-types.avsc b/v2/spanner-common/src/test/resources/avro/all-spanner-types.avsc deleted file mode 100644 index 707e2b20cf..0000000000 --- a/v2/spanner-common/src/test/resources/avro/all-spanner-types.avsc +++ /dev/null @@ -1,62 +0,0 @@ -{ - "type": "record", - "name": "all_types", - "namespace": "com.test.schema", - "fields": [ - { - "name": "bool_col", - "type": "boolean" - }, - { - "name": "int_col", - "type": "long" - }, - { - "name": "float_col", - "type": "double" - }, - { - "name": "string_col", - "type": "string" - }, - { - "name": "numeric_col", - "type": { - "type": "bytes", - "logicalType": "decimal", - "precision": 5, - "scale": 2 - } - }, - { - "name": "bytes_col", - "type": "bytes" - }, - { - "name": "timestamp_col", - "type": { - "type": "record", - "name": "timestampTz", - "fields": [ - { - "name": "timestamp", - "type": "long", - "logicalType": "timestamp-micros" - }, - { - "name": "offset", - "type": "int", - "logicalType": "time-millis" - } - ] - } - }, - { - "name": "date_col", - "type": { - "type": "int", - "logicalType": "date" - } - } - ] -} diff --git a/v2/spanner-common/src/test/resources/avro/logical-types-schema.avsc b/v2/spanner-common/src/test/resources/avro/logical-types-schema.avsc deleted file mode 100644 index b096de5063..0000000000 --- a/v2/spanner-common/src/test/resources/avro/logical-types-schema.avsc +++ /dev/null @@ -1,51 +0,0 @@ -{ - "type": "record", - "name": "logicalTypes", - "namespace": "com.test.schema", - "fields": [ - { - "name": "date_col", - "type": { - "type": "int", - "logicalType": "date" - } - }, - { - "name": "decimal_col", - "type": { - "type": "bytes", - "logicalType": "decimal", - "precision": 4, - "scale": 2 - } - }, - { - "name": "time_micros_col", - "type": { - "type": "long", - "logicalType": "time-micros" - } - }, - { - "name": "time_millis_col", - "type": { - "type": "int", - "logicalType": "time-millis" - } - }, - { - "name": "timestamp_micros_col", - "type": { - "type": "long", - "logicalType": "timestamp-micros" - } - }, - { - "name": "timestamp_millis_col", - "type": { - "type": "long", - "logicalType": "timestamp-millis" - } - } - ] -} From 5c92cc829a8b91fb112e3edfb2aed896cffd8611 Mon Sep 17 00:00:00 2001 From: Nick Anikin <52892974+an2x@users.noreply.github.com> Date: Fri, 10 May 2024 10:30:13 -0700 Subject: [PATCH 18/70] Fix typo. 
Co-authored-by: Anand Inguva <34158215+AnandInguva@users.noreply.github.com> --- .../java/com/google/cloud/teleport/plugin/sample/AtoBOk.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/sample/AtoBOk.java b/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/sample/AtoBOk.java index f0ea357288..48d3d56cae 100644 --- a/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/sample/AtoBOk.java +++ b/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/sample/AtoBOk.java @@ -110,7 +110,7 @@ public interface AtoBOptions { @TemplateParameter.KafkaTopic( order = 10, description = "Kafka input topic", - helpText = "Kafka topic to trad from", + helpText = "Kafka topic to read from", example = "projects/project-foo/locations/us-central1/clusters/cluster-bar/topics/topic-baz") String getInputKafkaTopic(); From 8b4eecb7f7fb33f2b1f0bfa6d3edb41b2e4b3314 Mon Sep 17 00:00:00 2001 From: Akshara Uke Date: Mon, 13 May 2024 04:57:15 +0000 Subject: [PATCH 19/70] optimized the processing for lean Spanner write intervals --- .../handler/GCSToSourceStreamingHandler.java | 38 ++++-- .../transforms/GcsToSourceStreamer.java | 18 +-- .../v2/templates/utils/GCSReader.java | 111 +++++++++--------- .../v2/templates/utils/SpannerDao.java | 4 +- 4 files changed, 95 insertions(+), 76 deletions(-) diff --git a/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/processing/handler/GCSToSourceStreamingHandler.java b/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/processing/handler/GCSToSourceStreamingHandler.java index c1e926489f..7a133de03f 100644 --- a/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/processing/handler/GCSToSourceStreamingHandler.java +++ b/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/processing/handler/GCSToSourceStreamingHandler.java @@ -36,9 +36,10 @@ public class GCSToSourceStreamingHandler { private static final Logger LOG = LoggerFactory.getLogger(GCSToSourceStreamingHandler.class); - public static void process(ProcessingContext taskContext, SpannerDao spannerDao) { + public static String process(ProcessingContext taskContext, SpannerDao spannerDao) { String shardId = taskContext.getShard().getLogicalShardId(); GCSReader inputFileReader = new GCSReader(taskContext, spannerDao); + String fileProcessedStartInterval = taskContext.getStartTimestamp(); try { Instant readStartTime = Instant.now(); @@ -52,9 +53,11 @@ public static void process(ProcessingContext taskContext, SpannerDao spannerDao) + " records from the buffer in " + ChronoUnit.MILLIS.between(readStartTime, readEndTime) + " milliseconds"); + // This may have changed in case the interval did not have data + fileProcessedStartInterval = inputFileReader.getCurrentIntervalStart(); if (records.isEmpty()) { - markShardSuccess(taskContext, spannerDao); - return; + markShardSuccess(taskContext, spannerDao, fileProcessedStartInterval); + return fileProcessedStartInterval; } String connectString = @@ -74,27 +77,36 @@ public static void process(ProcessingContext taskContext, SpannerDao spannerDao) InputRecordProcessor.processRecords( records, taskContext.getSchema(), dao, shardId, taskContext.getSourceDbTimezoneOffset()); - markShardSuccess(taskContext, spannerDao); + markShardSuccess(taskContext, spannerDao, fileProcessedStartInterval); dao.cleanup(); LOG.info( "Shard " + shardId + ": Successfully processed batch of " + 
records.size() + " records."); } catch (Exception e) { Metrics.counter(GCSToSourceStreamingHandler.class, "shard_failed_" + shardId).inc(); - markShardFailure(taskContext, spannerDao); + markShardFailure(taskContext, spannerDao, fileProcessedStartInterval); throw new RuntimeException("Failure when processing records", e); } + return fileProcessedStartInterval; } - private static void markShardSuccess(ProcessingContext taskContext, SpannerDao spannerDao) { - markShardProgress(taskContext, Constants.SHARD_PROGRESS_STATUS_SUCCESS, spannerDao); + private static void markShardSuccess( + ProcessingContext taskContext, SpannerDao spannerDao, String fileProcessedStartInterval) { + markShardProgress( + taskContext, + Constants.SHARD_PROGRESS_STATUS_SUCCESS, + spannerDao, + fileProcessedStartInterval); } private static void markShardProgress( - ProcessingContext taskContext, String status, SpannerDao spannerDao) { + ProcessingContext taskContext, + String status, + SpannerDao spannerDao, + String fileProcessedStartInterval) { ShardProgressTracker shardProgressTracker = new ShardProgressTracker(spannerDao, taskContext.getRunId()); - String fileStartTime = taskContext.getStartTimestamp(); - com.google.cloud.Timestamp startTs = com.google.cloud.Timestamp.parseTimestamp(fileStartTime); + com.google.cloud.Timestamp startTs = null; + startTs = com.google.cloud.Timestamp.parseTimestamp(fileProcessedStartInterval); ShardProgress shardProgress = new ShardProgress(taskContext.getShard().getLogicalShardId(), startTs, status); @@ -102,7 +114,9 @@ private static void markShardProgress( shardProgressTracker.writeShardProgress(shardProgress); } - private static void markShardFailure(ProcessingContext taskContext, SpannerDao spannerDao) { - markShardProgress(taskContext, Constants.SHARD_PROGRESS_STATUS_ERROR, spannerDao); + private static void markShardFailure( + ProcessingContext taskContext, SpannerDao spannerDao, String fileProcessedStartInterval) { + markShardProgress( + taskContext, Constants.SHARD_PROGRESS_STATUS_ERROR, spannerDao, fileProcessedStartInterval); } } diff --git a/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/transforms/GcsToSourceStreamer.java b/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/transforms/GcsToSourceStreamer.java index 958ac743eb..4104712699 100644 --- a/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/transforms/GcsToSourceStreamer.java +++ b/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/transforms/GcsToSourceStreamer.java @@ -132,18 +132,18 @@ public void processElement( } String shardId = keyString.read(); + + String storedStartTime = startString.read(); + if (storedStartTime == null) { + startString.write(element.getValue().getStartTimestamp()); + } + // Set timer if not already running. 
if (shardId == null) { - + keyString.write(element.getKey()); Instant outputTimestamp = Instant.now().plus(Duration.millis(incrementIntervalInMilliSeconds)); timer.set(outputTimestamp); - keyString.write(element.getKey()); - } - - String storedStartTime = startString.read(); - if (storedStartTime == null) { - startString.write(element.getValue().getStartTimestamp()); } num_shards.inc(); } @@ -169,10 +169,10 @@ public void onExpiry( try { taskContext.setStartTimestamp(startString.read()); - GCSToSourceStreamingHandler.process(taskContext, spannerDao); + String processedStartTs = GCSToSourceStreamingHandler.process(taskContext, spannerDao); Instant nextTimer = Instant.now().plus(Duration.millis(incrementIntervalInMilliSeconds)); com.google.cloud.Timestamp startTs = - com.google.cloud.Timestamp.parseTimestamp(startString.read()); + com.google.cloud.Timestamp.parseTimestamp(processedStartTs); Instant startInst = new Instant(startTs.toSqlTimestamp()); Instant endInst = startInst.plus(taskContext.getWindowDuration()); startString.write(endInst.toString()); diff --git a/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/utils/GCSReader.java b/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/utils/GCSReader.java index 8af883fdbe..2f7cce1f25 100644 --- a/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/utils/GCSReader.java +++ b/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/utils/GCSReader.java @@ -33,6 +33,7 @@ import java.util.List; import org.apache.beam.sdk.io.FileSystems; import org.apache.beam.sdk.metrics.Metrics; +import org.joda.time.Duration; import org.joda.time.Instant; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -44,9 +45,9 @@ public class GCSReader { private ShardFileCreationTracker shardFileCreationTracker; private Instant currentIntervalEnd; private String shardId; - private boolean shouldRetryWhenFileNotFound; - private boolean shouldFailWhenFileNotFound; - private boolean queriedDataSeenTable; + private Duration windowDuration; + private String gcsPath; + private Instant currentIntervalStart; private static final Logger LOG = LoggerFactory.getLogger(GCSReader.class); @@ -54,14 +55,14 @@ public GCSReader(ProcessingContext taskContext, SpannerDao spannerDao) { String fileStartTime = taskContext.getStartTimestamp(); com.google.cloud.Timestamp startTs = com.google.cloud.Timestamp.parseTimestamp(fileStartTime); - Instant startInst = new Instant(startTs.toSqlTimestamp()); - currentIntervalEnd = startInst.plus(taskContext.getWindowDuration()); + currentIntervalStart = new Instant(startTs.toSqlTimestamp()); + currentIntervalEnd = currentIntervalStart.plus(taskContext.getWindowDuration()); String gcsFileName = taskContext.getGCSPath() + "/" + taskContext.getShard().getLogicalShardId() + "/" - + startInst + + currentIntervalStart + "-" + currentIntervalEnd + "-pane-0-last-0-of-1.txt"; @@ -71,9 +72,8 @@ public GCSReader(ProcessingContext taskContext, SpannerDao spannerDao) { new ShardFileCreationTracker( spannerDao, taskContext.getShard().getLogicalShardId(), taskContext.getRunId()); this.shardId = taskContext.getShard().getLogicalShardId(); - shouldRetryWhenFileNotFound = true; - shouldFailWhenFileNotFound = false; - queriedDataSeenTable = false; + this.windowDuration = taskContext.getWindowDuration(); + this.gcsPath = taskContext.getGCSPath(); } public List getRecords() { @@ -111,33 +111,8 @@ public List getRecords() { } catch (com.fasterxml.jackson.core.JsonProcessingException ex) { 
throw new RuntimeException("Failed in processing the record ", ex); } catch (IOException e) { - LOG.warn("File not found : " + fileName); - if (shouldRetryWhenFileNotFound) { - if (!queriedDataSeenTable) { - return checkAndReturnIfFileExists(); - } else { - /* We do not need to call checkAndReturnIfFileExists again as it was called already - as this will lead to stack overflow when the time taken to write file to GCS is large. - GCS writing can take arbitrarty time in unforeseen scenario like Dataflow worker restart. - So we just try to read the file in the same function call until found.*/ - return waitTillFileCreatedAndReturn(); - } - } else { - if (shouldFailWhenFileNotFound) { - Metrics.counter(GCSReader.class, "file_not_found_errors_" + shardId).inc(); - throw new RuntimeException("File " + fileName + " expected but not found : " + e); - } - /* The logic for writing to skipped file table can generate load on the metadata database - when the first file from the reader template comes very late. - In this case, a lot of file intervals will be skipped since no file exists. - This causes DEADLINE_EXCEEDED and hence can negatively harm the progress of - both the pipelines as the metadata database is shared. - Hence the code to store the file intervals skipped is removed - and only warnings are logged. Since it was only for audit purpose anyway.*/ - LOG.warn("File not found : " + fileName + " skipping the file"); - } - + return checkAndReturnIfFileExists(); } catch (Exception e) { throw new RuntimeException("Failed in GcsReader ", e); } @@ -154,15 +129,13 @@ public List getRecords() { * we check the shard_file_create_progress table until the created_upto value is greater than or * equal to the current window. * - *
<p>
If the created_upto is equal to current window - then it's indication that file for current - * window is written and should exist in GCS. So we lookup the file again and fail if the file is - * not found. - * - *
<p>
If the created_upto is greater than current window, we need to know if there was any data in - * Spanner for the window we are checking. For this we query the date_seen table. If data was seen - * for the current window, then file should exist in GCS and we lookup the file indefinitely until - * is it found. If, however, there was no data for the current window in data_seen, then it means - * file for the current interval is not there in GCS. We just simply skip the file. + *
<p>
If the created_upto is greater than or equal to thecurrent window, we need to know if there + * was any data in Spanner for the window we are checking. For this we query the date_seen table. + * If data was seen for the current window, then file should exist in GCS and we lookup the file + * indefinitely until is it found. If, however, there was no data for the current window in + * data_seen, then it means file for the current interval is not there in GCS. We then keep + * incrementally looking in data_seen for the next window unitl we find data and then return the + * file contents */ private List checkAndReturnIfFileExists() { try { @@ -198,17 +171,45 @@ private List checkAndReturnIfFileExists() { // if the file is expected to be present - retry until found if (shardFileCreationTracker.doesDataExistForTimestamp(currentEndTimestamp)) { LOG.info("Data exists for shard {} and time end {} ", shardId, currentEndTimestamp); - shouldRetryWhenFileNotFound = - true; // can happen due to out of order writes or the write to GCS was very slow - shouldFailWhenFileNotFound = true; - } else { - shouldRetryWhenFileNotFound = false; - shouldFailWhenFileNotFound = false; - } - queriedDataSeenTable = true; - return getRecords(); + // Data does not exist for the current window. So we scan the data_seen table to see which + // is the next window for which data exists. + LOG.info("Data does not exist for shard {} and time end {} ", shardId, currentEndTimestamp); + Instant previousWindowEnd = currentIntervalEnd; + Instant nextWindowEnd = previousWindowEnd.plus(windowDuration); + Timestamp nextEndTimestamp = Timestamp.parseTimestamp(nextWindowEnd.toString()); + // Note that since the firstPipelineProgress has a time, eventually we will find the + // data_seen entry + while (firstPipelineProgress.compareTo(nextEndTimestamp) >= 0) { + if (!shardFileCreationTracker.doesDataExistForTimestamp(nextEndTimestamp)) { + LOG.info( + "Data does not exist for shard {} and time end {} ", shardId, nextEndTimestamp); + previousWindowEnd = nextWindowEnd; + nextWindowEnd = previousWindowEnd.plus(windowDuration); + nextEndTimestamp = Timestamp.parseTimestamp(nextWindowEnd.toString()); + } else { + // Now we have found the next interval which will have the file expected + // Construct the file name and return contents + LOG.info("Data exists for shard {} and time end {} ", shardId, nextEndTimestamp); + this.fileName = + this.gcsPath + + "/" + + this.shardId + + "/" + + previousWindowEnd + + "-" + + nextWindowEnd + + "-pane-0-last-0-of-1.txt"; + currentIntervalStart = + nextWindowEnd.minus( + windowDuration); // for the caller to know the current interval start + break; + } + } + } + // File should exist now, so wait until found the file and return records + return waitTillFileCreatedAndReturn(); } catch (Exception e) { throw new RuntimeException( " Cannot determine file creation progress for shard : " + shardId, e); @@ -259,4 +260,8 @@ private List waitTillFileCreatedAndReturn() { } return changeStreamList; } + + public String getCurrentIntervalStart() { + return currentIntervalStart.toString(); + } } diff --git a/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/utils/SpannerDao.java b/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/utils/SpannerDao.java index 59c1273486..990003c6bd 100644 --- a/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/utils/SpannerDao.java +++ 
b/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/utils/SpannerDao.java @@ -219,7 +219,7 @@ private void checkAndCreateDataSeenTable() { + " run_id character varying NOT NULL,shard character" + " varying NOT NULL,window_seen timestamp with time zone NOT NULL,update_ts" + " timestamp with time zone DEFAULT CURRENT_TIMESTAMP,PRIMARY KEY(id))" - + " TTL INTERVAL '2 days' ON update_ts"; + + " TTL INTERVAL '30 days' ON update_ts"; } else { createTable = @@ -228,7 +228,7 @@ private void checkAndCreateDataSeenTable() { + " (id STRING(MAX) NOT NULL, run_id" + " STRING(MAX) NOT NULL,shard STRING(MAX) NOT NULL, window_seen TIMESTAMP NOT" + " NULL , update_ts TIMESTAMP DEFAULT (CURRENT_TIMESTAMP)) PRIMARY" - + " KEY(id) , ROW DELETION POLICY (OLDER_THAN(update_ts, INTERVAL 2 DAY))"; + + " KEY(id) , ROW DELETION POLICY (OLDER_THAN(update_ts, INTERVAL 30 DAY))"; } OperationFuture op = databaseAdminClient.updateDatabaseDdl( From 178294bb983c772b92dbba77e872d194241fd7cd Mon Sep 17 00:00:00 2001 From: Shreya Khajanchi Date: Mon, 13 May 2024 12:34:04 +0530 Subject: [PATCH 20/70] added unit test to spanner-common --- .../ddl/InformationSchemaScannerTest.java | 215 ++++++++++++++++++ 1 file changed, 215 insertions(+) create mode 100644 v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/ddl/InformationSchemaScannerTest.java diff --git a/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/ddl/InformationSchemaScannerTest.java b/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/ddl/InformationSchemaScannerTest.java new file mode 100644 index 0000000000..7ff4e3171e --- /dev/null +++ b/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/ddl/InformationSchemaScannerTest.java @@ -0,0 +1,215 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.teleport.v2.spanner.ddl; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.google.cloud.spanner.Dialect; +import com.google.cloud.spanner.ReadContext; +import com.google.cloud.spanner.ResultSet; +import com.google.cloud.spanner.Statement; +import org.junit.Test; + +public class InformationSchemaScannerTest { + + void mockGSQLColumnOptions(ReadContext context) { + Statement listColumnOptions = + Statement.of( + "SELECT t.table_name, t.column_name, t.option_name, t.option_type," + + " t.option_value" + + " FROM information_schema.column_options AS t" + + " WHERE t.table_catalog = '' AND t.table_schema = ''" + + " ORDER BY t.table_name, t.column_name"); + ResultSet listColumnOptionsResultSet = mock(ResultSet.class); + when(context.executeQuery(listColumnOptions)).thenReturn(listColumnOptionsResultSet); + when(listColumnOptionsResultSet.next()).thenReturn(true, false); + when(listColumnOptionsResultSet.getString(0)).thenReturn("singer"); + when(listColumnOptionsResultSet.getString(1)).thenReturn("singerName"); + when(listColumnOptionsResultSet.getString(2)).thenReturn("option1"); + when(listColumnOptionsResultSet.getString(3)).thenReturn("STRING"); + when(listColumnOptionsResultSet.getString(4)).thenReturn("SomeName"); + } + + void mockGSQLIndex(ReadContext context) { + Statement listIndexes = + Statement.of( + "SELECT t.table_name, t.index_name, t.parent_table_name, t.is_unique," + + " t.is_null_filtered" + + " FROM information_schema.indexes AS t" + + " WHERE t.table_catalog = '' AND t.table_schema = '' AND" + + " t.index_type='INDEX' AND t.spanner_is_managed = FALSE" + + " ORDER BY t.table_name, t.index_name"); + ResultSet listIndexessResultSet = mock(ResultSet.class); + when(context.executeQuery(listIndexes)).thenReturn(listIndexessResultSet); + when(listIndexessResultSet.next()).thenReturn(true, false); + when(listIndexessResultSet.getString(0)).thenReturn("singer"); + when(listIndexessResultSet.getString(1)).thenReturn("index1"); + when(listIndexessResultSet.isNull(2)).thenReturn(true); + when(listIndexessResultSet.getBoolean(3)).thenReturn(false); + when(listIndexessResultSet.getBoolean(4)).thenReturn(false); + when(listIndexessResultSet.isNull(5)).thenReturn(true); + } + + void mockGSQLIndexColumns(ReadContext context) { + Statement listIndexColumns = + Statement.of( + "SELECT t.table_name, t.column_name, t.column_ordering, t.index_name " + + "FROM information_schema.index_columns AS t " + + "WHERE t.table_catalog = '' AND t.table_schema = '' " + + "ORDER BY t.table_name, t.index_name, t.ordinal_position"); + ResultSet listIndexColumnsResultSet = mock(ResultSet.class); + when(context.executeQuery(listIndexColumns)).thenReturn(listIndexColumnsResultSet); + when(listIndexColumnsResultSet.next()).thenReturn(true, false); + when(listIndexColumnsResultSet.getString(0)).thenReturn("singer"); + when(listIndexColumnsResultSet.getString(1)).thenReturn("singerName"); + when(listIndexColumnsResultSet.isNull(2)).thenReturn(true); + when(listIndexColumnsResultSet.getString(3)).thenReturn("index1"); + } + + void mockGSQLForeignKey(ReadContext context) { + Statement listForeignKeys = + Statement.of( + "SELECT rc.constraint_name," + + " kcu1.table_name," + + " kcu1.column_name," + + " kcu2.table_name," + + " kcu2.column_name" + + " FROM information_schema.referential_constraints as rc" + + " INNER JOIN information_schema.key_column_usage as kcu1" + + " ON kcu1.constraint_catalog 
= rc.constraint_catalog" + + " AND kcu1.constraint_schema = rc.constraint_schema" + + " AND kcu1.constraint_name = rc.constraint_name" + + " INNER JOIN information_schema.key_column_usage as kcu2" + + " ON kcu2.constraint_catalog = rc.unique_constraint_catalog" + + " AND kcu2.constraint_schema = rc.unique_constraint_schema" + + " AND kcu2.constraint_name = rc.unique_constraint_name" + + " AND kcu2.ordinal_position = kcu1.position_in_unique_constraint" + + " WHERE rc.constraint_catalog = ''" + + " AND rc.constraint_schema = ''" + + " AND kcu1.constraint_catalog = ''" + + " AND kcu1.constraint_schema = ''" + + " AND kcu2.constraint_catalog = ''" + + " AND kcu2.constraint_schema = ''" + + " ORDER BY rc.constraint_name, kcu1.ordinal_position;"); + ResultSet listForeignKeysResultSet = mock(ResultSet.class); + when(context.executeQuery(listForeignKeys)).thenReturn(listForeignKeysResultSet); + when(listForeignKeysResultSet.next()).thenReturn(true, false); + when(listForeignKeysResultSet.getString(0)).thenReturn("fk1"); + when(listForeignKeysResultSet.getString(1)).thenReturn("album"); + when(listForeignKeysResultSet.getString(2)).thenReturn("singerId"); + when(listForeignKeysResultSet.getString(3)).thenReturn("singer"); + when(listForeignKeysResultSet.getString(4)).thenReturn("singerId"); + } + + void mockGSQLCheckConstraint(ReadContext context) { + Statement listCheckConstraints = + Statement.of( + "SELECT ctu.TABLE_NAME," + + " cc.CONSTRAINT_NAME," + + " cc.CHECK_CLAUSE" + + " FROM INFORMATION_SCHEMA.CONSTRAINT_TABLE_USAGE as ctu" + + " INNER JOIN INFORMATION_SCHEMA.CHECK_CONSTRAINTS as cc" + + " ON ctu.constraint_catalog = cc.constraint_catalog" + + " AND ctu.constraint_schema = cc.constraint_schema" + + " AND ctu.CONSTRAINT_NAME = cc.CONSTRAINT_NAME" + + " WHERE NOT STARTS_WITH(cc.CONSTRAINT_NAME, 'CK_IS_NOT_NULL_')" + + " AND ctu.table_catalog = ''" + + " AND ctu.table_schema = ''" + + " AND ctu.constraint_catalog = ''" + + " AND ctu.constraint_schema = ''" + + " AND cc.SPANNER_STATE = 'COMMITTED';"); + ResultSet listCheckConstraintsResultSet = mock(ResultSet.class); + when(context.executeQuery(listCheckConstraints)).thenReturn(listCheckConstraintsResultSet); + when(listCheckConstraintsResultSet.next()).thenReturn(true, false); + when(listCheckConstraintsResultSet.getString(0)).thenReturn("album"); + when(listCheckConstraintsResultSet.getString(1)).thenReturn("check1"); + when(listCheckConstraintsResultSet.getString(2)).thenReturn("albumName!=NULL"); + } + + void mockGSQLListTables(ReadContext context) { + Statement listTables = + Statement.of( + "SELECT t.table_name, t.parent_table_name, t.on_delete_action" + + " FROM information_schema.tables AS t" + + " WHERE t.table_catalog = '' AND t.table_schema = ''"); + ResultSet listTablesResultSet = mock(ResultSet.class); + when(context.executeQuery(listTables)).thenReturn(listTablesResultSet); + when(listTablesResultSet.next()).thenReturn(true, true, false); + when(listTablesResultSet.getString(0)).thenReturn("singer", "album"); + when(listTablesResultSet.getString(1)).thenReturn(null, "singer"); + when(listTablesResultSet.getString(2)).thenReturn(null, "CASCADE"); + } + + void mockGSQLListColumns(ReadContext context) { + Statement listColumns = + Statement.of( + "SELECT c.table_name, c.column_name," + + " c.ordinal_position, c.spanner_type, c.is_nullable," + + " c.is_generated, c.generation_expression, c.is_stored" + + " FROM information_schema.columns as c" + + " WHERE c.table_catalog = '' AND c.table_schema = '' " + + " AND c.spanner_state = 
'COMMITTED' " + + " ORDER BY c.table_name, c.ordinal_position"); + ResultSet listColumnsResultSet = mock(ResultSet.class); + + when(context.executeQuery(listColumns)).thenReturn(listColumnsResultSet); + when(listColumnsResultSet.next()).thenReturn(true, true, true, true, true, false); + when(listColumnsResultSet.getString(0)) + .thenReturn("singer", "singer", "album", "album", "album"); + when(listColumnsResultSet.getString(1)) + .thenReturn("singerId", "singerName", "singerId", "albumId", "albumName"); + when(listColumnsResultSet.getString(3)).thenReturn("STRING(50)"); + when(listColumnsResultSet.getString(4)).thenReturn("NO"); + when(listColumnsResultSet.getString(5)).thenReturn("NO"); + when(listColumnsResultSet.isNull(6)).thenReturn(true); + when(listColumnsResultSet.isNull(7)).thenReturn(true); + } + + @Test + public void testScan() { + ReadContext context = mock(ReadContext.class); + + mockGSQLListTables(context); + mockGSQLListColumns(context); + mockGSQLColumnOptions(context); + mockGSQLIndex(context); + mockGSQLIndexColumns(context); + mockGSQLForeignKey(context); + mockGSQLCheckConstraint(context); + InformationSchemaScanner informationSchemaScanner = + new InformationSchemaScanner(context, Dialect.GOOGLE_STANDARD_SQL); + Ddl ddl = informationSchemaScanner.scan(); + String expectedDdl = + "CREATE TABLE `singer` (\n" + + "\t`singerId` STRING(50) NOT NULL,\n" + + "\t`singerName` STRING(50) NOT NULL OPTIONS (option1=\"SomeName\"),\n" + + ") PRIMARY KEY ()\n" + + "CREATE INDEX `index1` ON `singer`() STORING (`singerName`)\n" + + "\n" + + "CREATE TABLE `album` (\n" + + "\t`singerId` STRING(50) NOT NULL,\n" + + "\t`albumId` STRING(50) NOT NULL,\n" + + "\t`albumName` STRING(50) NOT NULL,\n" + + "\tCONSTRAINT `check1` CHECK (albumName!=NULL),\n" + + ") PRIMARY KEY (),\n" + + "INTERLEAVE IN PARENT `singer` ON DELETE CASCADE\n" + + "\n" + + "ALTER TABLE `album` ADD CONSTRAINT `fk1` FOREIGN KEY (`singerId`) REFERENCES `singer` (`singerId`)"; + assertEquals(expectedDdl, ddl.prettyPrint()); + } +} From aecc881ad7b7c1c387286a802c228f0e0553df5f Mon Sep 17 00:00:00 2001 From: Shreya Khajanchi Date: Mon, 13 May 2024 17:51:57 +0530 Subject: [PATCH 21/70] added more test cases --- .../ddl/InformationSchemaScannerTest.java | 227 +++++++++++++++++- 1 file changed, 214 insertions(+), 13 deletions(-) diff --git a/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/ddl/InformationSchemaScannerTest.java b/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/ddl/InformationSchemaScannerTest.java index 7ff4e3171e..5c54f33414 100644 --- a/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/ddl/InformationSchemaScannerTest.java +++ b/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/ddl/InformationSchemaScannerTest.java @@ -56,13 +56,12 @@ void mockGSQLIndex(ReadContext context) { + " ORDER BY t.table_name, t.index_name"); ResultSet listIndexessResultSet = mock(ResultSet.class); when(context.executeQuery(listIndexes)).thenReturn(listIndexessResultSet); - when(listIndexessResultSet.next()).thenReturn(true, false); - when(listIndexessResultSet.getString(0)).thenReturn("singer"); - when(listIndexessResultSet.getString(1)).thenReturn("index1"); + when(listIndexessResultSet.next()).thenReturn(true, true, true, false); + when(listIndexessResultSet.getString(0)).thenReturn("singer", "singer", "album"); + when(listIndexessResultSet.getString(1)).thenReturn("index1", "PRIMARY_KEY", "PRIMARY_KEY"); 
when(listIndexessResultSet.isNull(2)).thenReturn(true); when(listIndexessResultSet.getBoolean(3)).thenReturn(false); when(listIndexessResultSet.getBoolean(4)).thenReturn(false); - when(listIndexessResultSet.isNull(5)).thenReturn(true); } void mockGSQLIndexColumns(ReadContext context) { @@ -74,11 +73,12 @@ void mockGSQLIndexColumns(ReadContext context) { + "ORDER BY t.table_name, t.index_name, t.ordinal_position"); ResultSet listIndexColumnsResultSet = mock(ResultSet.class); when(context.executeQuery(listIndexColumns)).thenReturn(listIndexColumnsResultSet); - when(listIndexColumnsResultSet.next()).thenReturn(true, false); - when(listIndexColumnsResultSet.getString(0)).thenReturn("singer"); - when(listIndexColumnsResultSet.getString(1)).thenReturn("singerName"); - when(listIndexColumnsResultSet.isNull(2)).thenReturn(true); - when(listIndexColumnsResultSet.getString(3)).thenReturn("index1"); + when(listIndexColumnsResultSet.next()).thenReturn(true, true, true, false); + when(listIndexColumnsResultSet.getString(0)).thenReturn("singer", "singer", "album"); + when(listIndexColumnsResultSet.getString(1)).thenReturn("singerName", "singerId", "albumId"); + when(listIndexColumnsResultSet.isNull(2)).thenReturn(true, false, false); + when(listIndexColumnsResultSet.getString(2)).thenReturn("ASC", "DESC"); + when(listIndexColumnsResultSet.getString(3)).thenReturn("index1", "PRIMARY_KEY", "PRIMARY_KEY"); } void mockGSQLForeignKey(ReadContext context) { @@ -180,8 +180,165 @@ void mockGSQLListColumns(ReadContext context) { when(listColumnsResultSet.isNull(7)).thenReturn(true); } + void mockPgSQLColumnOptions(ReadContext context) { + Statement listColumnOptions = + Statement.of( + "SELECT t.table_name, t.column_name, t.option_name, t.option_type," + + " t.option_value" + + " FROM information_schema.column_options AS t" + + " WHERE t.table_schema NOT IN " + + " ('information_schema', 'spanner_sys', 'pg_catalog')" + + " ORDER BY t.table_name, t.column_name"); + ResultSet listColumnOptionsResultSet = mock(ResultSet.class); + when(context.executeQuery(listColumnOptions)).thenReturn(listColumnOptionsResultSet); + when(listColumnOptionsResultSet.next()).thenReturn(true, false); + when(listColumnOptionsResultSet.getString(0)).thenReturn("singer"); + when(listColumnOptionsResultSet.getString(1)).thenReturn("singerName"); + when(listColumnOptionsResultSet.getString(2)).thenReturn("option1"); + when(listColumnOptionsResultSet.getString(3)).thenReturn("character varying"); + when(listColumnOptionsResultSet.getString(4)).thenReturn("SomeName"); + } + + void mockPgSQLIndex(ReadContext context) { + Statement listIndexes = + Statement.of( + "SELECT t.table_name, t.index_name, t.parent_table_name, t.is_unique," + + " t.is_null_filtered, t.filter FROM information_schema.indexes AS t " + + " WHERE t.table_schema NOT IN " + + " ('information_schema', 'spanner_sys', 'pg_catalog')" + + " AND t.index_type='INDEX' AND t.spanner_is_managed = 'NO' " + + " ORDER BY t.table_name, t.index_name"); + ResultSet listIndexessResultSet = mock(ResultSet.class); + when(context.executeQuery(listIndexes)).thenReturn(listIndexessResultSet); + when(listIndexessResultSet.next()).thenReturn(true, false); + when(listIndexessResultSet.getString(0)).thenReturn("singer"); + when(listIndexessResultSet.getString(1)).thenReturn("index1"); + when(listIndexessResultSet.isNull(2)).thenReturn(true); + when(listIndexessResultSet.getString(3)).thenReturn("YES"); + when(listIndexessResultSet.getString(4)).thenReturn("YES"); + 
when(listIndexessResultSet.isNull(5)).thenReturn(true); + } + + void mockPgSQLIndexColumns(ReadContext context) { + Statement listIndexColumns = + Statement.of( + "SELECT t.table_name, t.column_name, t.column_ordering, t.index_name " + + "FROM information_schema.index_columns AS t " + + "WHERE t.table_schema NOT IN " + + "('information_schema', 'spanner_sys', 'pg_catalog') " + + "ORDER BY t.table_name, t.index_name, t.ordinal_position"); + ResultSet listIndexColumnsResultSet = mock(ResultSet.class); + when(context.executeQuery(listIndexColumns)).thenReturn(listIndexColumnsResultSet); + when(listIndexColumnsResultSet.next()).thenReturn(true, false); + when(listIndexColumnsResultSet.getString(0)).thenReturn("singer"); + when(listIndexColumnsResultSet.getString(1)).thenReturn("singerName"); + when(listIndexColumnsResultSet.isNull(2)).thenReturn(true); + when(listIndexColumnsResultSet.getString(3)).thenReturn("index1"); + } + + void mockPgSQLForeignKey(ReadContext context) { + Statement listForeignKeys = + Statement.of( + "SELECT rc.constraint_name," + + " kcu1.table_name," + + " kcu1.column_name," + + " kcu2.table_name," + + " kcu2.column_name" + + " FROM information_schema.referential_constraints as rc" + + " INNER JOIN information_schema.key_column_usage as kcu1" + + " ON kcu1.constraint_catalog = rc.constraint_catalog" + + " AND kcu1.constraint_schema = rc.constraint_schema" + + " AND kcu1.constraint_name = rc.constraint_name" + + " INNER JOIN information_schema.key_column_usage as kcu2" + + " ON kcu2.constraint_catalog = rc.unique_constraint_catalog" + + " AND kcu2.constraint_schema = rc.unique_constraint_schema" + + " AND kcu2.constraint_name = rc.unique_constraint_name" + + " AND kcu2.ordinal_position = kcu1.position_in_unique_constraint" + + " WHERE rc.constraint_catalog = kcu1.constraint_catalog" + + " AND rc.constraint_catalog = kcu2.constraint_catalog" + + " AND rc.constraint_schema NOT IN " + + " ('information_schema', 'spanner_sys', 'pg_catalog')" + + " AND rc.constraint_schema = kcu1.constraint_schema" + + " AND rc.constraint_schema = kcu2.constraint_schema" + + " ORDER BY rc.constraint_name, kcu1.ordinal_position;"); + ResultSet listForeignKeysResultSet = mock(ResultSet.class); + when(context.executeQuery(listForeignKeys)).thenReturn(listForeignKeysResultSet); + when(listForeignKeysResultSet.next()).thenReturn(true, false); + when(listForeignKeysResultSet.getString(0)).thenReturn("fk1"); + when(listForeignKeysResultSet.getString(1)).thenReturn("album"); + when(listForeignKeysResultSet.getString(2)).thenReturn("singerId"); + when(listForeignKeysResultSet.getString(3)).thenReturn("singer"); + when(listForeignKeysResultSet.getString(4)).thenReturn("singerId"); + } + + void mockPgSQLCheckConstraint(ReadContext context) { + Statement listCheckConstraints = + Statement.of( + "SELECT ctu.TABLE_NAME," + + " cc.CONSTRAINT_NAME," + + " cc.CHECK_CLAUSE" + + " FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS as ctu" + + " INNER JOIN INFORMATION_SCHEMA.CHECK_CONSTRAINTS as cc" + + " ON ctu.constraint_catalog = cc.constraint_catalog" + + " AND ctu.constraint_schema = cc.constraint_schema" + + " AND ctu.CONSTRAINT_NAME = cc.CONSTRAINT_NAME" + + " WHERE NOT STARTS_WITH(cc.CONSTRAINT_NAME, 'CK_IS_NOT_NULL_')" + + " AND ctu.table_catalog = ctu.constraint_catalog" + + " AND ctu.table_schema NOT IN" + + "('information_schema', 'spanner_sys', 'pg_catalog')" + + " AND ctu.table_schema = ctu.constraint_schema" + + " AND cc.SPANNER_STATE = 'COMMITTED';"); + ResultSet listCheckConstraintsResultSet = 
mock(ResultSet.class); + when(context.executeQuery(listCheckConstraints)).thenReturn(listCheckConstraintsResultSet); + when(listCheckConstraintsResultSet.next()).thenReturn(true, false); + when(listCheckConstraintsResultSet.getString(0)).thenReturn("album"); + when(listCheckConstraintsResultSet.getString(1)).thenReturn("check1"); + when(listCheckConstraintsResultSet.getString(2)).thenReturn("albumName!=NULL"); + } + + void mockPgSQLListTables(ReadContext context) { + Statement listTables = + Statement.of( + "SELECT t.table_name, t.parent_table_name, t.on_delete_action FROM" + + " information_schema.tables AS t" + + " WHERE t.table_schema NOT IN " + + "('information_schema', 'spanner_sys', 'pg_catalog')"); + ResultSet listTablesResultSet = mock(ResultSet.class); + when(context.executeQuery(listTables)).thenReturn(listTablesResultSet); + when(listTablesResultSet.next()).thenReturn(true, true, false); + when(listTablesResultSet.getString(0)).thenReturn("singer", "album"); + when(listTablesResultSet.getString(1)).thenReturn(null, "singer"); + when(listTablesResultSet.getString(2)).thenReturn(null, "CASCADE"); + } + + void mockPgSQLListColumns(ReadContext context) { + Statement listColumns = + Statement.of( + "SELECT c.table_name, c.column_name," + + " c.ordinal_position, c.spanner_type, c.is_nullable," + + " c.is_generated, c.generation_expression, c.is_stored" + + " FROM information_schema.columns as c" + + " WHERE c.table_schema NOT IN " + + " ('information_schema', 'spanner_sys', 'pg_catalog') " + + " AND c.spanner_state = 'COMMITTED' " + + " ORDER BY c.table_name, c.ordinal_position"); + ResultSet listColumnsResultSet = mock(ResultSet.class); + + when(context.executeQuery(listColumns)).thenReturn(listColumnsResultSet); + when(listColumnsResultSet.next()).thenReturn(true, true, true, true, true, false); + when(listColumnsResultSet.getString(0)) + .thenReturn("singer", "singer", "album", "album", "album"); + when(listColumnsResultSet.getString(1)) + .thenReturn("singerId", "singerName", "singerId", "albumId", "albumName"); + when(listColumnsResultSet.getString(3)).thenReturn("character varying(50)"); + when(listColumnsResultSet.getString(4)).thenReturn("NO"); + when(listColumnsResultSet.getString(5)).thenReturn("NO"); + when(listColumnsResultSet.isNull(6)).thenReturn(true); + when(listColumnsResultSet.isNull(7)).thenReturn(true); + } + @Test - public void testScan() { + public void testScanGSQLDdl() { ReadContext context = mock(ReadContext.class); mockGSQLListTables(context); @@ -198,7 +355,8 @@ public void testScan() { "CREATE TABLE `singer` (\n" + "\t`singerId` STRING(50) NOT NULL,\n" + "\t`singerName` STRING(50) NOT NULL OPTIONS (option1=\"SomeName\"),\n" - + ") PRIMARY KEY ()\n" + + ") PRIMARY KEY (`singerId` ASC)\n" + + "CREATE INDEX `PRIMARY_KEY` ON `singer`()\n" + "CREATE INDEX `index1` ON `singer`() STORING (`singerName`)\n" + "\n" + "CREATE TABLE `album` (\n" @@ -206,10 +364,53 @@ public void testScan() { + "\t`albumId` STRING(50) NOT NULL,\n" + "\t`albumName` STRING(50) NOT NULL,\n" + "\tCONSTRAINT `check1` CHECK (albumName!=NULL),\n" - + ") PRIMARY KEY (),\n" + + ") PRIMARY KEY (`albumId` DESC),\n" + "INTERLEAVE IN PARENT `singer` ON DELETE CASCADE\n" - + "\n" + + "CREATE INDEX `PRIMARY_KEY` ON `album`()\n" + "ALTER TABLE `album` ADD CONSTRAINT `fk1` FOREIGN KEY (`singerId`) REFERENCES `singer` (`singerId`)"; assertEquals(expectedDdl, ddl.prettyPrint()); } + + @Test + public void testScanPgSQLDdl() { + ReadContext context = mock(ReadContext.class); + + 
mockPgSQLListTables(context); + mockPgSQLListColumns(context); + mockPgSQLColumnOptions(context); + mockPgSQLIndex(context); + mockPgSQLIndexColumns(context); + mockPgSQLForeignKey(context); + mockPgSQLCheckConstraint(context); + InformationSchemaScanner informationSchemaScanner = + new InformationSchemaScanner(context, Dialect.POSTGRESQL); + Ddl ddl = informationSchemaScanner.scan(); + String expectedDdl = + "CREATE TABLE \"singer\" (\n" + + "\t\"singerId\" character varying(50) NOT NULL,\n" + + "\t\"singerName\" character varying(50) NOT NULL OPTIONS (option1='SomeName'),\n" + + "\tPRIMARY KEY ()\n" + + ")\n" + + "CREATE UNIQUE INDEX \"index1\" ON \"singer\"() INCLUDE (\"singerName\")\n" + + "\n" + + "CREATE TABLE \"album\" (\n" + + "\t\"singerId\" character varying(50) NOT NULL,\n" + + "\t\"albumId\" character varying(50) NOT NULL,\n" + + "\t\"albumName\" character varying(50) NOT NULL,\n" + + "\tCONSTRAINT \"check1\" CHECK (albumName!=NULL),\n" + + "\tPRIMARY KEY ()\n" + + ") \n" + + "INTERLEAVE IN PARENT \"singer\" ON DELETE CASCADE\n" + + "\n" + + "ALTER TABLE \"album\" ADD CONSTRAINT \"fk1\" FOREIGN KEY (\"singerId\") REFERENCES \"singer\" (\"singerId\")"; + assertEquals(expectedDdl, ddl.prettyPrint()); + } + + @Test(expected = IllegalArgumentException.class) + public void testWithInvalidDialect() { + ReadContext context = mock(ReadContext.class); + InformationSchemaScanner informationSchemaScanner = + new InformationSchemaScanner(context, Dialect.fromName("xyz")); + Ddl ddl = informationSchemaScanner.scan(); + } } From cae3a040f32e4ec81eb28ac0450a78c1154116e9 Mon Sep 17 00:00:00 2001 From: "vitaly.terentyev" Date: Mon, 8 Apr 2024 16:26:52 +0400 Subject: [PATCH 22/70] Template parameters: update old types to new ones --- .../v2/templates/BigtableChangeStreamsToHBase.java | 12 ++++++------ .../teleport/v2/auto/blocks/WriteToBigQuery.java | 2 +- .../v2/options/WindowedFilenamePolicyOptions.java | 2 +- .../v2/options/DataplexBigQueryToGcsOptions.java | 5 +---- .../teleport/v2/templates/DataStreamToBigQuery.java | 6 +++--- .../teleport/v2/templates/DataStreamToSpanner.java | 2 +- .../cloud/teleport/v2/templates/DataStreamToSQL.java | 2 +- .../options/ElasticsearchWriteOptions.java | 7 ++----- .../cloud/teleport/v2/templates/GCSToSourceDb.java | 4 ++-- .../teleport/v2/templates/GoogleAdsToBigQuery.java | 4 ++-- .../BigtableChangeStreamsToPubSubOptions.java | 2 +- .../SpannerChangeStreamsToBigQueryOptions.java | 4 ++-- .../options/SpannerChangeStreamsToPubSubOptions.java | 8 ++++++-- .../teleport/v2/templates/TextIOToBigQuery.java | 9 +++------ .../teleport/v2/transforms/SplunkConverters.java | 7 ++----- .../cloud/teleport/v2/templates/JmsToPubsub.java | 4 ++-- .../teleport/v2/templates/PubsubProtoToBigQuery.java | 2 +- .../teleport/v2/templates/PubSubCdcToBigQuery.java | 4 ++-- .../cloud/teleport/v2/templates/PubsubToJms.java | 2 +- .../cloud/teleport/v2/templates/PubSubToRedis.java | 2 +- .../v2/options/SourceDbToSpannerOptions.java | 6 +++--- .../SpannerChangeStreamsToShardedFileSink.java | 4 ++-- .../v2/templates/StreamingDataGenerator.java | 12 ++++++------ 23 files changed, 52 insertions(+), 60 deletions(-) diff --git a/v2/bigtable-changestreams-to-hbase/src/main/java/com/google/cloud/teleport/v2/templates/BigtableChangeStreamsToHBase.java b/v2/bigtable-changestreams-to-hbase/src/main/java/com/google/cloud/teleport/v2/templates/BigtableChangeStreamsToHBase.java index 601c46626f..c9d6d5e6a0 100644 --- 
a/v2/bigtable-changestreams-to-hbase/src/main/java/com/google/cloud/teleport/v2/templates/BigtableChangeStreamsToHBase.java +++ b/v2/bigtable-changestreams-to-hbase/src/main/java/com/google/cloud/teleport/v2/templates/BigtableChangeStreamsToHBase.java @@ -102,9 +102,9 @@ public interface BigtableToHbasePipelineOptions helpText = "Whether bidirectional replication between hbase and bigtable is enabled, adds additional logic to filter out hbase-replicated mutations") @Default.Boolean(false) - boolean getBidirectionalReplicationEnabled(); + Boolean getBidirectionalReplicationEnabled(); - void setBidirectionalReplicationEnabled(boolean bidirectionalReplicationEnabled); + void setBidirectionalReplicationEnabled(Boolean bidirectionalReplicationEnabled); @TemplateParameter.Text( optional = true, @@ -129,18 +129,18 @@ public interface BigtableToHbasePipelineOptions description = "Dry run", helpText = "When dry run is enabled, pipeline will not write to Hbase") @Default.Boolean(false) - boolean getDryRunEnabled(); + Boolean getDryRunEnabled(); - void setDryRunEnabled(boolean dryRunEnabled); + void setDryRunEnabled(Boolean dryRunEnabled); @TemplateParameter.Boolean( optional = true, description = "Filter GC mutations", helpText = "Filters out garbage collection Delete mutations from CBT") @Default.Boolean(false) - boolean getFilterGCMutations(); + Boolean getFilterGCMutations(); - void setFilterGCMutations(boolean filterGCMutations); + void setFilterGCMutations(Boolean filterGCMutations); } /** diff --git a/v2/common/src/main/java/com/google/cloud/teleport/v2/auto/blocks/WriteToBigQuery.java b/v2/common/src/main/java/com/google/cloud/teleport/v2/auto/blocks/WriteToBigQuery.java index a5a5fb7deb..586b2c5ecf 100644 --- a/v2/common/src/main/java/com/google/cloud/teleport/v2/auto/blocks/WriteToBigQuery.java +++ b/v2/common/src/main/java/com/google/cloud/teleport/v2/auto/blocks/WriteToBigQuery.java @@ -70,7 +70,7 @@ public interface SinkOptions void setOutputTableSpec(String input); - @TemplateParameter.Text( + @TemplateParameter.GcsReadFile( order = 2, optional = true, description = "GCS Path to JSON file containing BigQuery table schema.", diff --git a/v2/common/src/main/java/com/google/cloud/teleport/v2/options/WindowedFilenamePolicyOptions.java b/v2/common/src/main/java/com/google/cloud/teleport/v2/options/WindowedFilenamePolicyOptions.java index 87c6721dc2..26f579671d 100644 --- a/v2/common/src/main/java/com/google/cloud/teleport/v2/options/WindowedFilenamePolicyOptions.java +++ b/v2/common/src/main/java/com/google/cloud/teleport/v2/options/WindowedFilenamePolicyOptions.java @@ -40,7 +40,7 @@ public interface WindowedFilenamePolicyOptions extends PipelineOptions { void setOutputShardTemplate(String value); - @TemplateParameter.Text( + @TemplateParameter.Integer( order = 2, optional = true, description = "Number of shards", diff --git a/v2/dataplex/src/main/java/com/google/cloud/teleport/v2/options/DataplexBigQueryToGcsOptions.java b/v2/dataplex/src/main/java/com/google/cloud/teleport/v2/options/DataplexBigQueryToGcsOptions.java index eb1e7c1165..bfbde306ba 100644 --- a/v2/dataplex/src/main/java/com/google/cloud/teleport/v2/options/DataplexBigQueryToGcsOptions.java +++ b/v2/dataplex/src/main/java/com/google/cloud/teleport/v2/options/DataplexBigQueryToGcsOptions.java @@ -75,12 +75,9 @@ public interface DataplexBigQueryToGcsOptions void setDestinationStorageBucketAssetName(String destinationStorageBucketAssetName); - @TemplateParameter.Text( + @TemplateParameter.DateTime( order = 4, optional = true, - 
regexes = { - "^([0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}:[0-9]{2}(Z|[+-][0-9]{2}:[0-9]{2})?)?|-[pP]([0-9]+(\\.[0-9]+)?Y)?([0-9]+(\\.[0-9]+)?M)?([0-9]+(\\.[0-9]+)?W)?([0-9]+(\\.[0-9]+)?D)?(T([0-9]+(\\.[0-9]+)?H)?([0-9]+(\\.[0-9]+)?M)?([0-9]+(\\.[0-9]+)?S)?)?)$" - }, description = "Move data older than the date.", helpText = "Move data older than this date (and optional time). For partitioned tables, move partitions last modified before this date/time. For non-partitioned tables, move if the table was last modified before this date/time. If not specified, move all tables / partitions. The date/time is parsed in the default time zone by default, but optional suffixes Z and +HH:mm are supported. Format: YYYY-MM-DD or YYYY-MM-DDTHH:mm:ss or YYYY-MM-DDTHH:mm:ss+03:00. Relative date/time (https://en.wikipedia.org/wiki/ISO_8601#Durations) is also supported. Format: -PnDTnHnMn.nS (must start with -P meaning time in the past).") diff --git a/v2/datastream-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToBigQuery.java b/v2/datastream-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToBigQuery.java index a4eddefec7..0427b9f128 100644 --- a/v2/datastream-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToBigQuery.java +++ b/v2/datastream-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToBigQuery.java @@ -144,7 +144,7 @@ public interface Options InputUDFOptions, BigQueryStorageApiStreamingOptions { - @TemplateParameter.Text( + @TemplateParameter.GcsReadFile( order = 1, description = "File location for Datastream file output in Cloud Storage.", helpText = @@ -273,7 +273,7 @@ public interface Options void setIgnoreFields(String value); - @TemplateParameter.Text( + @TemplateParameter.Integer( order = 13, optional = true, description = "The number of minutes between merges for a given table", @@ -293,7 +293,7 @@ public interface Options void setDeadLetterQueueDirectory(String value); - @TemplateParameter.Text( + @TemplateParameter.Integer( order = 15, optional = true, description = "The number of minutes between DLQ Retries.", diff --git a/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToSpanner.java b/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToSpanner.java index a29d67b18b..568ffbe609 100644 --- a/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToSpanner.java +++ b/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToSpanner.java @@ -131,7 +131,7 @@ public class DataStreamToSpanner { *
<p>
Inherits standard configuration options. */ public interface Options extends PipelineOptions, StreamingOptions { - @TemplateParameter.Text( + @TemplateParameter.GcsReadFile( order = 1, description = "File location for Datastream file output in Cloud Storage.", helpText = diff --git a/v2/datastream-to-sql/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToSQL.java b/v2/datastream-to-sql/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToSQL.java index ec3b2ab261..849b725c09 100644 --- a/v2/datastream-to-sql/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToSQL.java +++ b/v2/datastream-to-sql/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToSQL.java @@ -97,7 +97,7 @@ public class DataStreamToSQL { *
<p>
Inherits standard configuration options. */ public interface Options extends PipelineOptions, StreamingOptions { - @TemplateParameter.Text( + @TemplateParameter.GcsReadFile( order = 1, description = "File location for Datastream file input in Cloud Storage.", helpText = diff --git a/v2/elasticsearch-common/src/main/java/com/google/cloud/teleport/v2/elasticsearch/options/ElasticsearchWriteOptions.java b/v2/elasticsearch-common/src/main/java/com/google/cloud/teleport/v2/elasticsearch/options/ElasticsearchWriteOptions.java index 78baba94a8..baf9bfe94a 100644 --- a/v2/elasticsearch-common/src/main/java/com/google/cloud/teleport/v2/elasticsearch/options/ElasticsearchWriteOptions.java +++ b/v2/elasticsearch-common/src/main/java/com/google/cloud/teleport/v2/elasticsearch/options/ElasticsearchWriteOptions.java @@ -56,7 +56,7 @@ public interface ElasticsearchWriteOptions extends PipelineOptions { void setElasticsearchUsername(String elasticsearchUsername); - @TemplateParameter.Text( + @TemplateParameter.Password( order = 4, optional = true, description = "Password for Elasticsearch endpoint", @@ -262,14 +262,11 @@ public interface ElasticsearchWriteOptions extends PipelineOptions { void setDisableCertificateValidation(Boolean disableCertificateValidation); - @TemplateParameter.Text( + @TemplateParameter.KmsEncryptionKey( order = 24, optional = true, parentName = "apiKeySource", parentTriggerValues = {"KMS"}, - regexes = { - "^projects\\/[^\\n\\r\\/]+\\/locations\\/[^\\n\\r\\/]+\\/keyRings\\/[^\\n\\r\\/]+\\/cryptoKeys\\/[^\\n\\r\\/]+$" - }, description = "Google Cloud KMS encryption key for the API key", helpText = "The Cloud KMS key to decrypt the API key. This parameter must be " diff --git a/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/GCSToSourceDb.java b/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/GCSToSourceDb.java index c701ad91e5..d4f4f13a25 100644 --- a/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/GCSToSourceDb.java +++ b/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/GCSToSourceDb.java @@ -134,7 +134,7 @@ public interface Options extends PipelineOptions, StreamingOptions { void setTimerIntervalInMilliSec(Integer value); - @TemplateParameter.Text( + @TemplateParameter.DateTime( order = 6, optional = true, description = @@ -165,7 +165,7 @@ public interface Options extends PipelineOptions, StreamingOptions { void setWindowDuration(String windowDuration); - @TemplateParameter.Text( + @TemplateParameter.GcsReadFolder( order = 8, optional = false, description = "GCS input directory path", diff --git a/v2/google-ads-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/GoogleAdsToBigQuery.java b/v2/google-ads-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/GoogleAdsToBigQuery.java index 78bc803d28..31db481d80 100644 --- a/v2/google-ads-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/GoogleAdsToBigQuery.java +++ b/v2/google-ads-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/GoogleAdsToBigQuery.java @@ -115,9 +115,9 @@ public interface GoogleAdsToBigQueryOptions extends WriteOptions, GoogleAdsOptio + "Divide the desired per pipeline QPS by the maximum number of workers. " + "Avoid exceeding per-account or developer token limits. 
" + "See Rate Limits (https://developers.google.com/google-ads/api/docs/best-practices/rate-limits).") - double getQpsPerWorker(); + Double getQpsPerWorker(); - void setQpsPerWorker(double qpsPerWorker); + void setQpsPerWorker(Double qpsPerWorker); @TemplateParameter.GcsReadFile( order = 5, diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/BigtableChangeStreamsToPubSubOptions.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/BigtableChangeStreamsToPubSubOptions.java index cdf6e09e30..fbf4d5daba 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/BigtableChangeStreamsToPubSubOptions.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/BigtableChangeStreamsToPubSubOptions.java @@ -31,7 +31,7 @@ public interface BigtableChangeStreamsToPubSubOptions extends DataflowPipelineOptions, BigtableCommonOptions.ReadChangeStreamOptions { - @TemplateParameter.Text( + @TemplateParameter.PubsubTopic( order = 1, description = "The output Pub/Sub topic name", helpText = "The name of the destination Pub/Sub topic.") diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToBigQueryOptions.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToBigQueryOptions.java index cec4b3d8dc..51b9d1d98e 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToBigQueryOptions.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToBigQueryOptions.java @@ -249,7 +249,7 @@ public interface SpannerChangeStreamsToBigQueryOptions description = "Whether or not to disable retries for the DLQ", helpText = "Whether or not to disable retries for the DLQ") @Default.Boolean(false) - boolean getDisableDlqRetries(); + Boolean getDisableDlqRetries(); - void setDisableDlqRetries(boolean value); + void setDisableDlqRetries(Boolean value); } diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToPubSubOptions.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToPubSubOptions.java index b6d9809388..7a7f66a475 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToPubSubOptions.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToPubSubOptions.java @@ -151,8 +151,12 @@ public interface SpannerChangeStreamsToPubSubOptions extends DataflowPipelineOpt void setSpannerHost(String value); - @TemplateParameter.Text( + @TemplateParameter.Enum( order = 12, + enumOptions = { + @TemplateEnumOption("JSON"), + @TemplateEnumOption("AVRO") + }, optional = true, description = "Output data format", helpText = @@ -187,7 +191,7 @@ public interface SpannerChangeStreamsToPubSubOptions extends DataflowPipelineOpt void setPubsubProjectId(String pubsubProjectId); - @TemplateParameter.Text( + @TemplateParameter.PubsubTopic( order = 15, description = "The output Pub/Sub topic", helpText = "The Pub/Sub topic to publish PubsubMessage.") diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/TextIOToBigQuery.java 
b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/TextIOToBigQuery.java index 649f1959a0..74035b8f5b 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/TextIOToBigQuery.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/TextIOToBigQuery.java @@ -210,10 +210,9 @@ public interface Options extends DataflowPipelineOptions, PythonExternalTextTransformerOptions, BigQueryStorageApiBatchOptions { - @TemplateParameter.Text( + @TemplateParameter.GcsReadFile( order = 1, optional = false, - regexes = {"^gs:\\/\\/[^\\n\\r]+$"}, description = "The GCS location of the text you'd like to process", helpText = "The gs:// path to the text in Cloud Storage you'd like to process.", example = "gs://your-bucket/your-file.txt") @@ -232,10 +231,9 @@ public interface Options void setJSONPath(String value); - @TemplateParameter.Text( + @TemplateParameter.BigQueryTable( order = 3, optional = false, - regexes = {".+:.+\\..+"}, description = "Output table to write to", helpText = "The BigQuery table name you want to create to store your processed data in. If you reuse an existing BigQuery table, the data is appended to the destination table.", @@ -244,10 +242,9 @@ public interface Options void setOutputTable(String value); - @TemplateParameter.Text( + @TemplateParameter.GcsWriteFile( order = 4, optional = false, - regexes = {"^gs:\\/\\/[^\\n\\r]+$"}, description = "GCS path to javascript fn for transforming output", helpText = "The Cloud Storage URI of the `.js` file that defines the JavaScript user-defined function (UDF) you want to use.", diff --git a/v2/googlecloud-to-splunk/src/main/java/com/google/cloud/teleport/v2/transforms/SplunkConverters.java b/v2/googlecloud-to-splunk/src/main/java/com/google/cloud/teleport/v2/transforms/SplunkConverters.java index 6ecbb00ea1..96581de347 100644 --- a/v2/googlecloud-to-splunk/src/main/java/com/google/cloud/teleport/v2/transforms/SplunkConverters.java +++ b/v2/googlecloud-to-splunk/src/main/java/com/google/cloud/teleport/v2/transforms/SplunkConverters.java @@ -154,12 +154,9 @@ public interface SplunkOptions extends PipelineOptions { void setTokenSource(String tokenSource); - @TemplateParameter.Text( + @TemplateParameter.KmsEncryptionKey( order = 7, optional = true, - regexes = { - "^projects\\/[^\\n\\r\\/]+\\/locations\\/[^\\n\\r\\/]+\\/keyRings\\/[^\\n\\r\\/]+\\/cryptoKeys\\/[^\\n\\r\\/]+$" - }, description = "Google Cloud KMS encryption key for the token", helpText = "The Cloud KMS key to decrypt the HEC token string. 
This parameter must be " @@ -188,7 +185,7 @@ public interface SplunkOptions extends PipelineOptions { void setTokenSecretId(String secretId); - @TemplateParameter.Text( + @TemplateParameter.GcsReadFile( order = 9, optional = true, description = "Cloud Storage path to root CA certificate.", diff --git a/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java b/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java index 304a2295f4..6a5cd2c789 100644 --- a/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java +++ b/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java @@ -172,7 +172,7 @@ public interface JmsToPubsubOptions extends PipelineOptions { void setInputType(String inputType); - @TemplateParameter.Text( + @TemplateParameter.PubsubTopic( order = 4, description = "Output Pub/Sub topic", helpText = @@ -192,7 +192,7 @@ public interface JmsToPubsubOptions extends PipelineOptions { void setUsername(String username); - @TemplateParameter.Text( + @TemplateParameter.Password( order = 6, description = "JMS Password", helpText = "The password associated with the provided username.", diff --git a/v2/pubsub-binary-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/PubsubProtoToBigQuery.java b/v2/pubsub-binary-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/PubsubProtoToBigQuery.java index cb797f6cd1..16f5eecfb7 100644 --- a/v2/pubsub-binary-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/PubsubProtoToBigQuery.java +++ b/v2/pubsub-binary-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/PubsubProtoToBigQuery.java @@ -187,7 +187,7 @@ public interface PubSubProtoToBigQueryOptions void setFullMessageName(String value); - @TemplateParameter.Text( + @TemplateParameter.Boolean( order = 3, optional = true, description = "Preserve Proto Field Names", diff --git a/v2/pubsub-cdc-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/PubSubCdcToBigQuery.java b/v2/pubsub-cdc-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/PubSubCdcToBigQuery.java index d45c4dbebf..c6cfe8bec8 100644 --- a/v2/pubsub-cdc-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/PubSubCdcToBigQuery.java +++ b/v2/pubsub-cdc-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/PubSubCdcToBigQuery.java @@ -152,7 +152,7 @@ public interface Options void setAutoMapTables(Boolean value); - @TemplateParameter.Text( + @TemplateParameter.GcsReadFile( order = 3, optional = true, description = "Cloud Storage file with BigQuery schema fields to be used in DDL", @@ -235,7 +235,7 @@ public interface Options void setWindowDuration(String value); // Thread Count - @TemplateParameter.Text( + @TemplateParameter.Integer( order = 10, optional = true, description = "Thread Number", diff --git a/v2/pubsub-to-jms/src/main/java/com/google/cloud/teleport/v2/templates/PubsubToJms.java b/v2/pubsub-to-jms/src/main/java/com/google/cloud/teleport/v2/templates/PubsubToJms.java index 83fe2e9a25..b78c8d57ec 100644 --- a/v2/pubsub-to-jms/src/main/java/com/google/cloud/teleport/v2/templates/PubsubToJms.java +++ b/v2/pubsub-to-jms/src/main/java/com/google/cloud/teleport/v2/templates/PubsubToJms.java @@ -183,7 +183,7 @@ public interface PubsubToJmsOptions extends PipelineOptions { void setUsername(String username); - @TemplateParameter.Text( + @TemplateParameter.Password( order = 6, description = "JMS Password", helpText = "Password 
for username provided for authentication with JMS server", diff --git a/v2/pubsub-to-redis/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToRedis.java b/v2/pubsub-to-redis/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToRedis.java index 24c62ed1aa..d5d76168ba 100644 --- a/v2/pubsub-to-redis/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToRedis.java +++ b/v2/pubsub-to-redis/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToRedis.java @@ -164,7 +164,7 @@ public interface PubSubToRedisOptions void setRedisPort(int redisPort); - @TemplateParameter.Text( + @TemplateParameter.Password( order = 4, description = "Redis DB Password", helpText = "Redis database password.") diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/options/SourceDbToSpannerOptions.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/options/SourceDbToSpannerOptions.java index f348eafc80..d78cf945af 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/options/SourceDbToSpannerOptions.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/options/SourceDbToSpannerOptions.java @@ -218,7 +218,7 @@ public interface SourceDbToSpannerOptions extends CommonTemplateOptions { void setIgnoreColumns(String value); - @TemplateParameter.Text( + @TemplateParameter.Integer( order = 18, optional = true, description = "Maximum number of connections to Source database per worker", @@ -230,7 +230,7 @@ public interface SourceDbToSpannerOptions extends CommonTemplateOptions { void setMaxConnections(Integer value); - @TemplateParameter.Text( + @TemplateParameter.Boolean( order = 19, optional = true, description = "enable connection reconnects", @@ -241,7 +241,7 @@ public interface SourceDbToSpannerOptions extends CommonTemplateOptions { void setReconnectsEnabled(Boolean value); - @TemplateParameter.Text( + @TemplateParameter.Integer( order = 20, optional = true, description = "Maximum number of connection reconnect attempts, if reconnects are enabled", diff --git a/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/SpannerChangeStreamsToShardedFileSink.java b/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/SpannerChangeStreamsToShardedFileSink.java index 084e2f8de5..63d2fadaf2 100644 --- a/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/SpannerChangeStreamsToShardedFileSink.java +++ b/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/SpannerChangeStreamsToShardedFileSink.java @@ -154,7 +154,7 @@ public interface Options extends PipelineOptions, StreamingOptions { void setMetadataDatabase(String value); - @TemplateParameter.Text( + @TemplateParameter.DateTime( order = 7, optional = true, description = "Changes are read from the given timestamp", @@ -164,7 +164,7 @@ public interface Options extends PipelineOptions, StreamingOptions { void setStartTimestamp(String value); - @TemplateParameter.Text( + @TemplateParameter.DateTime( order = 8, optional = true, description = "Changes are read until the given timestamp", diff --git a/v2/streaming-data-generator/src/main/java/com/google/cloud/teleport/v2/templates/StreamingDataGenerator.java b/v2/streaming-data-generator/src/main/java/com/google/cloud/teleport/v2/templates/StreamingDataGenerator.java index b949cb53d2..c0439ba943 100644 --- 
a/v2/streaming-data-generator/src/main/java/com/google/cloud/teleport/v2/templates/StreamingDataGenerator.java +++ b/v2/streaming-data-generator/src/main/java/com/google/cloud/teleport/v2/templates/StreamingDataGenerator.java @@ -95,7 +95,7 @@ public class StreamingDataGenerator { * the executor at the command-line. */ public interface StreamingDataGeneratorOptions extends PipelineOptions { - @TemplateParameter.Text( + @TemplateParameter.Long( order = 1, regexes = {"^[1-9][0-9]*$"}, description = "Required output rate", @@ -361,7 +361,7 @@ public interface StreamingDataGeneratorOptions extends PipelineOptions { void setStatement(String statement); - @TemplateParameter.Text( + @TemplateParameter.ProjectId( order = 22, optional = true, parentName = "sinkType", @@ -409,7 +409,7 @@ public interface StreamingDataGeneratorOptions extends PipelineOptions { void setSpannerTableName(String spannerTableName); - @TemplateParameter.Text( + @TemplateParameter.Long( order = 26, optional = true, parentName = "sinkType", @@ -422,7 +422,7 @@ public interface StreamingDataGeneratorOptions extends PipelineOptions { void setMaxNumMutations(Long value); - @TemplateParameter.Text( + @TemplateParameter.Long( order = 27, optional = true, parentName = "sinkType", @@ -435,7 +435,7 @@ public interface StreamingDataGeneratorOptions extends PipelineOptions { void setMaxNumRows(Long value); - @TemplateParameter.Text( + @TemplateParameter.Long( order = 28, optional = true, parentName = "sinkType", @@ -448,7 +448,7 @@ public interface StreamingDataGeneratorOptions extends PipelineOptions { void setBatchSizeBytes(Long value); - @TemplateParameter.Text( + @TemplateParameter.Long( order = 29, optional = true, parentName = "sinkType", From 968a7d19726ba636d87acea2ee3c2171c725d107 Mon Sep 17 00:00:00 2001 From: "vitaly.terentyev" Date: Mon, 15 Apr 2024 19:08:14 +0400 Subject: [PATCH 23/70] Fix Spotless --- .../v2/options/SpannerChangeStreamsToPubSubOptions.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToPubSubOptions.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToPubSubOptions.java index 7a7f66a475..634d3e59ab 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToPubSubOptions.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToPubSubOptions.java @@ -153,10 +153,7 @@ public interface SpannerChangeStreamsToPubSubOptions extends DataflowPipelineOpt @TemplateParameter.Enum( order = 12, - enumOptions = { - @TemplateEnumOption("JSON"), - @TemplateEnumOption("AVRO") - }, + enumOptions = {@TemplateEnumOption("JSON"), @TemplateEnumOption("AVRO")}, optional = true, description = "Output data format", helpText = From b560460daf08afcaf3a920934b49a0a3acbb6879 Mon Sep 17 00:00:00 2001 From: "vitaly.terentyev" Date: Mon, 15 Apr 2024 19:48:48 +0400 Subject: [PATCH 24/70] Removed regexes --- .../cloud/teleport/v2/templates/StreamingDataGenerator.java | 6 ------ 1 file changed, 6 deletions(-) diff --git a/v2/streaming-data-generator/src/main/java/com/google/cloud/teleport/v2/templates/StreamingDataGenerator.java b/v2/streaming-data-generator/src/main/java/com/google/cloud/teleport/v2/templates/StreamingDataGenerator.java index c0439ba943..2c007c8a23 100644 --- 
a/v2/streaming-data-generator/src/main/java/com/google/cloud/teleport/v2/templates/StreamingDataGenerator.java +++ b/v2/streaming-data-generator/src/main/java/com/google/cloud/teleport/v2/templates/StreamingDataGenerator.java @@ -97,7 +97,6 @@ public class StreamingDataGenerator { public interface StreamingDataGeneratorOptions extends PipelineOptions { @TemplateParameter.Long( order = 1, - regexes = {"^[1-9][0-9]*$"}, description = "Required output rate", helpText = "Indicates rate of messages per second to be published to Pub/Sub") @Required @@ -366,7 +365,6 @@ public interface StreamingDataGeneratorOptions extends PipelineOptions { optional = true, parentName = "sinkType", parentTriggerValues = {"SPANNER"}, - regexes = {"^.+$"}, description = "GCP Project Id of where the Spanner table lives.", helpText = "GCP Project Id of where the Spanner table lives.") String getProjectId(); @@ -414,7 +412,6 @@ public interface StreamingDataGeneratorOptions extends PipelineOptions { optional = true, parentName = "sinkType", parentTriggerValues = {"SPANNER"}, - regexes = {"^[1-9][0-9]*$"}, description = "Max mutatated cells per batch.", helpText = "Specifies the cell mutation limit (maximum number of mutated cells per batch). Default value is 5000") @@ -427,7 +424,6 @@ public interface StreamingDataGeneratorOptions extends PipelineOptions { optional = true, parentName = "sinkType", parentTriggerValues = {"SPANNER"}, - regexes = {"^[1-9][0-9]*$"}, description = "Max rows per batch.", helpText = "Specifies the row mutation limit (maximum number of mutated rows per batch). Default value is 1000") @@ -440,7 +436,6 @@ public interface StreamingDataGeneratorOptions extends PipelineOptions { optional = true, parentName = "sinkType", parentTriggerValues = {"SPANNER"}, - regexes = {"^[1-9][0-9]*$"}, description = "Max batch size in bytes.", helpText = "Specifies the batch size limit (max number of bytes mutated per batch). 
Default value is 1MB") @@ -453,7 +448,6 @@ public interface StreamingDataGeneratorOptions extends PipelineOptions { optional = true, parentName = "sinkType", parentTriggerValues = {"SPANNER"}, - regexes = {"^[1-9][0-9]*$"}, description = "Commit deadline in seconds for write requests.", helpText = "Specifies the deadline in seconds for the Commit API call.") Long getCommitDeadlineSeconds(); From e3ddfecf0f96b2f6bb7065ac9f927a2b4ecee951 Mon Sep 17 00:00:00 2001 From: "vitaly.terentyev" Date: Tue, 16 Apr 2024 11:18:26 +0400 Subject: [PATCH 25/70] Update old types for v1 templates --- .../cloud/teleport/bigtable/CassandraToBigtable.java | 5 +---- .../google/cloud/teleport/spanner/ImportPipeline.java | 3 +-- .../cloud/teleport/templates/BigQueryToTFRecord.java | 9 +++------ .../teleport/templates/common/DatadogConverters.java | 5 +---- .../teleport/templates/common/DatastoreConverters.java | 6 ++---- .../teleport/templates/common/SplunkConverters.java | 7 ++----- .../v2/options/DataplexBigQueryToGcsOptions.java | 5 ++++- 7 files changed, 14 insertions(+), 26 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/bigtable/CassandraToBigtable.java b/v1/src/main/java/com/google/cloud/teleport/bigtable/CassandraToBigtable.java index f80d7116de..e72d47cb34 100644 --- a/v1/src/main/java/com/google/cloud/teleport/bigtable/CassandraToBigtable.java +++ b/v1/src/main/java/com/google/cloud/teleport/bigtable/CassandraToBigtable.java @@ -78,12 +78,9 @@ public interface Options extends PipelineOptions { @SuppressWarnings("unused") void setCassandraHosts(ValueProvider hosts); - @TemplateParameter.Text( + @TemplateParameter.Integer( order = 2, optional = true, - regexes = { - "^([0-9]{1,4}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])$" - }, description = "Cassandra Port", helpText = "The TCP port to use to reach Apache Cassandra on the nodes. The default value is 9042.") diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java index 4376c1d5ea..d640484310 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java @@ -173,10 +173,9 @@ public interface Options extends PipelineOptions { void setWaitUntilFinish(boolean value); - @TemplateParameter.Text( + @TemplateParameter.Integer( order = 10, optional = true, - regexes = {"[0-9]+"}, description = "DDL Creation timeout in minutes", helpText = "The timeout in minutes for DDL statements performed by the template. The default value is 30 minutes.") diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/BigQueryToTFRecord.java b/v1/src/main/java/com/google/cloud/teleport/templates/BigQueryToTFRecord.java index e081591200..e2b5dd65b5 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/BigQueryToTFRecord.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/BigQueryToTFRecord.java @@ -350,10 +350,9 @@ public interface Options extends BigQueryReadOptions { void setOutputSuffix(ValueProvider outputSuffix); - @TemplateParameter.Text( + @TemplateParameter.Float( order = 3, optional = true, - regexes = {"(^\\.[1-9]*$)|(^[01]*)"}, description = "Percentage of data to be in the training set ", helpText = "The percentage of query data allocated to training TFRecord files. 
The default value is 1, or 100%.") @@ -362,10 +361,9 @@ public interface Options extends BigQueryReadOptions { void setTrainingPercentage(ValueProvider trainingPercentage); - @TemplateParameter.Text( + @TemplateParameter.Float( order = 4, optional = true, - regexes = {"(^\\.[1-9]*$)|(^[01]*)"}, description = "Percentage of data to be in the testing set ", helpText = "The percentage of query data allocated to testing TFRecord files. The default value is 0, or 0%.") @@ -374,10 +372,9 @@ public interface Options extends BigQueryReadOptions { void setTestingPercentage(ValueProvider testingPercentage); - @TemplateParameter.Text( + @TemplateParameter.Float( order = 5, optional = true, - regexes = {"(^\\.[1-9]*$)|(^[01]*)"}, description = "Percentage of data to be in the validation set ", helpText = "The percentage of query data allocated to validation TFRecord files. The default value is 0, or 0%.") diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/common/DatadogConverters.java b/v1/src/main/java/com/google/cloud/teleport/templates/common/DatadogConverters.java index a9ec270208..f0b7e7d92e 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/common/DatadogConverters.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/common/DatadogConverters.java @@ -136,12 +136,9 @@ public interface DatadogOptions extends PipelineOptions { void setIncludePubsubMessage(ValueProvider includePubsubMessage); - @TemplateParameter.Text( + @TemplateParameter.KmsEncryptionKey( order = 6, optional = true, - regexes = { - "^projects\\/[^\\n\\r\\/]+\\/locations\\/[^\\n\\r\\/]+\\/keyRings\\/[^\\n\\r\\/]+\\/cryptoKeys\\/[^\\n\\r\\/]+$" - }, description = "Google Cloud KMS encryption key for the API key", helpText = "The Cloud KMS key to use to decrypt the API Key. You must provide this parameter if the `apiKeySource` is set to `KMS`. If the Cloud KMS key is provided, you must pass in an encrypted API Key.", diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/common/DatastoreConverters.java b/v1/src/main/java/com/google/cloud/teleport/templates/common/DatastoreConverters.java index d0c518bcdb..5fa9048c8a 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/common/DatastoreConverters.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/common/DatastoreConverters.java @@ -216,10 +216,9 @@ public interface DatastoreWriteOptions extends PipelineOptions { /** * @deprecated Please use getFirestoreHintNumWorkers() instead. */ - @TemplateParameter.Text( + @TemplateParameter.Integer( order = 4, optional = true, - regexes = {"^[1-9]+[0-9]*$"}, description = "Expected number of workers", helpText = "Hint for the expected number of workers in the Datastore ramp-up throttling step. Default is `500`.") @@ -300,10 +299,9 @@ public interface DatastoreDeleteOptions extends PipelineOptions { /** * @deprecated Please use getFirestoreHintNumWorkers() instead. 
*/ - @TemplateParameter.Text( + @TemplateParameter.Integer( order = 2, optional = true, - regexes = {"^[1-9][0-9]*$"}, description = "Expected number of workers", helpText = "Hint for the expected number of workers in the Datastore ramp-up throttling step.") diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/common/SplunkConverters.java b/v1/src/main/java/com/google/cloud/teleport/templates/common/SplunkConverters.java index d3d0e2e986..46e1a2eb27 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/common/SplunkConverters.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/common/SplunkConverters.java @@ -151,12 +151,9 @@ public interface SplunkOptions extends PipelineOptions { void setIncludePubsubMessage(ValueProvider includePubsubMessage); - @TemplateParameter.Text( + @TemplateParameter.KmsEncryptionKey( order = 7, optional = true, - regexes = { - "^projects\\/[^\\n\\r\\/]+\\/locations\\/[^\\n\\r\\/]+\\/keyRings\\/[^\\n\\r\\/]+\\/cryptoKeys\\/[^\\n\\r\\/]+$" - }, description = "Google Cloud KMS encryption key for the token", helpText = "The Cloud KMS key to use to decrypt the HEC token string. This parameter must be provided when tokenSource is set to KMS. If the Cloud KMS key is provided, the HEC token string `must` be passed in encrypted.", @@ -217,7 +214,7 @@ public interface SplunkOptions extends PipelineOptions { void setEnableBatchLogs(ValueProvider enableBatchLogs); - @TemplateParameter.Text( + @TemplateParameter.Boolean( order = 12, optional = true, description = diff --git a/v2/dataplex/src/main/java/com/google/cloud/teleport/v2/options/DataplexBigQueryToGcsOptions.java b/v2/dataplex/src/main/java/com/google/cloud/teleport/v2/options/DataplexBigQueryToGcsOptions.java index bfbde306ba..eb1e7c1165 100644 --- a/v2/dataplex/src/main/java/com/google/cloud/teleport/v2/options/DataplexBigQueryToGcsOptions.java +++ b/v2/dataplex/src/main/java/com/google/cloud/teleport/v2/options/DataplexBigQueryToGcsOptions.java @@ -75,9 +75,12 @@ public interface DataplexBigQueryToGcsOptions void setDestinationStorageBucketAssetName(String destinationStorageBucketAssetName); - @TemplateParameter.DateTime( + @TemplateParameter.Text( order = 4, optional = true, + regexes = { + "^([0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}:[0-9]{2}(Z|[+-][0-9]{2}:[0-9]{2})?)?|-[pP]([0-9]+(\\.[0-9]+)?Y)?([0-9]+(\\.[0-9]+)?M)?([0-9]+(\\.[0-9]+)?W)?([0-9]+(\\.[0-9]+)?D)?(T([0-9]+(\\.[0-9]+)?H)?([0-9]+(\\.[0-9]+)?M)?([0-9]+(\\.[0-9]+)?S)?)?)$" + }, description = "Move data older than the date.", helpText = "Move data older than this date (and optional time). For partitioned tables, move partitions last modified before this date/time. For non-partitioned tables, move if the table was last modified before this date/time. If not specified, move all tables / partitions. The date/time is parsed in the default time zone by default, but optional suffixes Z and +HH:mm are supported. Format: YYYY-MM-DD or YYYY-MM-DDTHH:mm:ss or YYYY-MM-DDTHH:mm:ss+03:00. Relative date/time (https://en.wikipedia.org/wiki/ISO_8601#Durations) is also supported. 
Format: -PnDTnHnMn.nS (must start with -P meaning time in the past).") From 89cf17e9c7da6f01f9b573dc4440fe73fbf01773 Mon Sep 17 00:00:00 2001 From: "vitaly.terentyev" Date: Thu, 18 Apr 2024 12:52:13 +0400 Subject: [PATCH 26/70] Change DateTime to Text --- .../com/google/cloud/teleport/v2/templates/GCSToSourceDb.java | 2 +- .../google/cloud/teleport/v2/templates/TextIOToBigQuery.java | 2 +- .../v2/templates/SpannerChangeStreamsToShardedFileSink.java | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/GCSToSourceDb.java b/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/GCSToSourceDb.java index d4f4f13a25..5dedd6d505 100644 --- a/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/GCSToSourceDb.java +++ b/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/GCSToSourceDb.java @@ -134,7 +134,7 @@ public interface Options extends PipelineOptions, StreamingOptions { void setTimerIntervalInMilliSec(Integer value); - @TemplateParameter.DateTime( + @TemplateParameter.Text( order = 6, optional = true, description = diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/TextIOToBigQuery.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/TextIOToBigQuery.java index 74035b8f5b..86f6f13690 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/TextIOToBigQuery.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/TextIOToBigQuery.java @@ -242,7 +242,7 @@ public interface Options void setOutputTable(String value); - @TemplateParameter.GcsWriteFile( + @TemplateParameter.GcsReadFile( order = 4, optional = false, description = "GCS path to javascript fn for transforming output", diff --git a/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/SpannerChangeStreamsToShardedFileSink.java b/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/SpannerChangeStreamsToShardedFileSink.java index 63d2fadaf2..084e2f8de5 100644 --- a/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/SpannerChangeStreamsToShardedFileSink.java +++ b/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/SpannerChangeStreamsToShardedFileSink.java @@ -154,7 +154,7 @@ public interface Options extends PipelineOptions, StreamingOptions { void setMetadataDatabase(String value); - @TemplateParameter.DateTime( + @TemplateParameter.Text( order = 7, optional = true, description = "Changes are read from the given timestamp", @@ -164,7 +164,7 @@ public interface Options extends PipelineOptions, StreamingOptions { void setStartTimestamp(String value); - @TemplateParameter.DateTime( + @TemplateParameter.Text( order = 8, optional = true, description = "Changes are read until the given timestamp", From b01840746b0e7ecb65f7ad37198786a3553ca205 Mon Sep 17 00:00:00 2001 From: "vitaly.terentyev" Date: Mon, 13 May 2024 17:55:37 +0400 Subject: [PATCH 27/70] Fix PubsubTopic parameters --- .../v2/options/BigtableChangeStreamsToPubSubOptions.java | 2 +- .../v2/options/SpannerChangeStreamsToPubSubOptions.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/BigtableChangeStreamsToPubSubOptions.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/BigtableChangeStreamsToPubSubOptions.java index fbf4d5daba..cdf6e09e30 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/BigtableChangeStreamsToPubSubOptions.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/BigtableChangeStreamsToPubSubOptions.java @@ -31,7 +31,7 @@ public interface BigtableChangeStreamsToPubSubOptions extends DataflowPipelineOptions, BigtableCommonOptions.ReadChangeStreamOptions { - @TemplateParameter.PubsubTopic( + @TemplateParameter.Text( order = 1, description = "The output Pub/Sub topic name", helpText = "The name of the destination Pub/Sub topic.") diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToPubSubOptions.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToPubSubOptions.java index 634d3e59ab..2bc7b84b38 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToPubSubOptions.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToPubSubOptions.java @@ -188,7 +188,7 @@ public interface SpannerChangeStreamsToPubSubOptions extends DataflowPipelineOpt void setPubsubProjectId(String pubsubProjectId); - @TemplateParameter.PubsubTopic( + @TemplateParameter.Text( order = 15, description = "The output Pub/Sub topic", helpText = "The Pub/Sub topic to publish PubsubMessage.") From 83992c61485361b7761100c143cc34c8807a4563 Mon Sep 17 00:00:00 2001 From: "vitaly.terentyev" Date: Mon, 13 May 2024 18:10:33 +0400 Subject: [PATCH 28/70] Resolve old comments --- .../java/com/google/cloud/teleport/spanner/ExportPipeline.java | 2 -- .../com/google/cloud/teleport/v2/templates/PubSubToRedis.java | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java index 867ed2e999..949201edac 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java @@ -176,8 +176,6 @@ public interface ExportPipelineOptions extends PipelineOptions { @TemplateParameter.Text( order = 10, optional = true, - parentName = "shouldExportRelatedTables", - parentTriggerValues = {"true"}, regexes = {"^[a-zA-Z0-9_]+(,[a-zA-Z0-9_]+)*$"}, description = "Cloud Spanner table name(s).", helpText = diff --git a/v2/pubsub-to-redis/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToRedis.java b/v2/pubsub-to-redis/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToRedis.java index d5d76168ba..79f56fc4b4 100644 --- a/v2/pubsub-to-redis/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToRedis.java +++ b/v2/pubsub-to-redis/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToRedis.java @@ -221,7 +221,8 @@ public interface PubSubToRedisOptions optional = true, parentName = "redisSinkType", parentTriggerValues = {"HASH_SINK", "LOGGING_SINK"}, - description = "Hash key expiration time in sec (ttl)", + description = + "Hash key expiration time in sec (ttl), supported only for HASH_SINK and LOGGING_SINK", helpText = "Key 
expiration time in sec (ttl, default for HASH_SINK is -1 i.e. no expiration)") @Default.Long(-1L) From 0bea310adcc6e9a843a9de0081a53c8c7544e852 Mon Sep 17 00:00:00 2001 From: Dippatel98 Date: Mon, 13 May 2024 17:22:00 +0000 Subject: [PATCH 29/70] Add integration test for KafkaToBigQuery Template --- .../BigQueryDynamicDestination.java | 2 +- .../templates/KafkaToBigQueryFlexAvroIT.java | 412 ++++++++++++++++++ .../avro_schema.avsc | 15 + .../other_avro_schema.avsc | 19 + 4 files changed, 447 insertions(+), 1 deletion(-) create mode 100644 v2/kafka-to-bigquery/src/test/java/com/google/cloud/teleport/v2/templates/KafkaToBigQueryFlexAvroIT.java create mode 100644 v2/kafka-to-bigquery/src/test/resources/KafkaToBigQueryFlexAvroIT/avro_schema.avsc create mode 100644 v2/kafka-to-bigquery/src/test/resources/KafkaToBigQueryFlexAvroIT/other_avro_schema.avsc diff --git a/v2/kafka-to-bigquery/src/main/java/com/google/cloud/teleport/v2/transforms/BigQueryDynamicDestination.java b/v2/kafka-to-bigquery/src/main/java/com/google/cloud/teleport/v2/transforms/BigQueryDynamicDestination.java index a33fb1d7f1..8f43357d00 100644 --- a/v2/kafka-to-bigquery/src/main/java/com/google/cloud/teleport/v2/transforms/BigQueryDynamicDestination.java +++ b/v2/kafka-to-bigquery/src/main/java/com/google/cloud/teleport/v2/transforms/BigQueryDynamicDestination.java @@ -58,7 +58,7 @@ public TableDestination getTable(GenericRecord element) { // tablename + record name (same across schemas) + schema id? String bqQualifiedFullName = element.getSchema().getFullName().replace(".", "-"); String tableName = - this.tableNamePrefix + (this.tableNamePrefix == "" ? "" : "-") + bqQualifiedFullName; + this.tableNamePrefix + (this.tableNamePrefix.isBlank() ? "" : "-") + bqQualifiedFullName; String tableSpec = this.projectName + ":" + this.datasetName + "." + tableName; return new TableDestination(tableSpec, null); } diff --git a/v2/kafka-to-bigquery/src/test/java/com/google/cloud/teleport/v2/templates/KafkaToBigQueryFlexAvroIT.java b/v2/kafka-to-bigquery/src/test/java/com/google/cloud/teleport/v2/templates/KafkaToBigQueryFlexAvroIT.java new file mode 100644 index 0000000000..faf72a3885 --- /dev/null +++ b/v2/kafka-to-bigquery/src/test/java/com/google/cloud/teleport/v2/templates/KafkaToBigQueryFlexAvroIT.java @@ -0,0 +1,412 @@ +/* + * Copyright (C) 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.teleport.v2.templates; + +import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatPipeline; +import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatResult; + +import com.google.cloud.bigquery.Field; +import com.google.cloud.bigquery.Field.Mode; +import com.google.cloud.bigquery.Schema; +import com.google.cloud.bigquery.StandardSQLTypeName; +import com.google.cloud.bigquery.TableId; +import com.google.cloud.teleport.metadata.TemplateIntegrationTest; +import com.google.cloud.teleport.v2.kafka.transforms.BinaryAvroSerializer; +import com.google.cloud.teleport.v2.utils.SecretManagerUtils; +import com.google.common.io.Resources; +import io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient; +import io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException; +import io.confluent.kafka.serializers.KafkaAvroSerializer; +import java.io.IOException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.function.Function; +import net.jcip.annotations.NotThreadSafe; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.GenericRecordBuilder; +import org.apache.beam.it.common.PipelineLauncher.LaunchConfig; +import org.apache.beam.it.common.PipelineLauncher.LaunchInfo; +import org.apache.beam.it.common.PipelineOperator.Result; +import org.apache.beam.it.common.TestProperties; +import org.apache.beam.it.common.utils.ResourceManagerUtils; +import org.apache.beam.it.conditions.ConditionCheck; +import org.apache.beam.it.gcp.TemplateTestBase; +import org.apache.beam.it.gcp.bigquery.BigQueryResourceManager; +import org.apache.beam.it.gcp.bigquery.conditions.BigQueryRowsCheck; +import org.apache.beam.it.kafka.KafkaResourceManager; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.clients.producer.RecordMetadata; +import org.apache.kafka.common.serialization.StringSerializer; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** Integration test for {@link KafkaToBigQuery} (Kafka_To_BigQuery). 
*/ +@Category(TemplateIntegrationTest.class) +@TemplateIntegrationTest(KafkaToBigQueryFlex.class) +@RunWith(JUnit4.class) +@NotThreadSafe +public final class KafkaToBigQueryFlexAvroIT extends TemplateTestBase { + + private static final Logger LOG = LoggerFactory.getLogger(KafkaToBigQueryFlexAvroIT.class); + + private KafkaResourceManager kafkaResourceManager; + private BigQueryResourceManager bigQueryClient; + private String bqDatasetId; + private TableId deadletterTableId; + private TableId tableId; + private Schema bqSchema; + private org.apache.avro.Schema avroSchema; + private org.apache.avro.Schema otherAvroSchema; + + @Before + public void setup() throws IOException { + bigQueryClient = BigQueryResourceManager.builder(testName, PROJECT, credentials).build(); + bqDatasetId = bigQueryClient.createDataset(REGION); + bqSchema = + Schema.of( + Field.of("productId", StandardSQLTypeName.INT64), + Field.newBuilder("productName", StandardSQLTypeName.STRING).setMaxLength(10L).build()); + + tableId = bigQueryClient.createTable(testName, bqSchema); + kafkaResourceManager = + KafkaResourceManager.builder(testName).setHost(TestProperties.hostIp()).build(); + + URL avroSchemaResource = Resources.getResource("KafkaToBigQueryFlexAvroIT/avro_schema.avsc"); + gcsClient.uploadArtifact("avro_schema.avsc", avroSchemaResource.getPath()); + avroSchema = new org.apache.avro.Schema.Parser().parse(avroSchemaResource.openStream()); + + URL otherAvroSchemaResource = + Resources.getResource("KafkaToBigQueryFlexAvroIT/other_avro_schema.avsc"); + gcsClient.uploadArtifact("other_avro_schema.avsc", otherAvroSchemaResource.getPath()); + otherAvroSchema = + new org.apache.avro.Schema.Parser().parse(otherAvroSchemaResource.openStream()); + } + + @After + public void tearDown() { + ResourceManagerUtils.cleanResources(kafkaResourceManager, bigQueryClient); + } + + @Test + public void testKafkaToBigQueryAvroInConfluentFormat() throws IOException, RestClientException { + baseKafkaToBigQueryAvro( + b -> + b.addParameter("avroFormat", "CONFLUENT_WIRE_FORMAT") + .addParameter("avroSchemaPath", getGcsPath("avro_schema.avsc")) + .addParameter("outputTableSpec", toTableSpecLegacy(tableId))); + } + + @Test + public void testKafkaToBigQueryAvroWithSchemaRegistry() throws IOException, RestClientException { + baseKafkaToBigQueryAvro( + b -> + b.addParameter("avroFormat", "CONFLUENT_WIRE_FORMAT") + .addParameter( + "schemaRegistryConnectionUrl", + SecretManagerUtils.getSecret( + "projects/269744978479/secrets/kafka-schema-registry-connection-url/versions/1")) + .addParameter("outputDataset", bqDatasetId)); + } + + @Test + public void testKafkaToBigQueryAvroInNonConfluentFormat() + throws IOException, RestClientException { + baseKafkaToBigQueryAvro( + b -> + b.addParameter("avroFormat", "NON_WIRE_FORMAT") + .addParameter("avroSchemaPath", getGcsPath("avro_schema.avsc")) + .addParameter("outputTableSpec", toTableSpecLegacy(tableId))); + } + + @Test + public void testKafkaToBigQueryAvroWithExistingDLQ() throws IOException, RestClientException { + deadletterTableId = bigQueryClient.createTable(testName + "_dlq", getDeadletterSchema()); + + baseKafkaToBigQueryAvro( + b -> + b.addParameter("outputDeadletterTable", toTableSpecLegacy(deadletterTableId)) + .addParameter("avroSchemaPath", getGcsPath("avro_schema.avsc")) + .addParameter("outputTableSpec", toTableSpecLegacy(tableId))); + } + + @Test + public void testKafkaToBigQueryAvroWithStorageApi() throws IOException, RestClientException { + baseKafkaToBigQueryAvro( + b -> + 
b.addParameter("useStorageWriteApi", "true") + .addParameter("numStorageWriteApiStreams", "3") + .addParameter("storageWriteApiTriggeringFrequencySec", "3") + .addParameter("avroSchemaPath", getGcsPath("avro_schema.avsc")) + .addParameter("outputTableSpec", toTableSpecLegacy(tableId))); + } + + @Test + public void testKafkaToBigQueryAvroWithStorageApiExistingDLQ() + throws IOException, RestClientException { + deadletterTableId = bigQueryClient.createTable(testName + "_dlq", getDeadletterSchema()); + + baseKafkaToBigQueryAvro( + b -> + b.addParameter("useStorageWriteApi", "true") + .addParameter("numStorageWriteApiStreams", "3") + .addParameter("storageWriteApiTriggeringFrequencySec", "3") + .addParameter("outputDeadletterTable", toTableSpecLegacy(deadletterTableId)) + .addParameter("avroSchemaPath", getGcsPath("avro_schema.avsc")) + .addParameter("outputTableSpec", toTableSpecLegacy(tableId))); + } + + private Schema getDeadletterSchema() { + Schema dlqSchema = + Schema.of( + Field.newBuilder("timestamp", StandardSQLTypeName.TIMESTAMP) + .setMode(Mode.REQUIRED) + .build(), + Field.newBuilder("payloadString", StandardSQLTypeName.STRING) + .setMode(Mode.REQUIRED) + .build(), + Field.newBuilder("payloadBytes", StandardSQLTypeName.BYTES) + .setMode(Mode.REQUIRED) + .build(), + Field.newBuilder( + "attributes", + StandardSQLTypeName.STRUCT, + Field.newBuilder("key", StandardSQLTypeName.STRING) + .setMode(Mode.NULLABLE) + .build(), + Field.newBuilder("value", StandardSQLTypeName.STRING) + .setMode(Mode.NULLABLE) + .build()) + .setMode(Mode.REPEATED) + .build(), + Field.newBuilder("errorMessage", StandardSQLTypeName.STRING) + .setMode(Mode.NULLABLE) + .build(), + Field.newBuilder("stacktrace", StandardSQLTypeName.STRING) + .setMode(Mode.NULLABLE) + .build()); + return dlqSchema; + } + + private void baseKafkaToBigQueryAvro( + Function paramsAdder) + throws IOException, RestClientException { + // Arrange + String topicName = kafkaResourceManager.createTopic(testName, 5); + + LaunchConfig.Builder options = + paramsAdder.apply( + LaunchConfig.builder(testName, specPath) + .addParameter( + "readBootstrapServers", + kafkaResourceManager.getBootstrapServers().replace("PLAINTEXT://", "")) + .addParameter("kafkaReadTopics", topicName) + .addParameter("kafkaReadOffset", "earliest") + .addParameter("messageFormat", "AVRO")); + + // Act + LaunchInfo info = launchTemplate(options); + assertThatPipeline(info).isRunning(); + + List conditions = new ArrayList(); + + if (options.getParameter("avroFormat") != null + && options.getParameter("avroFormat").equals("CONFLUENT_WIRE_FORMAT") + && options.getParameter("schemaRegistryConnectionUrl") != null) { + + publishDoubleSchemaMessages(topicName); + TableId avroTable = TableId.of(bqDatasetId, avroSchema.getFullName().replace(".", "-")); + TableId otherAvroTable = + TableId.of(bqDatasetId, otherAvroSchema.getFullName().replace(".", "-")); + + conditions.add(BigQueryRowsCheck.builder(bigQueryClient, avroTable).setMinRows(20).build()); + conditions.add( + BigQueryRowsCheck.builder(bigQueryClient, otherAvroTable).setMinRows(20).build()); + + } else if (options.getParameter("avroFormat") != null + && options.getParameter("avroFormat").equals("NON_WIRE_FORMAT") + && options.getParameter("avroSchemaPath") != null) { + + publishBinaryMessages(topicName); + conditions.add(BigQueryRowsCheck.builder(bigQueryClient, tableId).setMinRows(20).build()); + + } else { + + publishSingleSchemaMessages(topicName); + conditions.add(BigQueryRowsCheck.builder(bigQueryClient, 
tableId).setMinRows(20).build()); + } + + if (options.getParameter("outputDeadletterTable") != null) { + conditions.add( + BigQueryRowsCheck.builder(bigQueryClient, deadletterTableId).setMinRows(10).build()); + } + + Result result = + pipelineOperator() + .waitForConditionsAndFinish( + createConfig(info), conditions.toArray(new ConditionCheck[0])); + + // Assert + assertThatResult(result).meetsConditions(); + } + + private void publishSingleSchemaMessages(String topicName) + throws IOException, RestClientException { + MockSchemaRegistryClient registryClient = new MockSchemaRegistryClient(); + registryClient.register(topicName + "-value", avroSchema, 1, 1); + + KafkaProducer kafkaProducer = + kafkaResourceManager.buildProducer( + new StringSerializer(), new KafkaAvroSerializer(registryClient)); + + for (int i = 1; i <= 10; i++) { + GenericRecord dataflow = createRecord(Integer.valueOf(i + "1"), "Dataflow", 0); + publish(kafkaProducer, topicName, i + "1", dataflow); + + GenericRecord pubsub = createRecord(Integer.valueOf(i + "2"), "Pub/Sub", 0); + publish(kafkaProducer, topicName, i + "2", pubsub); + + GenericRecord invalid = createRecord(Integer.valueOf(i + "3"), "InvalidNameTooLong", 0); + publish(kafkaProducer, topicName, i + "3", invalid); + + try { + TimeUnit.SECONDS.sleep(3); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + } + + private void publishDoubleSchemaMessages(String topicName) + throws IOException, RestClientException { + MockSchemaRegistryClient registryClient = new MockSchemaRegistryClient(); + registryClient.register(topicName + "-value", avroSchema, 1, 3); + registryClient.register(topicName + "-value", otherAvroSchema, 1, 4); + + KafkaProducer kafkaProducer = + kafkaResourceManager.buildProducer( + new StringSerializer(), new KafkaAvroSerializer(registryClient)); + + for (int i = 1; i <= 10; i++) { + GenericRecord dataflow = createRecord(Integer.valueOf(i + "1"), "Dataflow", 0); + publish(kafkaProducer, topicName, i + "1", dataflow); + + GenericRecord pubsub = createRecord(Integer.valueOf(i + "2"), "Pub/Sub", 0); + publish(kafkaProducer, topicName, i + "2", pubsub); + + GenericRecord invalid = createRecord(Integer.valueOf(i + "3"), "InvalidNameTooLong", 0); + publish(kafkaProducer, topicName, i + "3", invalid); + + GenericRecord otherDataflow = + createOtherRecord(Integer.valueOf(i + "4"), "Dataflow", "dataflow", 0); + publish(kafkaProducer, topicName, i + "4", otherDataflow); + + GenericRecord otherPubsub = + createOtherRecord(Integer.valueOf(i + "5"), "Pub/Sub", "pubsub", 0); + publish(kafkaProducer, topicName, i + "5", otherPubsub); + + GenericRecord otherInvalid = + createOtherRecord( + Integer.valueOf(i + "6"), "InvalidNameTooLong", "InvalidNameTooLong", 0); + publish(kafkaProducer, topicName, i + "6", otherInvalid); + + try { + TimeUnit.SECONDS.sleep(3); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + } + + public void publishBinaryMessages(String topicName) throws IOException { + KafkaProducer kafkaProducer = + kafkaResourceManager.buildProducer( + new StringSerializer(), new BinaryAvroSerializer(avroSchema)); + + for (int i = 1; i <= 10; i++) { + GenericRecord dataflow = createRecord(Integer.valueOf(i + "1"), "Dataflow", 0); + publishBinary(kafkaProducer, topicName, i + "1", dataflow); + + GenericRecord pubsub = createRecord(Integer.valueOf(i + "2"), "Pub/Sub", 0); + publishBinary(kafkaProducer, topicName, i + "2", pubsub); + + GenericRecord invalid = createRecord(Integer.valueOf(i + "3"), 
"InvalidNameTooLong", 0); + publishBinary(kafkaProducer, topicName, i + "3", invalid); + + try { + TimeUnit.SECONDS.sleep(3); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + } + + private void publish( + KafkaProducer producer, String topicName, String key, GenericRecord value) { + try { + RecordMetadata recordMetadata = + producer.send(new ProducerRecord<>(topicName, key, value)).get(); + LOG.info( + "Published record {}, partition {} - offset: {}", + recordMetadata.topic(), + recordMetadata.partition(), + recordMetadata.offset()); + } catch (Exception e) { + throw new RuntimeException("Error publishing record to Kafka", e); + } + } + + private void publishBinary( + KafkaProducer producer, + String topicName, + String key, + GenericRecord value) { + try { + RecordMetadata recordMetadata = + producer.send(new ProducerRecord<>(topicName, key, value)).get(); + LOG.info( + "Published record {}, partition {} - offset: {}", + recordMetadata.topic(), + recordMetadata.partition(), + recordMetadata.offset()); + } catch (Exception e) { + throw new RuntimeException("Error publishing record to Kafka", e); + } + } + + private GenericRecord createRecord(int id, String productName, double value) { + return new GenericRecordBuilder(avroSchema) + .set("productId", id) + .set("productName", productName) + .build(); + } + + private GenericRecord createOtherRecord(int id, String productName, String name, double value) { + return new GenericRecordBuilder(otherAvroSchema) + .set("productId", id) + .set("productName", productName) + .set("name", name) + .build(); + } +} diff --git a/v2/kafka-to-bigquery/src/test/resources/KafkaToBigQueryFlexAvroIT/avro_schema.avsc b/v2/kafka-to-bigquery/src/test/resources/KafkaToBigQueryFlexAvroIT/avro_schema.avsc new file mode 100644 index 0000000000..d907a7f17a --- /dev/null +++ b/v2/kafka-to-bigquery/src/test/resources/KafkaToBigQueryFlexAvroIT/avro_schema.avsc @@ -0,0 +1,15 @@ +{ + "type": "record", + "namespace": "org.example.avro", + "name": "AvroProductKafkaRecord", + "fields": [ + { + "name": "productId", + "type": "int" + }, + { + "name": "productName", + "type": "string" + } + ] +} \ No newline at end of file diff --git a/v2/kafka-to-bigquery/src/test/resources/KafkaToBigQueryFlexAvroIT/other_avro_schema.avsc b/v2/kafka-to-bigquery/src/test/resources/KafkaToBigQueryFlexAvroIT/other_avro_schema.avsc new file mode 100644 index 0000000000..13f32d803d --- /dev/null +++ b/v2/kafka-to-bigquery/src/test/resources/KafkaToBigQueryFlexAvroIT/other_avro_schema.avsc @@ -0,0 +1,19 @@ +{ + "type": "record", + "namespace": "org.example.other.avro", + "name": "OtherAvroProductKafkaRecord", + "fields": [ + { + "name": "productId", + "type": "int" + }, + { + "name": "productName", + "type": "string" + }, + { + "name": "name", + "type": "string" + } + ] +} \ No newline at end of file From b961be547dbe786b4d0f9d934115504bb8cb5c51 Mon Sep 17 00:00:00 2001 From: Jeffrey Kinard Date: Wed, 17 Apr 2024 14:23:45 -0400 Subject: [PATCH 30/70] Add jinja preprocessing to YamlTemplate Signed-off-by: Jeffrey Kinard --- .../main/resources/Dockerfile-template-yaml | 2 + .../templates/python/YAMLTemplate.java | 9 ++ python/src/main/python/yaml-template/main.py | 91 +++++++++++++++++-- .../templates/python/YAMLTemplateIT.java | 15 ++- python/src/test/resources/YamlTemplateIT.yaml | 18 +--- 5 files changed, 108 insertions(+), 27 deletions(-) diff --git a/plugins/core-plugin/src/main/resources/Dockerfile-template-yaml 
b/plugins/core-plugin/src/main/resources/Dockerfile-template-yaml index 9af50e750b..f24de42e3b 100644 --- a/plugins/core-plugin/src/main/resources/Dockerfile-template-yaml +++ b/plugins/core-plugin/src/main/resources/Dockerfile-template-yaml @@ -20,9 +20,11 @@ WORKDIR $WORKDIR RUN if ! [ -f requirements.txt ] ; then echo "$BEAM_PACKAGE" > requirements.txt ; fi # Install dependencies to launch the pipeline and download to reduce startup time +# Remove Jinja2 dependency once YAML templatization support is added to Beam RUN python -m venv /venv \ && /venv/bin/pip install --no-cache-dir --upgrade pip setuptools \ && /venv/bin/pip install --no-cache-dir -U -r $REQUIREMENTS_FILE \ + && /venv/bin/pip install --no-cache-dir -U Jinja2 \ && /venv/bin/pip download --no-cache-dir --dest /tmp/dataflow-requirements-cache -r $REQUIREMENTS_FILE \ && rm -rf /usr/local/lib/python$PY_VERSION/site-packages \ && mv /venv/lib/python$PY_VERSION/site-packages /usr/local/lib/python$PY_VERSION/ diff --git a/python/src/main/java/com/google/cloud/teleport/templates/python/YAMLTemplate.java b/python/src/main/java/com/google/cloud/teleport/templates/python/YAMLTemplate.java index 739c866835..8b8cb989b0 100644 --- a/python/src/main/java/com/google/cloud/teleport/templates/python/YAMLTemplate.java +++ b/python/src/main/java/com/google/cloud/teleport/templates/python/YAMLTemplate.java @@ -46,4 +46,13 @@ public interface YAMLTemplate { description = "Input YAML pipeline spec file in Cloud Storage.", helpText = "A file in Cloud Storage containing a yaml description of the pipeline to run.") String getYamlPipelineFile(); + + @TemplateParameter.Text( + order = 3, + name = "jinja_variables", + optional = true, + description = "Input jinja preprocessing variables.", + helpText = + "A json dict of variables used when invoking the jinja preprocessor on the provided yaml pipeline.") + String getJinjaVariables(); } diff --git a/python/src/main/python/yaml-template/main.py b/python/src/main/python/yaml-template/main.py index 5fad83470c..4898b67ff6 100644 --- a/python/src/main/python/yaml-template/main.py +++ b/python/src/main/python/yaml-template/main.py @@ -12,23 +12,96 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. -# + import argparse -import logging +import json +import jinja2 +import yaml + +import apache_beam as beam +from apache_beam.io.filesystems import FileSystems +from apache_beam.typehints.schemas import LogicalType +from apache_beam.typehints.schemas import MillisInstant from apache_beam.yaml import cache_provider_artifacts -from apache_beam.yaml import main +from apache_beam.yaml import yaml_transform +# Workaround for https://github.com/apache/beam/issues/28151. 
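Note: the Jinja preprocessing this patch adds (the --jinja_variables flag and the rendering logic later in this main.py diff) boils down to rendering the YAML spec as a Jinja2 template before it is parsed, with the substitutions supplied as a JSON object. A minimal standalone sketch of that step; the spec snippet reuses the INPUT_PATH_PARAM variable from the integration test below, and the bucket path is illustrative, not part of the template:

    import json
    import jinja2

    yaml_spec = """
    pipeline:
      transforms:
        - type: ReadFromCsv
          config:
            path: {{ INPUT_PATH_PARAM }}
    """
    variables = json.loads('{"INPUT_PATH_PARAM": "gs://example-bucket/input/test.csv"}')
    rendered = (
        jinja2.Environment(undefined=jinja2.StrictUndefined)
        .from_string(yaml_spec)
        .render(**variables))
    print(rendered)  # placeholders substituted; a missing variable raises UndefinedError

Using StrictUndefined is what makes an absent --jinja_variables entry fail fast instead of silently rendering an empty string.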
+LogicalType.register_logical_type(MillisInstant) -def run(argv=None): + +def _configure_parser(argv): parser = argparse.ArgumentParser() - _, pipeline_args = parser.parse_known_args(argv) - pipeline_args += ['--sdk_location=container'] - cache_provider_artifacts.cache_provider_artifacts() - main.run(argv=pipeline_args) + parser.add_argument( + '--yaml_pipeline', + '--pipeline_spec', + help='A yaml description of the pipeline to run.') + parser.add_argument( + '--yaml_pipeline_file', + '--pipeline_spec_file', + help='A file containing a yaml description of the pipeline to run.') + parser.add_argument( + '--json_schema_validation', + default='generic', + help='none: do no pipeline validation against the schema; ' + 'generic: validate the pipeline shape, but not individual transforms; ' + 'per_transform: also validate the config of known transforms') + parser.add_argument( + '--jinja_variables', + default=None, + type=json.loads, + help='A json dict of variables used when invoking the jinja preprocessor ' + 'on the provided yaml pipeline.') + return parser.parse_known_args(argv) + + +def _pipeline_spec_from_args(known_args): + if known_args.yaml_pipeline_file and known_args.yaml_pipeline: + raise ValueError( + "Exactly one of yaml_pipeline or yaml_pipeline_file must be set.") + elif known_args.yaml_pipeline_file: + with FileSystems.open(known_args.yaml_pipeline_file) as fin: + pipeline_yaml = fin.read().decode() + elif known_args.yaml_pipeline: + pipeline_yaml = known_args.yaml_pipeline + else: + raise ValueError( + "Exactly one of yaml_pipeline or yaml_pipeline_file must be set.") + + return pipeline_yaml + + +class _BeamFileIOLoader(jinja2.BaseLoader): + def get_source(self, environment, path): + source = FileSystems.open(path).read().decode() + return source, path, lambda: True + + +def run(argv=None): + known_args, pipeline_args = _configure_parser(argv) + pipeline_yaml = ( # keep formatting + jinja2.Environment( + undefined=jinja2.StrictUndefined, loader=_BeamFileIOLoader()) + .from_string(_pipeline_spec_from_args(known_args)) + .render(**known_args.jinja_variables or {})) + pipeline_spec = yaml.load(pipeline_yaml, Loader=yaml_transform.SafeLineLoader) + + with beam.Pipeline( # linebreak for better yapf formatting + options=beam.options.pipeline_options.PipelineOptions( + pipeline_args, + pickle_library='cloudpickle', + **yaml_transform.SafeLineLoader.strip_metadata(pipeline_spec.get( + 'options', {}))), + display_data={'yaml': pipeline_yaml}) as p: + print("Building pipeline...") + yaml_transform.expand_pipeline( + p, pipeline_spec, validate_schema=known_args.json_schema_validation) + print("Running pipeline...") if __name__ == '__main__': + import logging logging.getLogger().setLevel(logging.INFO) - run() + cache_provider_artifacts.cache_provider_artifacts() + run() \ No newline at end of file diff --git a/python/src/test/java/com/google/cloud/teleport/templates/python/YAMLTemplateIT.java b/python/src/test/java/com/google/cloud/teleport/templates/python/YAMLTemplateIT.java index 40b77e0d30..f96700e158 100644 --- a/python/src/test/java/com/google/cloud/teleport/templates/python/YAMLTemplateIT.java +++ b/python/src/test/java/com/google/cloud/teleport/templates/python/YAMLTemplateIT.java @@ -98,10 +98,7 @@ private void testSimpleComposite( } private String createSimpleYamlMessage() throws IOException { - String yamlMessage = - Files.readString(Paths.get(Resources.getResource("YamlTemplateIT.yaml").getPath())); - yamlMessage = yamlMessage.replaceAll("INPUT_PATH", getGcsBasePath() + 
"/input/test.csv"); - return yamlMessage.replaceAll("OUTPUT_PATH", getGcsBasePath() + "/output"); + return Files.readString(Paths.get(Resources.getResource("YamlTemplateIT.yaml").getPath())); } private void runYamlTemplateTest( @@ -109,8 +106,16 @@ private void runYamlTemplateTest( paramsAdder) throws IOException { // Arrange + String inputPath = getGcsBasePath() + "/input/test.csv"; + String outputPath = getGcsBasePath() + "/output"; PipelineLauncher.LaunchConfig.Builder options = - paramsAdder.apply(PipelineLauncher.LaunchConfig.builder(testName, specPath)); + paramsAdder.apply( + PipelineLauncher.LaunchConfig.builder(testName, specPath) + .addParameter( + "jinja_variables", + String.format( + "{\"INPUT_PATH_PARAM\": \"%s\", \"OUTPUT_PATH_PARAM\": \"%s\"}", + inputPath, outputPath))); // Act PipelineLauncher.LaunchInfo info = launchTemplate(options); diff --git a/python/src/test/resources/YamlTemplateIT.yaml b/python/src/test/resources/YamlTemplateIT.yaml index 2646e5de75..1e26ee90a4 100644 --- a/python/src/test/resources/YamlTemplateIT.yaml +++ b/python/src/test/resources/YamlTemplateIT.yaml @@ -3,7 +3,7 @@ pipeline: transforms: - type: ReadFromCsv config: - path: "INPUT_PATH" + path: {{ INPUT_PATH_PARAM }} - type: MapToFields name: MapWithErrorHandling input: ReadFromCsv @@ -42,21 +42,13 @@ pipeline: fields: sum: expression: num + inverse - - type: WriteToJsonPython + - type: WriteToJson name: WriteGoodFiles input: Sum config: - path: "OUTPUT_PATH/good" - - type: WriteToJsonPython + path: {{ OUTPUT_PATH_PARAM }}/good + - type: WriteToJson name: WriteBadFiles input: TrimErrors config: - path: "OUTPUT_PATH/bad" - -# TODO(polber) - remove with https://github.com/apache/beam/pull/30777 -providers: - - type: python - config: - packages: [] - transforms: - 'WriteToJsonPython': 'apache_beam.io.WriteToJson' \ No newline at end of file + path: {{ OUTPUT_PATH_PARAM }}/bad From 7a3e55d8799ebf4055df954d7cb151eddb64d3ab Mon Sep 17 00:00:00 2001 From: Manit Gupta Date: Thu, 9 May 2024 12:52:22 +0530 Subject: [PATCH 31/70] Switch EventsIT and SessionIT from JSON to Avro --- .../DataStreamToSpannerEventsIT.java | 77 +++++++++++------- .../DataStreamToSpannerSessionIT.java | 10 +-- .../mysql-Articles.avro | Bin 0 -> 2484 bytes .../mysql-Articles.jsonl | 6 -- .../mysql-Authors.avro | Bin 0 -> 2154 bytes .../mysql-Authors.jsonl | 6 -- .../mysql-Books.avro | Bin 0 -> 2304 bytes .../mysql-Books.jsonl | 6 -- .../mysql-backfill-Movie.avro | Bin 0 -> 1938 bytes .../mysql-backfill-Movie.jsonl | 2 - .../mysql-backfill-Users.avro | Bin 0 -> 1927 bytes .../mysql-backfill-Users.jsonl | 2 - .../mysql-cdc-Users.avro | Bin 0 -> 2592 bytes .../mysql-cdc-Users.jsonl | 4 - .../mysql-statements.sql | 15 ++++ .../mysql-backfill-Category.avro | Bin 0 -> 1770 bytes .../mysql-backfill-Category.jsonl | 2 - .../mysql-cdc-Category.avro | Bin 0 -> 2440 bytes .../mysql-cdc-Category.jsonl | 4 - .../mysql-statements.sql | 6 ++ 20 files changed, 74 insertions(+), 66 deletions(-) create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-Articles.avro delete mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-Articles.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-Authors.avro delete mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-Authors.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-Books.avro delete mode 
100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-Books.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-backfill-Movie.avro delete mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-backfill-Movie.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-backfill-Users.avro delete mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-backfill-Users.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-cdc-Users.avro delete mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-cdc-Users.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-statements.sql create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerSessionIT/mysql-backfill-Category.avro delete mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerSessionIT/mysql-backfill-Category.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerSessionIT/mysql-cdc-Category.avro delete mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerSessionIT/mysql-cdc-Category.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerSessionIT/mysql-statements.sql diff --git a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerEventsIT.java b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerEventsIT.java index 31c766da2a..0d07ec4b38 100644 --- a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerEventsIT.java +++ b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerEventsIT.java @@ -96,7 +96,7 @@ public void setUp() throws IOException { pubsubResourceManager, new HashMap<>() { { - put("inputFileFormat", "json"); + put("inputFileFormat", "avro"); } }); } @@ -129,8 +129,8 @@ public void migrationTestWithUpdatesAndDeletes() { uploadDataStreamFile( jobInfo, TABLE1, - "backfill.jsonl", - "DataStreamToSpannerEventsIT/mysql-backfill-Users.jsonl"), + "backfill_users.avro", + "DataStreamToSpannerEventsIT/mysql-backfill-Users.avro"), SpannerRowsCheck.builder(spannerResourceManager, TABLE1) .setMinRows(2) .setMaxRows(2) @@ -138,8 +138,8 @@ public void migrationTestWithUpdatesAndDeletes() { uploadDataStreamFile( jobInfo, TABLE1, - "cdc1.jsonl", - "DataStreamToSpannerEventsIT/mysql-cdc-Users.jsonl"), + "cdc_users.avro", + "DataStreamToSpannerEventsIT/mysql-cdc-Users.avro"), SpannerRowsCheck.builder(spannerResourceManager, TABLE1) .setMinRows(3) .setMaxRows(3) @@ -169,8 +169,8 @@ public void migrationTestWithInsertsOnly() { uploadDataStreamFile( jobInfo, TABLE2, - "backfill.jsonl", - "DataStreamToSpannerEventsIT/mysql-backfill-Movie.jsonl"), + "backfill_movie.avro", + "DataStreamToSpannerEventsIT/mysql-backfill-Movie.avro"), SpannerRowsCheck.builder(spannerResourceManager, TABLE2) .setMinRows(2) .setMaxRows(2) @@ -197,29 +197,29 @@ public void interleavedAndFKAndIndexTest() { uploadDataStreamFile( jobInfo, "Articles", - "mysql-Articles.jsonl", - "DataStreamToSpannerEventsIT/mysql-Articles.jsonl"), + "mysql_articles.avro", + "DataStreamToSpannerEventsIT/mysql-Articles.avro"), uploadDataStreamFile( jobInfo, "Authors", 
- "mysql-Authors.jsonl", - "DataStreamToSpannerEventsIT/mysql-Authors.jsonl"), + "mysql_authors.avro", + "DataStreamToSpannerEventsIT/mysql-Authors.avro"), uploadDataStreamFile( jobInfo, "Books", - "mysql-Books.jsonl", - "DataStreamToSpannerEventsIT/mysql-Books.jsonl"), + "mysql_books.avro", + "DataStreamToSpannerEventsIT/mysql-Books.avro"), SpannerRowsCheck.builder(spannerResourceManager, "Articles") - .setMinRows(3) - .setMaxRows(3) + .setMinRows(4) + .setMaxRows(4) .build(), SpannerRowsCheck.builder(spannerResourceManager, "Books") - .setMinRows(3) - .setMaxRows(3) + .setMinRows(4) + .setMaxRows(4) .build(), SpannerRowsCheck.builder(spannerResourceManager, "Authors") - .setMinRows(3) - .setMaxRows(3) + .setMinRows(4) + .setMaxRows(4) .build())) .build(); @@ -295,8 +295,9 @@ private void assertMovieTableContents() { ImmutableList numericVals = spannerResourceManager.runQuery("select actor from Movie order by id"); - Assert.assertEquals(123.098, numericVals.get(0).getBigDecimal(0).doubleValue(), 0.001); - Assert.assertEquals(931.512, numericVals.get(1).getBigDecimal(0).doubleValue(), 0.001); + // delta value is required to compare floating point numbers + Assert.assertEquals(12345.09876, numericVals.get(0).getBigDecimal(0).doubleValue(), 0.00000001); + Assert.assertEquals(931.5123, numericVals.get(1).getBigDecimal(0).doubleValue(), 0.00000001); } private void assertAuthorsTable() { @@ -307,12 +308,17 @@ private void assertAuthorsTable() { row.put("name", "a1"); events.add(row); - row = new HashMap<>(); + row.clear(); + row.put("author_id", 2); + row.put("name", "a2"); + events.add(row); + + row.clear(); row.put("author_id", 3); - row.put("name", "a003"); + row.put("name", "a3"); events.add(row); - row = new HashMap<>(); + row.clear(); row.put("author_id", 4); row.put("name", "a4"); events.add(row); @@ -330,18 +336,24 @@ private void assertBooksTable() { row.put("author_id", 3); events.add(row); - row = new HashMap<>(); + row.clear(); row.put("id", 2); row.put("title", "Book002"); row.put("author_id", 3); events.add(row); - row = new HashMap<>(); + row.clear(); row.put("id", 3); row.put("title", "Book004"); row.put("author_id", 4); events.add(row); + row.clear(); + row.put("id", 4); + row.put("title", "Book005"); + row.put("author_id", 2); + events.add(row); + SpannerAsserts.assertThatStructs( spannerResourceManager.runQuery("select * from Books@{FORCE_INDEX=author_id_6}")) .hasRecordsUnorderedCaseInsensitiveColumns(events); @@ -357,20 +369,27 @@ private void assertArticlesTable() { row.put("author_id", 1); events.add(row); - row = new HashMap<>(); + row.clear(); row.put("id", 2); row.put("name", "Article002"); row.put("published_date", Date.parseDate("2024-01-01")); row.put("author_id", 1); events.add(row); - row = new HashMap<>(); + row.clear(); row.put("id", 3); - row.put("name", "Article003"); + row.put("name", "Article004"); row.put("published_date", Date.parseDate("2024-01-01")); row.put("author_id", 4); events.add(row); + row.clear(); + row.put("id", 4); + row.put("name", "Article005"); + row.put("published_date", Date.parseDate("2024-01-01")); + row.put("author_id", 3); + events.add(row); + SpannerAsserts.assertThatStructs( spannerResourceManager.runQuery("select * from Articles@{FORCE_INDEX=author_id}")) .hasRecordsUnorderedCaseInsensitiveColumns(events); diff --git a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerSessionIT.java 
b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerSessionIT.java index 2809f2b992..2ec54f2dc6 100644 --- a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerSessionIT.java +++ b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerSessionIT.java @@ -85,7 +85,7 @@ public void setUp() throws IOException { pubsubResourceManager, new HashMap<>() { { - put("inputFileFormat", "json"); + put("inputFileFormat", "avro"); } }); } @@ -116,8 +116,8 @@ public void migrationTestWithRenameAndDropColumn() { uploadDataStreamFile( jobInfo, TABLE, - "backfill.jsonl", - "DataStreamToSpannerSessionIT/mysql-backfill-Category.jsonl"), + "backfill_category.avro", + "DataStreamToSpannerSessionIT/mysql-backfill-Category.avro"), SpannerRowsCheck.builder(spannerResourceManager, TABLE) .setMinRows(2) .setMaxRows(2) @@ -140,8 +140,8 @@ public void migrationTestWithRenameAndDropColumn() { uploadDataStreamFile( jobInfo, TABLE, - "cdc1.jsonl", - "DataStreamToSpannerSessionIT/mysql-cdc-Category.jsonl"), + "cdc_category.avro", + "DataStreamToSpannerSessionIT/mysql-cdc-Category.avro"), SpannerRowsCheck.builder(spannerResourceManager, TABLE) .setMinRows(3) .setMaxRows(3) diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-Articles.avro b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-Articles.avro new file mode 100644 index 0000000000000000000000000000000000000000..01421913ae369ac9a7a5b0b9f603a78d42f76ae5 GIT binary patch literal 2484 zcmdUvO^ee&7{|L|sVjI8l?88-bF)qQw#nW+2zu}Z-P=;aWM;CRb&`oQ6QL}HqA0j$ z4}Jr|gXm}Q=EvBR_yQ{IMLamOO{dwmTPt|b913av&piM6J^z{8(cX%+4jyv0Az%+D z;NarEE&Ca^w``6fz?wTyx!%!M`_9^>F zuI7tBSFSke2qtryjCX7kR$c~-nlqz4v z1iMq>j0qwMOO10=P;i?h7xqB9t8%6zIp%NFbCxF}M3`cUCjl|?h%t&ms^)THG8yPo z1}1tl-sy=LBhL~*Ct3LF$; z-)UjwI{}O%r;VHL@#DGfhu2TueS&Wehw68|bmLt~`r-j~Ac9UrQWdrrD;a0{su1;d ztKIVapzQ~4yG|JdI#!~d3kTvVGfDKi_|yVtT_eLhn;Kfa+?sb{ScRI%pj ztaCSR-Me;o+p1WWb*ZRPi&(?qvqy(VPpg)-3@z_JrgiXbIa*3rqP1G0)%Y`7UXiWg z_kTBABX4uE_3>yqT1r=ehEYY#&lop8f%tQ zr6Dh!(v{=F=v#=35hK74rad^lpEJ&xLX%oXB`Qhi|II0qV@kB^4*g>vIW-{GZr21- z$m2_PYp6`A$0u~WuBH-FTa6_@1?;ealwf0-rn&WF-mxWVfDW$#W}1f-G^{3DwtPmo z7O?8OhJ;g>YF@r(lP#5iTB|7~%R!De+oP*dD`Lhb&RDIHLsV3po4#fa4wuBGP)svX zxXxX`jN4*z$q^JIGiP=r=lq@en)FP?Ddkkt#eg_@A_S*U7+R-%Z@CB zt4_e>>5OZ~(3NF{M#C0LX$BS7Xy3A_0l6}zE@-~BN{ve3jToADSsdlA&LJKW{{;KKu1fPvUnU7AWDrFZIzgCV(_Al$x>?WiWwTljA7=}|TDiK16E{GA`#1J=eY`0E@7?6-)0R-JjWS!6F_S&=0+?|o4sspMj zA;iFh*qB%=#4jjfLHr8Dgv7wk0(VV(F=-k+FwjVelKB1Zd+&L_yL-|0f^``_;cQu; zEu6ylOS?c0a}3r1$B6M5v_S?_{hK=BO~!VF`X?cn#saKu?3NFUf@tZuR7g&;O|aKC z&pC#1C`pQiglTS+rWs{Q-v*Rz5(Me`$br$@DWQ}Iuvb}8u!1A}Z`WkeHb&C81E=gG z#d3?y-L5LcFb$8%&7m@49`4}byqxhOqqu< zl&IFN!WyZNF7;~FUF4}_GrkXQ7k|lqmp0PX;VT>u3crqYH9x+BS%v3IaB$LA- zWpJW5@lGc)5uQw0Wt$a&08FNB3U06eVSQtfuI*Z3@Z zkAB?v@aEw8wJUEwUEgngS-KtdqGZMO`gWY)!1iz)*nN~lwvXMeemUy>?eO5^cXagb zsrtNB-a=Q?p?FMf6eBw#nHqrC3pr={_==A2`JNN>{h;6VJCq@)qZXY)*a&BmL+W%! 
z2UK*n1 zj0g}TNGwc@ObDs!0%C!MfxiGKf(bEHg1aWUxJ{FGU?D@4IPbgfeeb>dawS+EGf#pQ zMyEMkLJ`UEY^_@74OLPSz`J{;v3At5USnd6ordUC{rCcx^ zFIw%q{+%HZHUy3k7a&SBQa2-1;`0`vix>hj-(jHfc7zGR+-he|rk>koj1fs^4awAmfXx_MmTBCY$%7nsj4brj!#(=8D0!7M?9%H4X6tupJ zV3IrtfdGSt?KHfraHe3;cL4&T5;4?x)hYmdBqm`v0<5*a8G9pUN0YH>G;xN=f)%HP zNM2nB>zUvva6&m2n8up9F6ipkPcB>n@uJL`iezt2jo#&W%o`yhNT7Z|G(4b`AP`Gm zwmFl47NMYz*Y@soqz@c)B6I(AKO76Ip0`5O1St_#O^K3GVp}|QQE#T; zgO)%ltJ^Pgh@f;?d6LMnFjwl=JXcyIFTC6@GrnK4D_{oLei?dp(J$)P`s252 z-{8{^Yw~@nw<6C*Eq%(PML=G&J#WH)z=CZluuu&BJ0V;YHa)j{aM{<*!-PxO&JqX7^ zun_!FXlG;Ve_$tKDYiD&mfF||md;#~NiM&%urh&Ve|+D(H=pmDcL#NrytA+;#j3*F zG=?u{_I=$=slVY%iiIS8!A~Hz&pd2CP*f`Oro|{H%HP=9AAZa-W~GBrr6o(+{z1Wg zmlQ}-vzRIkaq5&V85c%h@VRI+41BX+fzt;uKHAsCG^mMrn@nPykg9l4t2G0~lV_@STw{o$x+Ij!>&3XC!)VbZ8 zpYtD&U$4L#L8x8{0AmtVs^wNtqY(TO2-~QgWFSopvK5!AP zayBB+G7jM0g*8joB5X}s93#ekD{X}^(BG+3nHQJ~^-Gmt>I-XfVXbj7j)|7mONHbl zT(Z{F<~qmFuSgPLAz=_1rD;Z)(x)xTmI#8hoH#IgJ0O%2VXZfvC|Jx9{w3GY5W}EyK&}gwgn8vYu664v7?)b3$tQpfHpn^DwM61T&!gd3lT<(# zI{;JWp${c&Kdg1&b;3;r?Q@sVqu3nPXx}s@-O7Wat0v+kfV|e*J(?a(O;l_uRnlZ+ z!3twam9P2)7hA*;6GRdg8t0~<&h0R{Xc>k}DrY*94gN;m;dmk{KBidW!+;pM#~8&h zRC75pnF!V>gNJ%E-qwj63fm{q^!%BAB$U=-+VXJ~##CC98cGw1Bk?pvje9!24lZ>- zk$^kCNW`8%oOqqM{H->k3fi&oyY!Th%OaHRb`9c#*4-8l;!G``Hi~M1i|eVg4_!R) zKHRxJv$r(8!kClI<;EupRnN!%c+SZ=o;#M!jw3fW01{)V zx*=s|MEiE~VE4u8)R}{Sfqv?Me&VMmEQUQ4^>EldI`Ck`T-&c;K4C^=#|or~Oksg3`2@wNwC aN(3&?&lRthOSXwuD>YwJyJx7i?z2Bsr)3cU literal 0 HcmV?d00001 diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-cdc-Users.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-cdc-Users.jsonl deleted file mode 100644 index d433a51c16..0000000000 --- a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-cdc-Users.jsonl +++ /dev/null @@ -1,4 +0,0 @@ -{"uuid":"da9f3332-bcce-491c-924e-849200000000","read_timestamp":"2024-01-07T14:35:00.649","source_timestamp":"2024-01-07T14:34:39.000","object":"l1_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"1d2ab1bcde3e861022798fbded72121b56007c52","sort_keys":[1704638079000,"mysql-bin.000011",74893116],"source_metadata":{"table":"Users","database":"l1","primary_keys":["id"],"log_file":"mysql-bin.000011","log_position":74893116,"change_type":"DELETE","is_deleted":true},"payload":{"id":2,"name":"Tester Yadav","age":33,"subscribed":1,"plan":"B","startDate":"2024-01-01T00:00:00.000"}} -{"uuid":"da9f3332-bcce-491c-924e-849200000001","read_timestamp":"2024-01-07T14:35:00.649","source_timestamp":"2024-01-07T14:34:39.000","object":"l1_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"1d2ab1bcde3e861022798fbded72121b56007c52","sort_keys":[1704638079000,"mysql-bin.000011",74893475],"source_metadata":{"table":"Users","database":"l1","primary_keys":["id"],"log_file":"mysql-bin.000011","log_position":74893475,"change_type":"UPDATE-INSERT","is_deleted":false},"payload":{"id":3,"name":"Tester Gupta","age":50,"subscribed":0,"plan":"Z","startDate":"2023-06-07T00:00:00.000"}} 
-{"uuid":"da9f3332-bcce-491c-924e-849200000010","read_timestamp":"2024-01-07T14:35:00.649","source_timestamp":"2024-01-07T14:34:39.000","object":"l1_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"1d2ab1bcde3e861022798fbded72121b56007c52","sort_keys":[1704638079000,"mysql-bin.000011",74893785],"source_metadata":{"table":"Users","database":"l1","primary_keys":["id"],"log_file":"mysql-bin.000011","log_position":74893785,"change_type":"INSERT","is_deleted":false},"payload":{"id":4,"name":"Tester","age":38,"subscribed":1,"plan":"D","startDate":"2023-09-10T00:00:00.000"}} -{"uuid":"587474a5-9a77-422e-9fd6-74ef00000010","read_timestamp":"2024-01-07T14:32:39.849","source_timestamp":"2024-01-07T14:31:55.000","object":"l1_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"1d2ab1bcde3e861022798fbded72121b56007c52","sort_keys":[1704637915000,"mysql-bin.000011",74882471],"source_metadata":{"table":"Users","database":"l1","primary_keys":["id"],"log_file":"mysql-bin.000011","log_position":74882471,"change_type":"INSERT","is_deleted":false},"payload":{"id":3,"name":"Tester Gupta","age":36,"subscribed":0,"plan":"C","startDate":"2023-06-07T00:00:00.000"}} \ No newline at end of file diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-statements.sql b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-statements.sql new file mode 100644 index 0000000000..1f356f8b62 --- /dev/null +++ b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-statements.sql @@ -0,0 +1,15 @@ +### CDC + Backfill for insert, delete and updates +# First wave of INSERT +Insert into Users values(1, 'Tester Kumar', 30, 0, 'A', '2023-01-01'); Insert into Users values(2, 'Tester Yadav', 33, 1, 'B', '2024-01-01'); + +# Second wave of INSERT, UPDATE, DELETE +Delete from Users where id=2; Insert into Users values(4, 'Tester', 38, 1, 'D', '2023-09-10'); Insert into Users values(3, 'Tester Gupta', 36, 0, 'C', '2023-06-07'); Update Users set age=50, plan='Z' where id=3; + +### Insert only +# First wave of INSERT +Insert into Movie values(1, 'movie1', 12345.09876, '2023-01-01 12:12:12'); Insert into Movie values(2, 'movie2', 931.5123, '2023-11-25 17:10:12'); + +### Foreign key constraint test +Insert into Authors values(1, 'a1'); Insert into Authors values(2, 'a2'); Insert into Authors values(3, 'a3'); insert into Authors values(4, 'a4'); +Insert into Articles values(1, 'Article001', '2024-01-01', 1); Insert into Articles values(2, 'Article002', '2024-01-01', 1); Insert into Articles values(3, 'Article004', '2024-01-01', 4); Insert into Articles values(4, 'Article005', '2024-01-01', 3); +Insert into Books values(1, 'Book005', 3); Insert into Books values(2, 'Book002', 3); Insert into Books values(3, 'Book004', 4); Insert into Books values(4, 'Book005', 2); diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerSessionIT/mysql-backfill-Category.avro b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerSessionIT/mysql-backfill-Category.avro new file mode 100644 index 0000000000000000000000000000000000000000..9f4d33df58593eac44cd55d1395c2cb803700ff4 GIT binary patch literal 1770 zcmds1J#P~+7*0=BgaiWusUw5uT$3xgt^_RVt^srEQccTOWU{3&sBN0{j?2PmyvU6k;;*Ys 
zq%fJJBd_r?Afd|d;eyirT61lxVt;?-Lm>#q7FiAVOO}Zou!XM{QahTw`H*T?3^J=xL(<^qF2i{6WP;+RWD|hcdzWr#!{PMi|)8R93|K<0e e@7_GGUxbk0zb0has{d$4<8*~)^ zuF2dbjD%9B?Q#wuiw4{CxUvyL-}wV?4=Nq(&Q%;k-IX0@k%()!DpJ^B6v4Qc2<*px zbQQ-WCD7^sU{XBvpn(00^?IHVPBrxVE}%=XTB^H!%BFZL7xF!8!ie}VY7KWrwMS|b z85^fYQbHEBph>CJl{i@KDGnJYg0Mg-R}Bfb?c|~c4C*pxaU{DYspy^E|j%*k9JAsn%bW($;HYS14s(H=BHQ1y=n~UkBknG5* zYU&Mn*Q-L}o?V)Cs2xcBBWdir`*{*I?^uL-}!uTdR&NOwsfgL8i* zj95JOykMD@Y1p%tJzKO2lu4p8BD~P#I>Ld7AT1+O;a zny7><%bK>ZG2?orDciI?qi7==+9l+f*tI-23p&~|i6ALvGL~+b;L}DfbEtdUj#2}R zr@YO8fem16zI?7+DT5IJK=wxKCTKtW^8NXf+u31g83&lw`rCcdk_}+&%=ydZOBIj- zIgqV42xx!3^WfF9p=cS!4z2e4|2JD5Z3D7(cXOY#WJ8Kp7G&ie0@@od9={xtR+6m) nN$b~#ebSN*;OLcw`ID8ht|B%Ha?nKvX#ZThwI(OB_e%Q>8|Oox literal 0 HcmV?d00001 diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerSessionIT/mysql-cdc-Category.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerSessionIT/mysql-cdc-Category.jsonl deleted file mode 100644 index 7825ca6e74..0000000000 --- a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerSessionIT/mysql-cdc-Category.jsonl +++ /dev/null @@ -1,4 +0,0 @@ -{"uuid":"05111f37-1fcc-48b2-99bd-8f1c00000000","read_timestamp":"2024-02-07T08:11:50.054Z","source_timestamp":"2024-02-07T08:10:55.000Z","object":"it_Category","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/int-test-shreya","schema_key":"e38807ece1eb1660b3baaafbe2ffb849c6c3023a","sort_keys":[1707293455000,"mysql-bin.000029",9612272],"source_metadata":{"table":"Category","database":"it","primary_keys":["category_id"],"log_file":"mysql-bin.000029","log_position":9612272,"change_type":"DELETE","is_deleted":true},"payload":{"category_id":1,"name":"xyz","last_update":"2024-02-06T12:19:37.000Z"}} -{"uuid":"3faa3ec0-cedc-4a93-b9ce-50e000000000","read_timestamp":"2024-02-07T08:13:00.421Z","source_timestamp":"2024-02-07T08:12:32.000Z","object":"it_Category","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/int-test-shreya","schema_key":"e38807ece1eb1660b3baaafbe2ffb849c6c3023a","sort_keys":[1707293552000,"mysql-bin.000029",9612570],"source_metadata":{"table":"Category","database":"it","primary_keys":["category_id"],"log_file":"mysql-bin.000029","log_position":9612570,"change_type":"INSERT","is_deleted":false},"payload":{"category_id":4,"name":"ghi","last_update":"2024-02-07T08:12:32.000Z"}} -{"uuid":"359901f6-96bc-4f68-9f6c-7d4500000001","read_timestamp":"2024-02-07T08:10:39.444Z","source_timestamp":"2024-02-07T08:10:35.000Z","object":"it_Category","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/int-test-shreya","schema_key":"e38807ece1eb1660b3baaafbe2ffb849c6c3023a","sort_keys":[1707293435000,"mysql-bin.000029",9611982],"source_metadata":{"table":"Category","database":"it","primary_keys":["category_id"],"log_file":"mysql-bin.000029","log_position":9611982,"change_type":"INSERT","is_deleted":false},"payload":{"category_id":3,"name":"def","last_update":"2024-02-07T08:10:35.000Z"}} 
-{"uuid":"359901f6-96bc-4f68-9f6c-7d4500000000","read_timestamp":"2024-02-07T08:10:39.444Z","source_timestamp":"2024-02-07T08:10:15.000Z","object":"it_Category","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/int-test-shreya","schema_key":"e38807ece1eb1660b3baaafbe2ffb849c6c3023a","sort_keys":[1707293415000,"mysql-bin.000029",9611684],"source_metadata":{"table":"Category","database":"it","primary_keys":["category_id"],"log_file":"mysql-bin.000029","log_position":9611684,"change_type":"UPDATE-INSERT","is_deleted":false},"payload":{"category_id":2,"name":"abc1","last_update":"2024-02-06T12:19:47.000Z"}} \ No newline at end of file diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerSessionIT/mysql-statements.sql b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerSessionIT/mysql-statements.sql new file mode 100644 index 0000000000..0ce47716c1 --- /dev/null +++ b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerSessionIT/mysql-statements.sql @@ -0,0 +1,6 @@ +### CDC + Backfill for insert, delete and updates +# First wave of INSERT +Insert into Category values(1, 'xyz', '2024-02-06T12:19:37'); Insert into Category values(2, 'abc', '2024-02-06T12:19:47'); + +# Second wave of INSERT, UPDATE, DELETE +Delete from Category where category_id=1; Insert into Category values(4, 'ghi', '2024-02-07T08:12:32'); Insert into Category values(3, 'def', '2024-02-07T08:10:35'); Update Category set name='abc1' where category_id=2; \ No newline at end of file From 6e7b262bc2b298881940d5a8a665ca92cf643012 Mon Sep 17 00:00:00 2001 From: Manit Gupta Date: Mon, 13 May 2024 09:06:40 +0530 Subject: [PATCH 32/70] Fix row.clear() --- .../templates/DataStreamToSpannerEventsIT.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerEventsIT.java b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerEventsIT.java index 0d07ec4b38..4cc2d4df56 100644 --- a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerEventsIT.java +++ b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerEventsIT.java @@ -308,17 +308,17 @@ private void assertAuthorsTable() { row.put("name", "a1"); events.add(row); - row.clear(); + row = new HashMap<>(); row.put("author_id", 2); row.put("name", "a2"); events.add(row); - row.clear(); + row = new HashMap<>(); row.put("author_id", 3); row.put("name", "a3"); events.add(row); - row.clear(); + row = new HashMap<>(); row.put("author_id", 4); row.put("name", "a4"); events.add(row); @@ -336,19 +336,19 @@ private void assertBooksTable() { row.put("author_id", 3); events.add(row); - row.clear(); + row = new HashMap<>(); row.put("id", 2); row.put("title", "Book002"); row.put("author_id", 3); events.add(row); - row.clear(); + row = new HashMap<>(); row.put("id", 3); row.put("title", "Book004"); row.put("author_id", 4); events.add(row); - row.clear(); + row = new HashMap<>(); row.put("id", 4); row.put("title", "Book005"); row.put("author_id", 2); @@ -369,21 +369,21 @@ private void assertArticlesTable() { row.put("author_id", 1); events.add(row); - row.clear(); + row = new HashMap<>(); row.put("id", 2); row.put("name", "Article002"); row.put("published_date", Date.parseDate("2024-01-01")); row.put("author_id", 1); events.add(row); - row.clear(); + row = 
new HashMap<>(); row.put("id", 3); row.put("name", "Article004"); row.put("published_date", Date.parseDate("2024-01-01")); row.put("author_id", 4); events.add(row); - row.clear(); + row = new HashMap<>(); row.put("id", 4); row.put("name", "Article005"); row.put("published_date", Date.parseDate("2024-01-01")); From 304fd59e904e17f9de5e1ba97f01f03cf23a4404 Mon Sep 17 00:00:00 2001 From: Deep1998 Date: Wed, 8 May 2024 15:47:04 +0530 Subject: [PATCH 33/70] Add error counters for transformer and reader --- .../teleport/v2/constants/MetricCounters.java | 31 +++++++++++++++++++ .../teleport/v2/constants/package-info.java | 16 ++++++++++ .../mysql/MysqlDialectAdapter.java | 14 +++++++-- .../jdbc/rowmapper/JdbcSourceRowMapper.java | 7 +++++ .../transformer/SourceRowToMutationDoFn.java | 13 +++++++- 5 files changed, 77 insertions(+), 4 deletions(-) create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/constants/MetricCounters.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/constants/package-info.java diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/constants/MetricCounters.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/constants/MetricCounters.java new file mode 100644 index 0000000000..3b79f6bb54 --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/constants/MetricCounters.java @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.v2.constants; + +// Counters for bulk template. +public class MetricCounters { + + // Counter for errors in the transformer. + public static final String TRANSFORMER_ERRORS = "transformer_errors"; + + // Counter for errors encountered by the reader when trying to map JDBC ResultSet to a SourceRow. + public static final String READER_MAPPING_ERRORS = "reader_mapping_errors"; + + // Counter for errors encountered by the reader while discovering schema. This counts all sorts of + // errors including SQLTransientConnectionException, SQLNonTransientConnectionException, + // SQLExceptions etc. + public static final String READER_SCHEMA_DISCOVERY_ERRORS = "reader_schema_discovery_errors"; +} diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/constants/package-info.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/constants/package-info.java new file mode 100644 index 0000000000..3e1124dc05 --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/constants/package-info.java @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.v2.constants; diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/dialectadapter/mysql/MysqlDialectAdapter.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/dialectadapter/mysql/MysqlDialectAdapter.java index 8c714bf7ae..17494e442e 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/dialectadapter/mysql/MysqlDialectAdapter.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/dialectadapter/mysql/MysqlDialectAdapter.java @@ -15,9 +15,11 @@ */ package com.google.cloud.teleport.v2.source.reader.io.jdbc.dialectadapter.mysql; +import com.google.cloud.teleport.v2.constants.MetricCounters; import com.google.cloud.teleport.v2.source.reader.io.exception.RetriableSchemaDiscoveryException; import com.google.cloud.teleport.v2.source.reader.io.exception.SchemaDiscoveryException; import com.google.cloud.teleport.v2.source.reader.io.jdbc.dialectadapter.DialectAdapter; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.rowmapper.JdbcSourceRowMapper; import com.google.cloud.teleport.v2.source.reader.io.schema.SourceSchemaReference; import com.google.cloud.teleport.v2.spanner.migrations.schema.SourceColumnType; import com.google.common.collect.ImmutableList; @@ -29,6 +31,8 @@ import java.sql.SQLNonTransientConnectionException; import java.sql.SQLTransientConnectionException; import javax.sql.DataSource; +import org.apache.beam.sdk.metrics.Counter; +import org.apache.beam.sdk.metrics.Metrics; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -38,6 +42,9 @@ public final class MysqlDialectAdapter implements DialectAdapter { private static final Logger logger = LoggerFactory.getLogger(MysqlDialectAdapter.class); + private final Counter schemaDiscoveryErrors = + Metrics.counter(JdbcSourceRowMapper.class, MetricCounters.READER_SCHEMA_DISCOVERY_ERRORS); + public MysqlDialectAdapter(MySqlVersion mySqlVersion) { this.mySqlVersion = mySqlVersion; } @@ -79,24 +86,25 @@ public ImmutableMap> discoverTabl String.format( "Transient connection error while discovering table schema for datasource=%s db=%s tables=%s, cause=%s", dataSource, sourceSchemaReference, tables, e)); - // TODO: Add metrics for transient connection errors. + schemaDiscoveryErrors.inc(); throw new RetriableSchemaDiscoveryException(e); } catch (SQLNonTransientConnectionException e) { logger.error( String.format( "Non Transient connection error while discovering table schema for datasource=%s, db=%s tables=%s, cause=%s", dataSource, sourceSchemaReference, tables, e)); - // TODO: Add metrics for non-transient connection errors. + schemaDiscoveryErrors.inc(); throw new SchemaDiscoveryException(e); } catch (SQLException e) { logger.error( String.format( "Sql exception while discovering table schema for datasource=%s db=%s tables=%s, cause=%s", dataSource, sourceSchemaReference, tables, e)); - // TODO: Add metrics for SQL exceptions. 
+ schemaDiscoveryErrors.inc(); throw new SchemaDiscoveryException(e); } catch (SchemaDiscoveryException e) { // Already logged. + schemaDiscoveryErrors.inc(); throw e; } return tablesBuilder.build(); diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/JdbcSourceRowMapper.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/JdbcSourceRowMapper.java index 3e920e252b..76354273c7 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/JdbcSourceRowMapper.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/JdbcSourceRowMapper.java @@ -15,6 +15,7 @@ */ package com.google.cloud.teleport.v2.source.reader.io.jdbc.rowmapper; +import com.google.cloud.teleport.v2.constants.MetricCounters; import com.google.cloud.teleport.v2.source.reader.io.exception.ValueMappingException; import com.google.cloud.teleport.v2.source.reader.io.row.SourceRow; import com.google.cloud.teleport.v2.source.reader.io.schema.SourceTableSchema; @@ -24,6 +25,8 @@ import java.util.concurrent.TimeUnit; import org.apache.avro.Schema; import org.apache.beam.sdk.io.jdbc.JdbcIO; +import org.apache.beam.sdk.metrics.Counter; +import org.apache.beam.sdk.metrics.Metrics; import org.checkerframework.checker.initialization.qual.Initialized; import org.checkerframework.checker.nullness.qual.NonNull; import org.checkerframework.checker.nullness.qual.Nullable; @@ -39,6 +42,9 @@ public final class JdbcSourceRowMapper implements JdbcIO.RowMapper { private static final Logger logger = LoggerFactory.getLogger(JdbcSourceRowMapper.class); + private final Counter mapperErrors = + Metrics.counter(JdbcSourceRowMapper.class, MetricCounters.READER_MAPPING_ERRORS); + /** * Construct {@link JdbcSourceRowMapper}. * @@ -87,6 +93,7 @@ long getCurrentTimeMicros() { .getOrDefault(entry.getValue().getName(), JdbcValueMapper.UNSUPPORTED) .mapValue(resultSet, entry.getKey(), schema)); } catch (SQLException e) { + mapperErrors.inc(); logger.error( "Exception while mapping jdbc ResultSet to avro. Check for potential schema changes. 
Exception: " + e); diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/transformer/SourceRowToMutationDoFn.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/transformer/SourceRowToMutationDoFn.java index 1f9375c15c..7e00c00a22 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/transformer/SourceRowToMutationDoFn.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/transformer/SourceRowToMutationDoFn.java @@ -18,6 +18,7 @@ import com.google.auto.value.AutoValue; import com.google.cloud.spanner.Mutation; import com.google.cloud.spanner.Value; +import com.google.cloud.teleport.v2.constants.MetricCounters; import com.google.cloud.teleport.v2.source.reader.io.row.SourceRow; import com.google.cloud.teleport.v2.source.reader.io.schema.SourceTableReference; import com.google.cloud.teleport.v2.spanner.migrations.avro.GenericRecordTypeConvertor; @@ -25,6 +26,8 @@ import java.io.Serializable; import java.util.Map; import org.apache.avro.generic.GenericRecord; +import org.apache.beam.sdk.metrics.Counter; +import org.apache.beam.sdk.metrics.Metrics; import org.apache.beam.sdk.transforms.DoFn; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -39,6 +42,9 @@ public abstract class SourceRowToMutationDoFn extends DoFn private static final Logger LOG = LoggerFactory.getLogger(SourceRowToMutationDoFn.class); + private final Counter transformerErrors = + Metrics.counter(SourceRowToMutationDoFn.class, MetricCounters.TRANSFORMER_ERRORS); + public abstract ISchemaMapper iSchemaMapper(); public abstract Map tableIdMapper(); @@ -58,6 +64,7 @@ public void processElement(ProcessContext c) { LOG.error( "cannot find valid sourceTable for tableId: {} in tableIdMapper", sourceRow.tableSchemaUUID()); + transformerErrors.inc(); return; } try { @@ -73,7 +80,11 @@ public void processElement(ProcessContext c) { c.output(mutation); } catch (Exception e) { // TODO: Add DLQ integration once supported. - LOG.error("Unable to transform source row to spanner mutation: {}", e.getMessage()); + LOG.error( + "Unable to transform source row to spanner mutation: {} {}", + e.getMessage(), + e.fillInStackTrace()); + transformerErrors.inc(); } } From 3030f70709001ab1f098a4f863428b3e1394ad1a Mon Sep 17 00:00:00 2001 From: Deep1998 Date: Fri, 10 May 2024 15:16:10 +0530 Subject: [PATCH 34/70] Handle null valued strings --- .../transformer/SourceRowToMutationDoFn.java | 5 +- .../migrations/avro/AvroToValueMapper.java | 18 +++++-- .../avro/GenericRecordTypeConvertor.java | 20 +++----- .../avro/AvroToValueMapperTest.java | 23 +++++++++ .../avro/GenericRecordTypeConvertorTest.java | 51 +++++++++++++------ 5 files changed, 80 insertions(+), 37 deletions(-) diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/transformer/SourceRowToMutationDoFn.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/transformer/SourceRowToMutationDoFn.java index 7e00c00a22..b68d38ae27 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/transformer/SourceRowToMutationDoFn.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/transformer/SourceRowToMutationDoFn.java @@ -80,10 +80,7 @@ public void processElement(ProcessContext c) { c.output(mutation); } catch (Exception e) { // TODO: Add DLQ integration once supported. 
- LOG.error( - "Unable to transform source row to spanner mutation: {} {}", - e.getMessage(), - e.fillInStackTrace()); + LOG.error("Unable to transform source row to spanner mutation", e); transformerErrors.inc(); } } diff --git a/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/avro/AvroToValueMapper.java b/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/avro/AvroToValueMapper.java index b76eb7c46a..2d4aa97d96 100644 --- a/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/avro/AvroToValueMapper.java +++ b/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/avro/AvroToValueMapper.java @@ -89,9 +89,9 @@ static Map getGsqlMap() { Type.float64(), (recordValue, fieldSchema) -> Value.float64(avroFieldToDouble(recordValue, fieldSchema))); gsqlFunctions.put( - Type.string(), (recordValue, fieldSchema) -> Value.string(recordValue.toString())); + Type.string(), (recordValue, fieldSchema) -> Value.string(avroFieldToString(recordValue))); gsqlFunctions.put( - Type.json(), (recordValue, fieldSchema) -> Value.string(recordValue.toString())); + Type.json(), (recordValue, fieldSchema) -> Value.string(avroFieldToString(recordValue))); gsqlFunctions.put( Type.numeric(), (recordValue, fieldSchema) -> @@ -121,11 +121,12 @@ static Map getPgMap() { Type.pgFloat8(), (recordValue, fieldSchema) -> Value.float64(avroFieldToDouble(recordValue, fieldSchema))); pgFunctions.put( - Type.pgVarchar(), (recordValue, fieldSchema) -> Value.string(recordValue.toString())); + Type.pgVarchar(), + (recordValue, fieldSchema) -> Value.string(avroFieldToString(recordValue))); pgFunctions.put( - Type.pgText(), (recordValue, fieldSchema) -> Value.string(recordValue.toString())); + Type.pgText(), (recordValue, fieldSchema) -> Value.string(avroFieldToString(recordValue))); pgFunctions.put( - Type.pgJsonb(), (recordValue, fieldSchema) -> Value.string(recordValue.toString())); + Type.pgJsonb(), (recordValue, fieldSchema) -> Value.string(avroFieldToString(recordValue))); pgFunctions.put( Type.pgNumeric(), (recordValue, fieldSchema) -> @@ -189,6 +190,13 @@ static Double avroFieldToDouble(Object recordValue, Schema fieldSchema) { } } + static String avroFieldToString(Object recordValue) { + if (recordValue == null) { + return null; + } + return recordValue.toString(); + } + static BigDecimal avroFieldToNumericBigDecimal(Object recordValue, Schema fieldSchema) { try { if (recordValue == null) { diff --git a/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertor.java b/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertor.java index 9822f23276..1d986ef35e 100644 --- a/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertor.java +++ b/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertor.java @@ -88,21 +88,18 @@ public Map transformChangeEvent(GenericRecord record, String srcT spannerColumnType); result.put(spannerColName, value); } catch (NullPointerException e) { - LOG.info("Unable to transform change event: {}", e.getMessage()); + LOG.error("Unable to transform change event", e); throw e; } catch (IllegalArgumentException e) { - LOG.info("Unable to transform change event: {}", e.getMessage()); + LOG.error("Unable to transform change event", e); throw e; } catch (Exception e) { - LOG.info( - 
"Unable to convert spanner value for spanner col: {}. {}", - spannerColName, - e.getMessage()); + LOG.error( + String.format("Unable to convert spanner value for spanner col: {}", spannerColName), + e); throw new RuntimeException( - String.format( - "Unable to convert spanner value for spanner col: {}. {}", - spannerColName, - e.getMessage())); + String.format("Unable to convert spanner value for spanner col: {}", spannerColName), + e); } } return result; @@ -123,9 +120,6 @@ public Value getSpannerValue( LOG.debug("found union type: {}", types); // Schema types can only union with Type NULL. Any other UNION is unsupported. if (types.size() == 2 && types.stream().anyMatch(s -> s.getType().equals(Schema.Type.NULL))) { - if (recordValue == null) { - return null; - } fieldSchema = types.stream().filter(s -> !s.getType().equals(Schema.Type.NULL)).findFirst().get(); } else { diff --git a/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/migrations/avro/AvroToValueMapperTest.java b/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/migrations/avro/AvroToValueMapperTest.java index d002c5b53d..74b69aca14 100644 --- a/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/migrations/avro/AvroToValueMapperTest.java +++ b/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/migrations/avro/AvroToValueMapperTest.java @@ -158,6 +158,29 @@ public void testAvroFieldToDouble_UnsupportedType() { AvroToValueMapper.avroFieldToDouble(inputValue, SchemaBuilder.builder().booleanType()); } + @Test + public void testAvroFieldToString_valid() { + String result = AvroToValueMapper.avroFieldToString("Hello"); + assertEquals("Hello", result); + + result = AvroToValueMapper.avroFieldToString(""); + assertEquals("", result); + + result = AvroToValueMapper.avroFieldToString(14); + assertEquals("14", result); + + result = AvroToValueMapper.avroFieldToString(513148134L); + assertEquals("513148134", result); + + result = AvroToValueMapper.avroFieldToString(325.532); + assertEquals("325.532", result); + } + + @Test + public void testAvroFieldToString_NullInput() { + assertNull(AvroToValueMapper.avroFieldToString(null)); + } + @Test public void testAvroFieldToNumericBigDecimal_StringInput() { Map testCases = new HashMap<>(); diff --git a/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertorTest.java b/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertorTest.java index 2b713445dd..67216674bd 100644 --- a/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertorTest.java +++ b/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertorTest.java @@ -378,6 +378,42 @@ public void transformChangeEventTest_identityMapper() { assertEquals(expected, actual); } + @Test + public void transformChangeEventTest_nullValues() { + GenericRecord genericRecord = new GenericData.Record(getAllSpannerTypesSchema()); + genericRecord.put("bool_col", null); + genericRecord.put("int_col", null); + genericRecord.put("float_col", null); + genericRecord.put("string_col", null); + genericRecord.put("numeric_col", null); + genericRecord.put("bytes_col", null); + genericRecord.put("timestamp_col", null); + genericRecord.put("date_col", null); + GenericRecordTypeConvertor genericRecordTypeConvertor = + new GenericRecordTypeConvertor(new IdentityMapper(getIdentityDdl()), 
""); + Map actual = + genericRecordTypeConvertor.transformChangeEvent(genericRecord, "all_types"); + Map expected = + Map.of( + "bool_col", + Value.bool(null), + "int_col", + Value.int64(null), + "float_col", + Value.float64(null), + "string_col", + Value.string(null), + "numeric_col", + Value.numeric(null), + "bytes_col", + Value.bytes(null), + "timestamp_col", + Value.timestamp(null), + "date_col", + Value.date(null)); + assertEquals(expected, actual); + } + @Test public void transformChangeEventTest_illegalUnionType() { GenericRecordTypeConvertor genericRecordTypeConvertor = @@ -396,21 +432,6 @@ public void transformChangeEventTest_illegalUnionType() { () -> genericRecordTypeConvertor.getSpannerValue(null, schema, "union_col", Type.string())); } - @Test - public void transformChangeEventTest_nullType() { - GenericRecordTypeConvertor genericRecordTypeConvertor = - new GenericRecordTypeConvertor(new IdentityMapper(getIdentityDdl()), ""); - Schema schema = - SchemaBuilder.builder() - .unionOf() - .nullType() - .and() - .type(Schema.create(Schema.Type.BOOLEAN)) - .endUnion(); - assertNull( - genericRecordTypeConvertor.getSpannerValue(null, schema, "union_col", Type.string())); - } - @Test(expected = IllegalArgumentException.class) public void transformChangeEventTest_incorrectSpannerType() { From 19c59dd1774cffa63fef8dfcc1e299e8bde0692a Mon Sep 17 00:00:00 2001 From: Akshara Uke Date: Tue, 14 May 2024 11:18:33 +0000 Subject: [PATCH 35/70] updated based on review comments --- .../pom.xml | 31 + .../DataChangeRecordTypeConvertor.java | 3 + .../SpannerChangeStreamToGcsSimpleIT.java | 453 ++++++ .../DataChangeRecordTypeConvertorTest.java | 13 + .../session.json | 1262 +++++++++++++++++ .../spanner-schema-simple.sql | 49 + 6 files changed, 1811 insertions(+) create mode 100644 v2/spanner-change-streams-to-sharded-file-sink/src/test/java/com/google/cloud/teleport/v2/templates/SpannerChangeStreamToGcsSimpleIT.java create mode 100644 v2/spanner-change-streams-to-sharded-file-sink/src/test/resources/SpannerChangeStreamToGcsSimpleIT/session.json create mode 100644 v2/spanner-change-streams-to-sharded-file-sink/src/test/resources/SpannerChangeStreamToGcsSimpleIT/spanner-schema-simple.sql diff --git a/v2/spanner-change-streams-to-sharded-file-sink/pom.xml b/v2/spanner-change-streams-to-sharded-file-sink/pom.xml index f329528cf8..8da0f5d2f7 100644 --- a/v2/spanner-change-streams-to-sharded-file-sink/pom.xml +++ b/v2/spanner-change-streams-to-sharded-file-sink/pom.xml @@ -53,6 +53,37 @@ json ${json.version} + + + com.google.cloud.teleport + it-google-cloud-platform + ${project.version} + test + + + com.google.cloud.teleport + it-conditions + ${project.version} + test + + + org.apache.beam + beam-it-jdbc + test + + + mysql + mysql-connector-java + 8.0.30 + test + + + + com.google.cloud.teleport + it-jdbc + ${project.version} + test + diff --git a/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/changestream/DataChangeRecordTypeConvertor.java b/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/changestream/DataChangeRecordTypeConvertor.java index 5c02d7a5af..5c26abd968 100644 --- a/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/changestream/DataChangeRecordTypeConvertor.java +++ b/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/changestream/DataChangeRecordTypeConvertor.java @@ -157,6 +157,9 @@ public 
static BigDecimal toNumericBigDecimal( throws DataChangeRecordConvertorException { String value = toString(changeEvent, key, requiredField); + if (value == null) { + return null; + } if (NumberUtils.isCreatable(value) || NumberUtils.isParsable(value) || isNumeric(value)) { return new BigDecimal(value).setScale(9, RoundingMode.HALF_UP); } diff --git a/v2/spanner-change-streams-to-sharded-file-sink/src/test/java/com/google/cloud/teleport/v2/templates/SpannerChangeStreamToGcsSimpleIT.java b/v2/spanner-change-streams-to-sharded-file-sink/src/test/java/com/google/cloud/teleport/v2/templates/SpannerChangeStreamToGcsSimpleIT.java new file mode 100644 index 0000000000..7fb40e231f --- /dev/null +++ b/v2/spanner-change-streams-to-sharded-file-sink/src/test/java/com/google/cloud/teleport/v2/templates/SpannerChangeStreamToGcsSimpleIT.java @@ -0,0 +1,453 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.v2.templates; + +import static com.google.common.truth.Truth.assertThat; +import static org.apache.beam.it.gcp.artifacts.matchers.ArtifactAsserts.assertThatArtifacts; +import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatPipeline; +import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatResult; + +import com.google.cloud.spanner.Mutation; +import com.google.cloud.teleport.metadata.SkipDirectRunnerTest; +import com.google.cloud.teleport.metadata.TemplateIntegrationTest; +import com.google.cloud.teleport.v2.spanner.migrations.shard.Shard; +import com.google.common.io.Resources; +import com.google.gson.Gson; +import com.google.gson.JsonArray; +import com.google.gson.JsonObject; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Base64; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; +import org.apache.beam.it.common.PipelineLauncher; +import org.apache.beam.it.common.PipelineLauncher.LaunchConfig; +import org.apache.beam.it.common.PipelineOperator; +import org.apache.beam.it.common.utils.PipelineUtils; +import org.apache.beam.it.common.utils.ResourceManagerUtils; +import org.apache.beam.it.conditions.ChainedConditionCheck; +import org.apache.beam.it.gcp.TemplateTestBase; +import org.apache.beam.it.gcp.artifacts.Artifact; +import org.apache.beam.it.gcp.artifacts.utils.ArtifactUtils; +import org.apache.beam.it.gcp.spanner.SpannerResourceManager; +import org.apache.beam.it.gcp.spanner.conditions.SpannerRowsCheck; +import org.apache.beam.it.gcp.storage.GcsResourceManager; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** Integration test for simple test of single 
shard,single table. */ +@Category({TemplateIntegrationTest.class, SkipDirectRunnerTest.class}) +@TemplateIntegrationTest(SpannerChangeStreamsToShardedFileSink.class) +@RunWith(JUnit4.class) +public class SpannerChangeStreamToGcsSimpleIT extends TemplateTestBase { + private static final Logger LOG = LoggerFactory.getLogger(SpannerChangeStreamToGcsSimpleIT.class); + private static SpannerResourceManager spannerResourceManager; + private static SpannerResourceManager spannerMetadataResourceManager; + private static HashSet testInstances = new HashSet<>(); + private static final String spannerDdl = + "SpannerChangeStreamToGcsSimpleIT/spanner-schema-simple.sql"; + private static final String sessionFileResourceName = + "SpannerChangeStreamToGcsSimpleIT/session.json"; + private static PipelineLauncher.LaunchInfo jobInfo; + private static String spannerDatabaseName = ""; + private static String spannerMetadataDatabaseName = ""; + private static GcsResourceManager gcsResourceManager; + + /** + * Does the following setup: + * + *
1. Creates a Spanner database with a given table 2. Creates a shard file with the connection + * details 3. The session file for the same is taken from the resources and uploaded to GCS 4. + * Places the session file and shard file in GCS 5. Creates the change stream in Spanner database + * 6. Creates the metadata database 8. Launches the job to read from Spanner and write to GCS + * + * @throws IOException + */ + @Before + public void setUp() throws IOException { + skipBaseCleanup = true; + synchronized (SpannerChangeStreamToGcsSimpleIT.class) { + testInstances.add(this); + if (jobInfo == null) { + createGcsResourceManager(); + createSpannerDatabase(); + createAndUploadShardConfigToGcs(); + uploadSessionFileToGcs(); + createSpannerMetadataDatabase(); + launchReaderDataflowJob(); + } + } + } + + @AfterClass + public static void cleanUp() throws IOException { + for (SpannerChangeStreamToGcsSimpleIT instance : testInstances) { + instance.tearDownBase(); + } + ResourceManagerUtils.cleanResources( + spannerResourceManager, spannerMetadataResourceManager, gcsResourceManager); + } + + @Test + public void testSingleRecordWrittenToGcs() throws IOException, java.lang.InterruptedException { + // Construct a ChainedConditionCheck with below stages. + // 1. Wait for the metadata table to have the start time of reader job + // 2. Write a single record to Spanner + // 3. Wait on GCS to have the file + // 4. Match the PK in GCS with the PK written to Spanner + ChainedConditionCheck conditionCheck = + ChainedConditionCheck.builder( + List.of( + SpannerRowsCheck.builder( + spannerMetadataResourceManager, "spanner_to_gcs_metadata") + .setMinRows(1) + .setMaxRows(1) + .build())) + .build(); + // Wait for conditions + PipelineOperator.Result result = + pipelineOperator() + .waitForCondition(createConfig(jobInfo, Duration.ofMinutes(10)), conditionCheck); + // Assert Conditions + assertThatResult(result).meetsConditions(); + // Perform writes to Spanner + writeSpannerDataForSingers(1, "FF", "testShardA"); + // Assert file present in GCS with the needed data + assertFileContentsInGCS(); + } + + private void createGcsResourceManager() { + gcsResourceManager = + GcsResourceManager.builder(artifactBucketName, getClass().getSimpleName(), credentials) + .build(); // DB name is appended with prefix to avoid clashes + } + + private void createSpannerDatabase() throws IOException { + spannerResourceManager = + SpannerResourceManager.builder("rr-main-" + testName, PROJECT, REGION) + .maybeUseStaticInstance() + .build(); // DB name is appended with prefix to avoid clashes + String ddl = + String.join( + " ", Resources.readLines(Resources.getResource(spannerDdl), StandardCharsets.UTF_8)); + ddl = ddl.trim(); + String[] ddls = ddl.split(";"); + for (String d : ddls) { + if (!d.isBlank()) { + spannerResourceManager.executeDdlStatement(d); + } + } + spannerDatabaseName = spannerResourceManager.getDatabaseId(); + } + + private void createAndUploadShardConfigToGcs() throws IOException { + List shardNames = new ArrayList<>(); + shardNames.add("testShardA"); + shardNames.add("testShardB"); + shardNames.add("testShardC"); + shardNames.add("testShardD"); + JsonArray ja = new JsonArray(); + + for (String shardName : shardNames) { + Shard shard = new Shard(); + shard.setLogicalShardId(shardName); + shard.setUser("dummy"); + shard.setHost("dummy"); + shard.setPassword("dummy"); + shard.setPort("3306"); + JsonObject jsObj = (JsonObject) new Gson().toJsonTree(shard).getAsJsonObject(); + jsObj.remove("secretManagerUri"); // remove field 
secretManagerUri + ja.add(jsObj); + } + + String shardFileContents = ja.toString(); + LOG.info("Shard file contents: {}", shardFileContents); + // -DartifactBucket has the bucket name + gcsResourceManager.createArtifact("input/shard.json", shardFileContents); + } + + private void uploadSessionFileToGcs() throws IOException { + gcsResourceManager.uploadArtifact( + "input/session.json", Resources.getResource(sessionFileResourceName).getPath()); + } + + private void createSpannerMetadataDatabase() throws IOException { + spannerMetadataResourceManager = + SpannerResourceManager.builder("rr-meta-" + testName, PROJECT, REGION) + .maybeUseStaticInstance() + .build(); // DB name is appended with prefix to avoid clashes + String dummy = "create table t1(id INT64 ) primary key(id)"; + spannerMetadataResourceManager.executeDdlStatement(dummy); + // needed to create separate metadata database + spannerMetadataDatabaseName = spannerMetadataResourceManager.getDatabaseId(); + } + + private void launchReaderDataflowJob() throws IOException { + // default parameters + Map params = + new HashMap<>() { + { + put("sessionFilePath", getGcsFullPath("input/session.json")); + put("instanceId", spannerResourceManager.getInstanceId()); + put("databaseId", spannerResourceManager.getDatabaseId()); + put("spannerProjectId", PROJECT); + put("metadataDatabase", spannerMetadataResourceManager.getDatabaseId()); + put("metadataInstance", spannerMetadataResourceManager.getInstanceId()); + put("sourceShardsFilePath", getGcsFullPath("input/shard.json")); + put("changeStreamName", "allstream"); + put("runIdentifier", "run1"); + put("gcsOutputDirectory", getGcsFullPath("output")); + } + }; + + // Construct template + String jobName = PipelineUtils.createJobName("rr-it"); + // /-DunifiedWorker=true when using runner v2 + LaunchConfig.Builder options = LaunchConfig.builder(jobName, specPath); + options.setParameters(params); + options.addEnvironment("additionalExperiments", Collections.singletonList("use_runner_v2")); + // Run + jobInfo = launchTemplate(options, false); + assertThatPipeline(jobInfo).isRunning(); + } + + private void writeSpannerDataForSingers(int singerId, String firstName, String shardId) { + // Write a single record to Spanner for the given logical shard + Mutation m = + Mutation.newInsertOrUpdateBuilder("Singers") + .set("SingerId") + .to(singerId) + .set("FirstName") + .to(firstName) + .set("migration_shard_id") + .to(shardId) + .build(); + spannerResourceManager.write(m); + } + + private void assertFileContentsInGCS() throws IOException, java.lang.InterruptedException { + List artifacts = null; + Thread.sleep( + 180000); // wait sufficiently for the file to be generated. It takes about 3 minutes + // at-least. If not present wait additional 3 minutes before failing + for (int i = 0; i < 10; i++) { + Thread.sleep(18000); // wait for total 3 minutes over an interval of 18 seconds + artifacts = + gcsResourceManager.listArtifacts("output/testShardA/", Pattern.compile(".*\\.txt$")); + if (artifacts.size() == 1) { + break; + } + } + assertThat(artifacts).hasSize(1); + assertThatArtifacts(artifacts).hasContent("SingerId\\\":\\\"1"); + } + + @Test + public void testMultiShardsRecordWrittenToGcs() + throws IOException, java.lang.InterruptedException { + // Construct a ChainedConditionCheck with below stages. + // 1. Wait for the metadata table to have the start time of reader job + // 2. Write a 2 records per shard to Spanner + // 3. Wait on GCS to have the files + // 4. 
Match the PK in GCS with the PK written to Spanner + ChainedConditionCheck conditionCheck = + ChainedConditionCheck.builder( + List.of( + SpannerRowsCheck.builder( + spannerMetadataResourceManager, "spanner_to_gcs_metadata") + .setMinRows(1) + .setMaxRows(1) + .build())) + .build(); + // Wait for conditions + PipelineOperator.Result result = + pipelineOperator() + .waitForCondition(createConfig(jobInfo, Duration.ofMinutes(10)), conditionCheck); + // Assert Conditions + assertThatResult(result).meetsConditions(); + // Perform writes to Spanner + writeSpannerDataForSingers(2, "two", "testShardB"); + writeSpannerDataForSingers(3, "three", "testShardB"); + writeSpannerDataForSingers(4, "four", "testShardC"); + writeSpannerDataForSingers(5, "five", "testShardC"); + + // Assert file present in GCS with the needed data + assertFileContentsInGCSForMultipleShards(); + } + + private void assertFileContentsInGCSForMultipleShards() + throws IOException, java.lang.InterruptedException { + List artifactsShardB = null; + List artifactsShardC = null; + Thread.sleep( + 180000); // wait sufficiently for the file to be generated. It takes about 3 minutes + // at-least. If not present wait additional 3 minutes before failing + for (int i = 0; i < 10; i++) { + Thread.sleep(18000); // wait for total 3 minutes over an interval of 18 seconds + artifactsShardB = + gcsResourceManager.listArtifacts("output/testShardB/", Pattern.compile(".*\\.txt$")); + artifactsShardC = + gcsResourceManager.listArtifacts("output/testShardC/", Pattern.compile(".*\\.txt$")); + + // Ideally both the mutations written to spanner per shard will commit within 10 seconds. + // But that does not guarantee that they will be in the same file, since they can commit + // within 1 second interval boundary + if (artifactsShardB.size() >= 1 && artifactsShardC.size() >= 1) { + break; + } + } + assertThatArtifacts(artifactsShardB).hasFiles(); + assertThatArtifacts(artifactsShardC).hasFiles(); + // checks that any of the artifact has the given content + assertThatArtifacts(artifactsShardB).hasContent("SingerId\\\":\\\"2"); + assertThatArtifacts(artifactsShardB).hasContent("SingerId\\\":\\\"3"); + assertThatArtifacts(artifactsShardC).hasContent("SingerId\\\":\\\"4"); + assertThatArtifacts(artifactsShardC).hasContent("SingerId\\\":\\\"5"); + } + + @Test + public void testAllDatatypes() throws IOException, java.lang.InterruptedException { + // Construct a ChainedConditionCheck with below stages. + // 1. Wait for the metadata table to have the start time of reader job + // 2. Write a record with + // 3. Wait on GCS to have the files + // 4. 
Match the PK in GCS with the PK written to Spanner + ChainedConditionCheck conditionCheck = + ChainedConditionCheck.builder( + List.of( + SpannerRowsCheck.builder( + spannerMetadataResourceManager, "spanner_to_gcs_metadata") + .setMinRows(1) + .setMaxRows(1) + .build())) + .build(); + // Wait for conditions + PipelineOperator.Result result = + pipelineOperator() + .waitForCondition(createConfig(jobInfo, Duration.ofMinutes(10)), conditionCheck); + // Assert Conditions + assertThatResult(result).meetsConditions(); + // Perform writes to Spanner + writeSpannerDataForAllDatatypes(); + // Assert file present in GCS with the needed data + assertFileContentsInGCSForAllDatatypes(); + } + + private void writeSpannerDataForAllDatatypes() { + // Write a single record to Spanner for logical shard : testD + Mutation m = + Mutation.newInsertOrUpdateBuilder("sample_table") + .set("id") + .to(1) + .set("varchar_column") + .to("abc") + .set("migration_shard_id") + .to("testShardD") + .set("tinyint_column") + .to(1) + .set("text_column") + .to("aaaaaddd") + .set("year_column") + .to("2023") + .set("smallint_column") + .to(22) + .set("bigint_column") + .to(12345678910L) + .set("float_column") + .to(4.2f) + .set("double_column") + .to(42.42d) + .set("blob_column") + .to("abc") + .set("bool_column") + .to(false) + .set("binary_column") + .to(Base64.getEncoder().encodeToString("Hello".getBytes())) + .set("enum_column") + .to("1") + .set("timestamp_column") + .to("2024-05-09T05:40:08.005683553Z") + .set("date_column") + .to("2024-05-09") + .build(); + spannerResourceManager.write(m); + } + + private void assertFileContentsInGCSForAllDatatypes() + throws IOException, java.lang.InterruptedException { + List artifacts = null; + Thread.sleep( + 180000); // wait sufficiently for the file to be generated. It takes about 3 minutes + // at-least. 
If not present wait additional 3 minutes before failing + for (int i = 0; i < 10; i++) { + Thread.sleep(18000); // wait for total 3 minutes over an interval of 18 seconds + artifacts = + gcsResourceManager.listArtifacts("output/testShardD/", Pattern.compile(".*\\.txt$")); + if (artifacts.size() == 1) { + break; + } + } + assertThat(artifacts).hasSize(1); + assertThatArtifacts(artifacts).hasContent("id\\\":\\\"1"); + assertThatArtifacts(artifacts).hasContent("year_column\\\":\\\"2023"); + assertThatArtifacts(artifacts).hasContent("bigint_column\\\":\\\"12345678910"); + assertThatArtifacts(artifacts).hasContent("binary_column\\\":\\\"SGVsbG8"); + assertThatArtifacts(artifacts).hasContent("blob_column\\\":\\\"abc"); + assertThatArtifacts(artifacts).hasContent("bool_column\\\":false"); + assertThatArtifacts(artifacts).hasContent("char_column\\\":null"); + assertThatArtifacts(artifacts).hasContent("date_column\\\":\\\"2024-05-09"); + assertThatArtifacts(artifacts).hasContent("datetime_column\\\":null"); + assertThatArtifacts(artifacts).hasContent("decimal_column\\\":null"); + assertThatArtifacts(artifacts).hasContent("double_column\\\":42.42"); + assertThatArtifacts(artifacts).hasContent("enum_column\\\":\\\"1"); + assertThatArtifacts(artifacts).hasContent("float_column\\\":4.199999809265137"); + assertThatArtifacts(artifacts).hasContent("longblob_column\\\":null"); + assertThatArtifacts(artifacts).hasContent("longtext_column\\\":null"); + assertThatArtifacts(artifacts).hasContent("mediumblob_column\\\":null"); + + assertThatArtifacts(artifacts).hasContent("mediumint_column\\\":null"); + assertThatArtifacts(artifacts).hasContent("mediumtext_column\\\":null"); + assertThatArtifacts(artifacts).hasContent("smallint_column\\\":\\\"22"); + assertThatArtifacts(artifacts).hasContent("text_column\\\":\\\"aaaaaddd"); + assertThatArtifacts(artifacts).hasContent("time_column\\\":null"); + assertThatArtifacts(artifacts) + .hasContent("timestamp_column\\\":\\\"2024-05-09T05:40:08.005683553Z"); + + assertThatArtifacts(artifacts).hasContent("tinyblob_column\\\":null"); + assertThatArtifacts(artifacts).hasContent("tinyint_column\\\":\\\"1"); + assertThatArtifacts(artifacts).hasContent("tinytext_column\\\":null"); + assertThatArtifacts(artifacts).hasContent("update_ts\\\":null"); + assertThatArtifacts(artifacts).hasContent("varbinary_column\\\":null"); + assertThatArtifacts(artifacts).hasContent("varchar_column\\\":\\\"abc"); + } + + private String getGcsFullPath(String artifactId) { + return ArtifactUtils.getFullGcsPath( + artifactBucketName, getClass().getSimpleName(), gcsResourceManager.runId(), artifactId); + } +} diff --git a/v2/spanner-change-streams-to-sharded-file-sink/src/test/java/com/google/cloud/teleport/v2/templates/changestream/DataChangeRecordTypeConvertorTest.java b/v2/spanner-change-streams-to-sharded-file-sink/src/test/java/com/google/cloud/teleport/v2/templates/changestream/DataChangeRecordTypeConvertorTest.java index e55ae3637d..8c150ec65f 100644 --- a/v2/spanner-change-streams-to-sharded-file-sink/src/test/java/com/google/cloud/teleport/v2/templates/changestream/DataChangeRecordTypeConvertorTest.java +++ b/v2/spanner-change-streams-to-sharded-file-sink/src/test/java/com/google/cloud/teleport/v2/templates/changestream/DataChangeRecordTypeConvertorTest.java @@ -374,4 +374,17 @@ public void cannotConvertBooleanToDate() throws Exception { DataChangeRecordTypeConvertor.toDate(ce, "field1", /* requiredField= */ true), Timestamp.parseTimestamp("2020-12-30T12:12:12Z")); } + + /* + * Tests null 
value for NumericBigDecimal + */ + @Test + public void testNullNumericBigDecimal() throws Exception { + String jsonChangeEvent = "{ \"field1\" : null }"; + JsonNode ce = getJsonNode(jsonChangeEvent); + + assertNull( + DataChangeRecordTypeConvertor.toNumericBigDecimal( + ce, "field1", /* requiredField= */ false)); + } } diff --git a/v2/spanner-change-streams-to-sharded-file-sink/src/test/resources/SpannerChangeStreamToGcsSimpleIT/session.json b/v2/spanner-change-streams-to-sharded-file-sink/src/test/resources/SpannerChangeStreamToGcsSimpleIT/session.json new file mode 100644 index 0000000000..48e11ca1ee --- /dev/null +++ b/v2/spanner-change-streams-to-sharded-file-sink/src/test/resources/SpannerChangeStreamToGcsSimpleIT/session.json @@ -0,0 +1,1262 @@ +{ + "SpSchema": { + "t1": { + "Name": "Singers", + "ColIds": [ + "c3", + "c4", + "c5", + "c6", + "c7", + "c36" + ], + "ShardIdColumn": "c36", + "ColDefs": { + "c3": { + "Name": "SingerId", + "T": { + "Name": "INT64", + "Len": 0, + "IsArray": false + }, + "NotNull": true, + "Comment": "From: SingerId bigint(19)", + "Id": "c3" + }, + "c36": { + "Name": "migration_shard_id", + "T": { + "Name": "STRING", + "Len": 50, + "IsArray": false + }, + "NotNull": false, + "Comment": "", + "Id": "c36" + }, + "c4": { + "Name": "FirstName", + "T": { + "Name": "STRING", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: FirstName text(65535)", + "Id": "c4" + }, + "c5": { + "Name": "LastName", + "T": { + "Name": "STRING", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: LastName text(65535)", + "Id": "c5" + }, + "c6": { + "Name": "shardId", + "T": { + "Name": "STRING", + "Len": 20, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: shardId varchar(20)", + "Id": "c6" + }, + "c7": { + "Name": "update_ts", + "T": { + "Name": "TIMESTAMP", + "Len": 0, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: update_ts timestamp", + "Id": "c7" + } + }, + "PrimaryKeys": [ + { + "ColId": "c3", + "Desc": false, + "Order": 2 + }, + { + "ColId": "c36", + "Desc": false, + "Order": 1 + } + ], + "ForeignKeys": null, + "Indexes": null, + "ParentId": "", + "Comment": "Spanner schema for source table Singers", + "Id": "t1" + }, + "t2": { + "Name": "sample_table", + "ColIds": [ + "c8", + "c9", + "c10", + "c11", + "c12", + "c13", + "c14", + "c15", + "c16", + "c17", + "c18", + "c19", + "c20", + "c21", + "c22", + "c23", + "c24", + "c25", + "c26", + "c27", + "c28", + "c29", + "c30", + "c31", + "c32", + "c33", + "c34", + "c35", + "c37" + ], + "ShardIdColumn": "c37", + "ColDefs": { + "c10": { + "Name": "tinyint_column", + "T": { + "Name": "INT64", + "Len": 0, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: tinyint_column tinyint(3)", + "Id": "c10" + }, + "c11": { + "Name": "text_column", + "T": { + "Name": "STRING", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: text_column text(65535)", + "Id": "c11" + }, + "c12": { + "Name": "date_column", + "T": { + "Name": "DATE", + "Len": 0, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: date_column date", + "Id": "c12" + }, + "c13": { + "Name": "smallint_column", + "T": { + "Name": "INT64", + "Len": 0, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: smallint_column smallint(5)", + "Id": "c13" + }, + "c14": { + "Name": "mediumint_column", + "T": { + "Name": "INT64", + "Len": 0, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: 
mediumint_column mediumint(7)", + "Id": "c14" + }, + "c15": { + "Name": "bigint_column", + "T": { + "Name": "INT64", + "Len": 0, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: bigint_column bigint(19)", + "Id": "c15" + }, + "c16": { + "Name": "float_column", + "T": { + "Name": "FLOAT64", + "Len": 0, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: float_column float(10,2)", + "Id": "c16" + }, + "c17": { + "Name": "double_column", + "T": { + "Name": "FLOAT64", + "Len": 0, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: double_column double(22)", + "Id": "c17" + }, + "c18": { + "Name": "decimal_column", + "T": { + "Name": "NUMERIC", + "Len": 0, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: decimal_column decimal(10,2)", + "Id": "c18" + }, + "c19": { + "Name": "datetime_column", + "T": { + "Name": "TIMESTAMP", + "Len": 0, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: datetime_column datetime", + "Id": "c19" + }, + "c20": { + "Name": "timestamp_column", + "T": { + "Name": "TIMESTAMP", + "Len": 0, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: timestamp_column timestamp", + "Id": "c20" + }, + "c21": { + "Name": "time_column", + "T": { + "Name": "STRING", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: time_column time", + "Id": "c21" + }, + "c22": { + "Name": "year_column", + "T": { + "Name": "STRING", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: year_column year", + "Id": "c22" + }, + "c23": { + "Name": "char_column", + "T": { + "Name": "STRING", + "Len": 10, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: char_column char(10)", + "Id": "c23" + }, + "c24": { + "Name": "tinyblob_column", + "T": { + "Name": "BYTES", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: tinyblob_column tinyblob(255)", + "Id": "c24" + }, + "c25": { + "Name": "tinytext_column", + "T": { + "Name": "STRING", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: tinytext_column tinytext(255)", + "Id": "c25" + }, + "c26": { + "Name": "blob_column", + "T": { + "Name": "BYTES", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: blob_column blob(65535)", + "Id": "c26" + }, + "c27": { + "Name": "mediumblob_column", + "T": { + "Name": "BYTES", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: mediumblob_column mediumblob(16777215)", + "Id": "c27" + }, + "c28": { + "Name": "mediumtext_column", + "T": { + "Name": "STRING", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: mediumtext_column mediumtext(16777215)", + "Id": "c28" + }, + "c29": { + "Name": "longblob_column", + "T": { + "Name": "BYTES", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: longblob_column longblob(4294967295)", + "Id": "c29" + }, + "c30": { + "Name": "longtext_column", + "T": { + "Name": "STRING", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: longtext_column longtext(4294967295)", + "Id": "c30" + }, + "c31": { + "Name": "enum_column", + "T": { + "Name": "STRING", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: enum_column enum(1)", + "Id": "c31" + }, + "c32": { + "Name": "bool_column", + "T": { + 
"Name": "BOOL", + "Len": 0, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: bool_column tinyint(1)", + "Id": "c32" + }, + "c33": { + "Name": "binary_column", + "T": { + "Name": "BYTES", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: binary_column binary(20)", + "Id": "c33" + }, + "c34": { + "Name": "varbinary_column", + "T": { + "Name": "BYTES", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: varbinary_column varbinary(20)", + "Id": "c34" + }, + "c35": { + "Name": "update_ts", + "T": { + "Name": "TIMESTAMP", + "Len": 0, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: update_ts timestamp", + "Id": "c35" + }, + "c37": { + "Name": "migration_shard_id", + "T": { + "Name": "STRING", + "Len": 50, + "IsArray": false + }, + "NotNull": false, + "Comment": "", + "Id": "c37" + }, + "c8": { + "Name": "id", + "T": { + "Name": "INT64", + "Len": 0, + "IsArray": false + }, + "NotNull": true, + "Comment": "From: id int(10)", + "Id": "c8" + }, + "c9": { + "Name": "varchar_column", + "T": { + "Name": "STRING", + "Len": 20, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: varchar_column varchar(20)", + "Id": "c9" + } + }, + "PrimaryKeys": [ + { + "ColId": "c8", + "Desc": false, + "Order": 2 + }, + { + "ColId": "c37", + "Desc": false, + "Order": 1 + } + ], + "ForeignKeys": null, + "Indexes": null, + "ParentId": "", + "Comment": "Spanner schema for source table sample_table", + "Id": "t2" + } + }, + "SyntheticPKeys": {}, + "SrcSchema": { + "t1": { + "Name": "Singers", + "Schema": "alltypes", + "ColIds": [ + "c3", + "c4", + "c5", + "c6", + "c7" + ], + "ColDefs": { + "c3": { + "Name": "SingerId", + "Type": { + "Name": "bigint", + "Mods": [ + 19 + ], + "ArrayBounds": null + }, + "NotNull": true, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c3" + }, + "c4": { + "Name": "FirstName", + "Type": { + "Name": "text", + "Mods": [ + 65535 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c4" + }, + "c5": { + "Name": "LastName", + "Type": { + "Name": "text", + "Mods": [ + 65535 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c5" + }, + "c6": { + "Name": "shardId", + "Type": { + "Name": "varchar", + "Mods": [ + 20 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c6" + }, + "c7": { + "Name": "update_ts", + "Type": { + "Name": "timestamp", + "Mods": null, + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c7" + } + }, + "PrimaryKeys": [ + { + "ColId": "c3", + "Desc": false, + "Order": 1 + } + ], + "ForeignKeys": null, + "Indexes": null, + "Id": "t1" + }, + "t2": { + "Name": "sample_table", + "Schema": "alltypes", + "ColIds": [ + "c8", + "c9", + "c10", + "c11", + "c12", + "c13", + "c14", + "c15", + "c16", + "c17", + "c18", + "c19", + "c20", + "c21", + 
"c22", + "c23", + "c24", + "c25", + "c26", + "c27", + "c28", + "c29", + "c30", + "c31", + "c32", + "c33", + "c34", + "c35" + ], + "ColDefs": { + "c10": { + "Name": "tinyint_column", + "Type": { + "Name": "tinyint", + "Mods": [ + 3 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c10" + }, + "c11": { + "Name": "text_column", + "Type": { + "Name": "text", + "Mods": [ + 65535 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c11" + }, + "c12": { + "Name": "date_column", + "Type": { + "Name": "date", + "Mods": null, + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c12" + }, + "c13": { + "Name": "smallint_column", + "Type": { + "Name": "smallint", + "Mods": [ + 5 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c13" + }, + "c14": { + "Name": "mediumint_column", + "Type": { + "Name": "mediumint", + "Mods": [ + 7 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c14" + }, + "c15": { + "Name": "bigint_column", + "Type": { + "Name": "bigint", + "Mods": [ + 19 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c15" + }, + "c16": { + "Name": "float_column", + "Type": { + "Name": "float", + "Mods": [ + 10, + 2 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c16" + }, + "c17": { + "Name": "double_column", + "Type": { + "Name": "double", + "Mods": [ + 22 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c17" + }, + "c18": { + "Name": "decimal_column", + "Type": { + "Name": "decimal", + "Mods": [ + 10, + 2 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c18" + }, + "c19": { + "Name": "datetime_column", + "Type": { + "Name": "datetime", + "Mods": null, + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c19" + }, + "c20": { + "Name": "timestamp_column", + "Type": { + "Name": "timestamp", + "Mods": null, + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c20" + }, + "c21": { + "Name": 
"time_column", + "Type": { + "Name": "time", + "Mods": null, + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c21" + }, + "c22": { + "Name": "year_column", + "Type": { + "Name": "year", + "Mods": null, + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c22" + }, + "c23": { + "Name": "char_column", + "Type": { + "Name": "char", + "Mods": [ + 10 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c23" + }, + "c24": { + "Name": "tinyblob_column", + "Type": { + "Name": "tinyblob", + "Mods": [ + 255 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c24" + }, + "c25": { + "Name": "tinytext_column", + "Type": { + "Name": "tinytext", + "Mods": [ + 255 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c25" + }, + "c26": { + "Name": "blob_column", + "Type": { + "Name": "blob", + "Mods": [ + 65535 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c26" + }, + "c27": { + "Name": "mediumblob_column", + "Type": { + "Name": "mediumblob", + "Mods": [ + 16777215 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c27" + }, + "c28": { + "Name": "mediumtext_column", + "Type": { + "Name": "mediumtext", + "Mods": [ + 16777215 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c28" + }, + "c29": { + "Name": "longblob_column", + "Type": { + "Name": "longblob", + "Mods": [ + 4294967295 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c29" + }, + "c30": { + "Name": "longtext_column", + "Type": { + "Name": "longtext", + "Mods": [ + 4294967295 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c30" + }, + "c31": { + "Name": "enum_column", + "Type": { + "Name": "enum", + "Mods": [ + 1 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c31" + }, + "c32": { + "Name": "bool_column", + "Type": { + "Name": "tinyint", + "Mods": [ + 1 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": 
false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c32" + }, + "c33": { + "Name": "binary_column", + "Type": { + "Name": "binary", + "Mods": [ + 20 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c33" + }, + "c34": { + "Name": "varbinary_column", + "Type": { + "Name": "varbinary", + "Mods": [ + 20 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c34" + }, + "c35": { + "Name": "update_ts", + "Type": { + "Name": "timestamp", + "Mods": null, + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c35" + }, + "c8": { + "Name": "id", + "Type": { + "Name": "int", + "Mods": [ + 10 + ], + "ArrayBounds": null + }, + "NotNull": true, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c8" + }, + "c9": { + "Name": "varchar_column", + "Type": { + "Name": "varchar", + "Mods": [ + 20 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c9" + } + }, + "PrimaryKeys": [ + { + "ColId": "c8", + "Desc": false, + "Order": 1 + } + ], + "ForeignKeys": null, + "Indexes": null, + "Id": "t2" + } + }, + "SchemaIssues": { + "t1": { + "ColumnLevelIssues": { + "c3": [], + "c36": [ + 28 + ], + "c4": [], + "c5": [], + "c6": [], + "c7": [] + }, + "TableLevelIssues": null + }, + "t2": { + "ColumnLevelIssues": { + "c10": [ + 13 + ], + "c13": [ + 13 + ], + "c14": [ + 13 + ], + "c16": [ + 13 + ], + "c19": [ + 12 + ], + "c21": [ + 14 + ], + "c22": [ + 14 + ], + "c37": [ + 28 + ], + "c8": [ + 13 + ] + }, + "TableLevelIssues": null + } + }, + "Location": {}, + "TimezoneOffset": "+00:00", + "SpDialect": "google_standard_sql", + "UniquePKey": {}, + "Rules": [ + { + "Id": "r38", + "Name": "r38", + "Type": "add_shard_id_primary_key", + "ObjectType": "", + "AssociatedObjects": "All Tables", + "Enabled": true, + "Data": { + "AddedAtTheStart": true + }, + "AddedOn": { + "TimeOffset": null + } + } + ], + "IsSharded": true + } \ No newline at end of file diff --git a/v2/spanner-change-streams-to-sharded-file-sink/src/test/resources/SpannerChangeStreamToGcsSimpleIT/spanner-schema-simple.sql b/v2/spanner-change-streams-to-sharded-file-sink/src/test/resources/SpannerChangeStreamToGcsSimpleIT/spanner-schema-simple.sql new file mode 100644 index 0000000000..0be1967989 --- /dev/null +++ b/v2/spanner-change-streams-to-sharded-file-sink/src/test/resources/SpannerChangeStreamToGcsSimpleIT/spanner-schema-simple.sql @@ -0,0 +1,49 @@ +CREATE TABLE Singers ( + SingerId INT64 NOT NULL, + FirstName STRING(MAX), + LastName STRING(MAX), + shardId STRING(20), + update_ts TIMESTAMP, + migration_shard_id STRING(50), +) PRIMARY KEY(SingerId, migration_shard_id); + + +CREATE TABLE sample_table ( + id INT64 NOT NULL, + varchar_column STRING(20), + tinyint_column INT64, + text_column STRING(MAX), + date_column DATE, + smallint_column INT64, + mediumint_column 
INT64, + bigint_column INT64, + float_column FLOAT64, + double_column FLOAT64, + decimal_column NUMERIC, + datetime_column TIMESTAMP, + timestamp_column TIMESTAMP, + time_column STRING(MAX), + year_column STRING(MAX), + char_column STRING(10), + tinyblob_column BYTES(MAX), + tinytext_column STRING(MAX), + blob_column BYTES(MAX), + mediumblob_column BYTES(MAX), + mediumtext_column STRING(MAX), + longblob_column BYTES(MAX), + longtext_column STRING(MAX), + enum_column STRING(MAX), + bool_column BOOL, + binary_column BYTES(MAX), + varbinary_column BYTES(MAX), + update_ts TIMESTAMP, + migration_shard_id STRING(50), +) PRIMARY KEY(id, migration_shard_id); + + + +CREATE CHANGE STREAM allstream + FOR ALL OPTIONS ( + value_capture_type = 'NEW_ROW', + retention_period = '7d' +); \ No newline at end of file From 73cc74c3140a3f8fa280143a9455e498981f44cd Mon Sep 17 00:00:00 2001 From: Danny McCormick Date: Tue, 14 May 2024 10:45:47 -0400 Subject: [PATCH 36/70] Add guidance on deprecating parameters --- contributor-docs/code-contributions.md | 27 ++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/contributor-docs/code-contributions.md b/contributor-docs/code-contributions.md index d22e6b6596..dbcd309025 100644 --- a/contributor-docs/code-contributions.md +++ b/contributor-docs/code-contributions.md @@ -323,6 +323,33 @@ BulkInsertMethodOptions getBulkInsertMethod(); Note: `order` is relevant for templates that can be used from the UI, and specify the relative order of parameters. +#### Template Parameter Compatibility/Deprecation + +Dataflow Templates attempt to maintain full backwards compatibility with previous versions of the template, such that templates launched by automation or a schedule will continue to function indefinitely. If you need to deprecate a TemplateParameter, you can do so by adding the `@Deprecated` annotation to the template, for example: + +```java +@TemplateParameter.Text( + order = 21, + optional = true, + description = "Deprecated, use XYZ instead", + helpText = "This parameter has been deprecated, please use XYZ parameter instead") +@Deprecated +BulkInsertMethodOptions getBulkInsertMethod(); +``` + +You can optionally add a `hiddenUi` attribute to hide it in the UI. 
This will still keep it available via gcloud/REST API calls: + +```java +@TemplateParameter.Text( + order = 21, + optional = true, + description = "Deprecated, use XYZ instead", + helpText = "This parameter has been deprecated, please use XYZ parameter instead", + hiddenUi = true) +@Deprecated +BulkInsertMethodOptions getBulkInsertMethod(); +``` + #### @TemplateIntegrationTest Annotation This annotation should be used by classes that are used for integration tests of From 60d9dd742ffa88427c92d4ecad3cf5a347438996 Mon Sep 17 00:00:00 2001 From: Danny McCormick Date: Tue, 14 May 2024 16:51:03 -0400 Subject: [PATCH 37/70] Add more guidance on adding new templates --- .../add-flex-template.md | 2 +- .../add-load-test.md | 2 +- contributor-docs/code-contributions.md | 24 ++++++++++++++++++ contributor-docs/maintainers-guide.md | 5 ++-- tutorials/README.md | 25 ------------------- 5 files changed, 29 insertions(+), 29 deletions(-) rename tutorials/flex-template.md => contributor-docs/add-flex-template.md (99%) rename tutorials/load-test.md => contributor-docs/add-load-test.md (99%) delete mode 100644 tutorials/README.md diff --git a/tutorials/flex-template.md b/contributor-docs/add-flex-template.md similarity index 99% rename from tutorials/flex-template.md rename to contributor-docs/add-flex-template.md index 8646ca5ef0..ab77d1f8e3 100644 --- a/tutorials/flex-template.md +++ b/contributor-docs/add-flex-template.md @@ -1,4 +1,4 @@ -# Flex Template Tutorial +# Adding a Flex Template ## Overview diff --git a/tutorials/load-test.md b/contributor-docs/add-load-test.md similarity index 99% rename from tutorials/load-test.md rename to contributor-docs/add-load-test.md index 6b04e7f275..a1dd508806 100644 --- a/tutorials/load-test.md +++ b/contributor-docs/add-load-test.md @@ -1,4 +1,4 @@ -# Load Test Tutorial +# Adding a Load Test ## Overview diff --git a/contributor-docs/code-contributions.md b/contributor-docs/code-contributions.md index dbcd309025..b50aea0849 100644 --- a/contributor-docs/code-contributions.md +++ b/contributor-docs/code-contributions.md @@ -243,6 +243,30 @@ The parameter `-Dtest=` can be given to test a single class (e.g., `-Dtest=Pubsu The same happens when the test is executed from an IDE, just make sure to add the parameters `-Dproject=`, `-DartifactBucket=` and `-Dregion=` as program or VM arguments. +### Running Load Tests + +For information on adding and running load tests, see [Adding a Load Test](./add-load-test.md). + +### Adding New Templates + +If you are interested in introducing a new template, please file an issue using the [Google Issue Tracker](https://issuetracker.google.com/issues/new?component=187168&template=0) before doing so. You need approval before moving forward. Any new templates must be flex templates in the v2 directory. + +Once you have approval, follow these steps from the root of the repo to get started on your template. + +First, install the [maven plugin](#templates-plugin), then create a subdirectory in the repository: `v2/`. `` should follow the pattern `-to-`. From there, you can follow the steps in [Adding a Flex Template](./add-flex-template.md) to develop your template. + +All new templates must comply with the following guidance: + +- [ ] Template addition has been approved by the core Dataflow Templates team. +- [ ] The template must be a Flex Template located in the `v2/