From bd99fdee5c12bdccea46a8c2e53eb09eee3a7d6c Mon Sep 17 00:00:00 2001 From: Archana Gupta Date: Tue, 30 Apr 2024 16:40:19 +0530 Subject: [PATCH 01/70] Modified parameters for all templates assigned --- .../teleport/bigtable/BigtableToAvro.java | 14 +++---- .../teleport/spanner/ExportPipeline.java | 39 ++++++------------ .../teleport/spanner/ImportPipeline.java | 30 +++++--------- .../teleport/templates/PubsubToPubsub.java | 11 ++--- .../teleport/templates/TextToPubsub.java | 6 +-- .../options/AstraDbToBigQueryOptions.java | 18 ++++---- .../SpannerChangeStreamsToGcsOptions.java | 41 ++++++------------- .../v2/templates/PubSubToBigQuery.java | 10 ++--- .../v2/options/JdbcToBigQueryOptions.java | 39 +++++++----------- .../teleport/v2/templates/JmsToPubsub.java | 15 ++++--- 10 files changed, 79 insertions(+), 144 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java b/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java index 77bf83a85f..f9ede3e24b 100644 --- a/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java +++ b/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java @@ -78,8 +78,7 @@ public interface Options extends PipelineOptions { order = 1, description = "Project ID", helpText = - "The ID of the Google Cloud project of the Cloud Bigtable instance that you want to" - + " read data from") + "The ID of the Google Cloud project of the Bigtable instance that you want to read data from.") ValueProvider getBigtableProjectId(); @SuppressWarnings("unused") @@ -89,7 +88,7 @@ public interface Options extends PipelineOptions { order = 2, regexes = {"[a-z][a-z0-9\\-]+[a-z0-9]"}, description = "Instance ID", - helpText = "The ID of the Cloud Bigtable instance that contains the table") + helpText = "The ID of the Bigtable instance that contains the table.") ValueProvider getBigtableInstanceId(); @SuppressWarnings("unused") @@ -99,7 +98,7 @@ public interface Options extends PipelineOptions { order = 3, regexes = {"[_a-zA-Z0-9][-_.a-zA-Z0-9]*"}, description = "Table ID", - helpText = "The ID of the Cloud Bigtable table to read") + helpText = "The ID of the Bigtable table to export.") ValueProvider getBigtableTableId(); @SuppressWarnings("unused") @@ -109,9 +108,8 @@ public interface Options extends PipelineOptions { order = 4, description = "Output file directory in Cloud Storage", helpText = - "The path and filename prefix for writing output files. Must end with a slash. DateTime" - + " formatting is used to parse directory path for date & time formatters.", - example = "gs://your-bucket/your-path") + "The Cloud Storage path where data is written.", + example = "gs://mybucket/somefolder") ValueProvider getOutputDirectory(); @SuppressWarnings("unused") @@ -120,7 +118,7 @@ public interface Options extends PipelineOptions { @TemplateParameter.Text( order = 5, description = "Avro file prefix", - helpText = "The prefix of the Avro file name. For example, \"table1-\"") + helpText = "The prefix of the Avro filename. 
For example, output-.") @Default.String("part") ValueProvider getFilenamePrefix(); diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java index 0fdb720cc7..3f68b73767 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java @@ -79,7 +79,7 @@ public interface ExportPipelineOptions extends PipelineOptions { order = 1, regexes = {"[a-z][a-z0-9\\-]*[a-z0-9]"}, description = "Cloud Spanner instance ID", - helpText = "The instance ID of the Cloud Spanner database that you want to export.") + helpText = "The instance ID of the Spanner database that you want to export.") ValueProvider getInstanceId(); void setInstanceId(ValueProvider value); @@ -88,7 +88,7 @@ public interface ExportPipelineOptions extends PipelineOptions { order = 2, regexes = {"[a-z][a-z0-9_\\-]*[a-z0-9]"}, description = "Cloud Spanner database ID", - helpText = "The database ID of the Cloud Spanner database that you want to export.") + helpText = "The database ID of the Spanner database that you want to export.") ValueProvider getDatabaseId(); void setDatabaseId(ValueProvider value); @@ -97,8 +97,7 @@ public interface ExportPipelineOptions extends PipelineOptions { order = 3, description = "Cloud Storage output directory", helpText = - "The Cloud Storage path where the Avro files should be exported to. A new directory" - + " will be created under this path that contains the export.", + "The Cloud Storage path you want to export Avro files to. The export job creates a new directory under this path that contains the exported files.", example = "gs://your-bucket/your-path") ValueProvider getOutputDir(); @@ -109,8 +108,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Cloud Storage temp directory for storing Avro files", helpText = - "The Cloud Storage path where the temporary Avro files can be created. Ex:" - + " gs://your-bucket/your-path") + "(Optional) The Cloud Storage path where temporary Avro files are written.") ValueProvider getAvroTempDirectory(); void setAvroTempDirectory(ValueProvider value); @@ -148,11 +146,8 @@ public interface ExportPipelineOptions extends PipelineOptions { }, description = "Snapshot time", helpText = - "Specifies the snapshot time as RFC 3339 format in UTC time without the timezone" - + " offset(always ends in 'Z'). Timestamp must be in the past and Maximum timestamp" - + " staleness applies. See" - + " https://cloud.google.com/spanner/docs/timestamp-bounds#maximum_timestamp_staleness", - example = "1990-12-31T23:59:59Z") + "(Optional) The timestamp that corresponds to the version of the Spanner database that you want to read. The timestamp must be specified as per RFC 3339 UTC "Zulu" format. 
The timestamp must be in the past and Maximum timestamp staleness applies.", + example = "1990-12-31T23:59:60Z") @Default.String(value = "") ValueProvider getSnapshotTime(); @@ -162,7 +157,7 @@ public interface ExportPipelineOptions extends PipelineOptions { order = 8, optional = true, description = "Cloud Spanner Project Id", - helpText = "The project ID of the Cloud Spanner instance.") + helpText = "(Optional) The Google Cloud Project ID of the Spanner database that you want to read data from.") ValueProvider getSpannerProjectId(); void setSpannerProjectId(ValueProvider value); @@ -172,8 +167,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Export Timestamps as Timestamp-micros type", helpText = - "If true, Timestamps are exported as timestamp-micros type. Timestamps are exported as" - + " ISO8601 strings at nanosecond precision by default.") + "(Optional) If true, timestamps are exported as a long type with timestamp-micros logical type. By default, timestamps are exported as ISO-8601 strings at nanosecond precision.") @Default.Boolean(false) ValueProvider getShouldExportTimestampAsLogicalType(); @@ -185,10 +179,7 @@ public interface ExportPipelineOptions extends PipelineOptions { regexes = {"^[a-zA-Z0-9_]+(,[a-zA-Z0-9_]+)*$"}, description = "Cloud Spanner table name(s).", helpText = - "If provided, only this comma separated list of tables are exported. Ancestor tables" - + " and tables that are referenced via foreign keys are required. If not explicitly" - + " listed, the `shouldExportRelatedTables` flag must be set for a successful" - + " export.") + "(Optional) A comma-separated list of tables specifying the subset of the Spanner database to export. If you set this parameter, you must include all of the related tables (parent tables and foreign key referenced tables) or else set the shouldExportRelatedTables parameter to true.") @Default.String(value = "") ValueProvider getTableNames(); @@ -199,10 +190,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Export necessary Related Spanner tables.", helpText = - "Used in conjunction with `tableNames`. If true, add related tables necessary for the" - + " export, such as interleaved parent tables and foreign keys tables. If" - + " `tableNames` is specified but doesn't include related tables, this option must" - + " be set to true for a successful export.") + "\t(Optional) Specifies whether to include related tables. This parameter is used in conjunction with the tableNames parameter.") @Default.Boolean(false) ValueProvider getShouldExportRelatedTables(); @@ -218,8 +206,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Priority for Spanner RPC invocations", helpText = - "The request priority for Cloud Spanner calls. The value must be one of:" - + " [HIGH,MEDIUM,LOW].") + "(Optional) The request priority for Spanner calls. Possible values are HIGH, MEDIUM, LOW. The default value is MEDIUM.") ValueProvider getSpannerPriority(); void setSpannerPriority(ValueProvider value); @@ -229,9 +216,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Use independent compute resource (Spanner DataBoost).", helpText = - "Use Spanner on-demand compute so the export job will run on independent compute" - + " resources and have no impact to current Spanner workloads. 
This will incur" - + " additional charges in Spanner.") + "(Optional) Set to true to use the compute resources of Spanner Data Boost to run the job with near-zero impact on Spanner OLTP workflows. This requires the spanner.databases.useDataBoost IAM permission. For more information, see [Data Boost overview](https://cloud.google.com/spanner/docs/databoost/databoost-overview).") @Default.Boolean(false) ValueProvider getDataBoostEnabled(); diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java index bd2b0aee6e..8470c5c17a 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java @@ -67,7 +67,7 @@ public interface Options extends PipelineOptions { order = 1, regexes = {"^[a-z0-9\\-]+$"}, description = "Cloud Spanner instance ID", - helpText = "The instance ID of the Cloud Spanner database that you want to import to.") + helpText = "The instance ID of the Spanner database.") ValueProvider getInstanceId(); void setInstanceId(ValueProvider value); @@ -77,8 +77,7 @@ public interface Options extends PipelineOptions { regexes = {"^[a-z_0-9\\-]+$"}, description = "Cloud Spanner database ID", helpText = - "The database ID of the Cloud Spanner database that you want to import into (must" - + " already exist).") + "The database ID of the Spanner database.") ValueProvider getDatabaseId(); void setDatabaseId(ValueProvider value); @@ -86,7 +85,7 @@ public interface Options extends PipelineOptions { @TemplateParameter.GcsReadFolder( order = 3, description = "Cloud storage input directory", - helpText = "The Cloud Storage path where the Avro files should be imported from.") + helpText = "The Cloud Storage path where the Avro files are imported from.") ValueProvider getInputDir(); void setInputDir(ValueProvider value); @@ -107,9 +106,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Indexes", helpText = - "By default the import pipeline is not blocked on index creation, and it " - + "may complete with indexes still being created in the background. If true, the " - + "pipeline waits until indexes are created.") + "Optional: If true, the pipeline waits for indexes to be created. If false, the job might complete while indexes are still being created in the background. Default: false.") @Default.Boolean(false) ValueProvider getWaitForIndexes(); @@ -120,9 +117,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Foreign Keys", helpText = - "By default the import pipeline is not blocked on foreign key creation, and it may" - + " complete with foreign keys still being created in the background. If true, the" - + " pipeline waits until foreign keys are created.") + "Optional: If true, the pipeline waits for foreign keys to be created. If false, the job might complete while foreign keys are still being created in the background. Default: false.") @Default.Boolean(false) ValueProvider getWaitForForeignKeys(); @@ -133,8 +128,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Change Streams", helpText = - "By default the import pipeline is blocked on change stream creation. If false, it may" - + " complete with change streams still being created in the background.") + "Optional: If true, the pipeline waits for change streams to be created. 
If false, the job might complete while change streams are still being created in the background. Default: true.") @Default.Boolean(true) ValueProvider getWaitForChangeStreams(); @@ -157,10 +151,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Create Indexes early", helpText = - "Flag to turn off early index creation if there are many indexes. Indexes and Foreign" - + " keys are created after dataload. If there are more than 40 DDL statements to be" - + " executed after dataload, it is preferable to create the indexes before dataload." - + " This is the flag to turn the feature off.") + "Optional: Specifies whether to enable early index creation. If the template runs a large number of DDL statements, it's more efficient to create indexes before loading data. Therefore, the default behavior is to create the indexes first when the number of DDL statements exceeds a threshold. To disable this feature, set earlyIndexCreateFlag to false. Default: true.") @Default.Boolean(true) ValueProvider getEarlyIndexCreateFlag(); @@ -175,7 +166,7 @@ public interface Options extends PipelineOptions { order = 9, optional = true, description = "Cloud Spanner Project Id", - helpText = "The project ID of the Cloud Spanner instance.") + helpText = "\tOptional: The Google Cloud project ID of the Spanner database. If not set, the default Google Cloud project is used.") ValueProvider getSpannerProjectId(); void setSpannerProjectId(ValueProvider value); @@ -187,7 +178,7 @@ public interface Options extends PipelineOptions { optional = true, regexes = {"[0-9]+"}, description = "DDL Creation timeout in minutes", - helpText = "DDL Creation timeout in minutes.") + helpText = "Optional: The timeout, in minutes, for DDL statements performed by the template. The default value is 30 minutes.") @Default.Integer(30) ValueProvider getDdlCreationTimeoutInMinutes(); @@ -203,8 +194,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Priority for Spanner RPC invocations", helpText = - "The request priority for Cloud Spanner calls. The value must be one of:" - + " [HIGH,MEDIUM,LOW].") + "Optional: The request priority for Spanner calls. Possible values are HIGH, MEDIUM, LOW. The default value is MEDIUM.") ValueProvider getSpannerPriority(); void setSpannerPriority(ValueProvider value); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java index 6b49dede09..770cf5c260 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java @@ -132,7 +132,7 @@ public interface Options extends PipelineOptions, StreamingOptions { order = 1, description = "Pub/Sub input subscription", helpText = - "Pub/Sub subscription to read the input from, in the format of 'projects/your-project-id/subscriptions/your-subscription-name'", + "Pub/Sub subscription to read the input from. For example, projects//subscriptions/.", example = "projects/your-project-id/subscriptions/your-subscription-name") @Validation.Required ValueProvider getInputSubscription(); @@ -143,7 +143,7 @@ public interface Options extends PipelineOptions, StreamingOptions { order = 2, description = "Output Pub/Sub topic", helpText = - "The name of the topic to which data should published, in the format of 'projects/your-project-id/topics/your-topic-name'", + "Cloud Pub/Sub topic to write the output to. 
For example, projects//topics/.", example = "projects/your-project-id/topics/your-topic-name") @Validation.Required ValueProvider getOutputTopic(); @@ -155,7 +155,7 @@ public interface Options extends PipelineOptions, StreamingOptions { optional = true, description = "Event filter key", helpText = - "Attribute key by which events are filtered. No filters are applied if no key is specified.") + "(Optional) Filter events based on an attribute key. No filters are applied if filterKey is not specified.") ValueProvider getFilterKey(); void setFilterKey(ValueProvider filterKey); @@ -165,10 +165,7 @@ public interface Options extends PipelineOptions, StreamingOptions { optional = true, description = "Event filter value", helpText = - "Filter attribute value to use if an event filter key is provided. Accepts a valid " - + "Java Regex string as an event filter value. In case a regex is provided, the complete " - + "expression should match in order for the message to be filtered. Partial matches (e.g. " - + "substring) will not be filtered. A null event filter value is used by default.") + "(Optional) Filter attribute value to use in case a filterKey is provided. A null filterValue is used by default.") ValueProvider getFilterValue(); void setFilterValue(ValueProvider filterValue); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/TextToPubsub.java b/v1/src/main/java/com/google/cloud/teleport/templates/TextToPubsub.java index 5972905115..b1cd1b8e02 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/TextToPubsub.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/TextToPubsub.java @@ -63,8 +63,8 @@ public interface Options extends PipelineOptions { @TemplateParameter.GcsReadFile( order = 1, description = "Cloud Storage Input File(s)", - helpText = "Path of the file pattern glob to read from.", - example = "gs://your-bucket/path/*.txt") + helpText = "The input file pattern to read from.", + example = "gs://bucket-name/files/*.json") @Required ValueProvider getInputFilePattern(); @@ -74,7 +74,7 @@ public interface Options extends PipelineOptions { order = 2, description = "Output Pub/Sub topic", helpText = - "The name of the topic to which data should published, in the format of 'projects/your-project-id/topics/your-topic-name'", + "The Pub/Sub input topic to write to. The name must be in the format of projects//topics/.", example = "projects/your-project-id/topics/your-topic-name") @Required ValueProvider getOutputTopic(); diff --git a/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java b/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java index bc9946773f..80597dbbbf 100644 --- a/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java +++ b/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java @@ -33,9 +33,7 @@ interface BigQueryWriteOptions extends PipelineOptions, DataflowPipelineOptions description = "BigQuery output table", optional = true, helpText = - "The BigQuery table location to write the output to. " - + "The table should be in the format `:.`. " - + "The table's schema must match the input objects.") + "Optional: The BigQuery table location to write the output to. The table should be in the format :.. 
The table's schema must match the input objects.") String getOutputTableSpec(); @SuppressWarnings("unused") @@ -47,7 +45,7 @@ interface AstraDbSourceOptions extends PipelineOptions { @TemplateParameter.Text( order = 1, description = "Astra token", - helpText = "Token value or secret resource ID", + helpText = "Token value or secret resource ID.", example = "AstraCS:abcdefghij") @Validation.Required @SuppressWarnings("unused") @@ -59,7 +57,7 @@ interface AstraDbSourceOptions extends PipelineOptions { @TemplateParameter.Text( order = 2, description = "Database identifier", - helpText = "Database unique identifier (uuid)", + helpText = "Database unique identifier (uuid).", example = "cf7af129-d33a-498f-ad06-d97a6ee6eb7") @Validation.Required @SuppressWarnings("unused") @@ -72,7 +70,7 @@ interface AstraDbSourceOptions extends PipelineOptions { order = 3, description = "Cassandra keyspace", regexes = {"^[a-zA-Z0-9][a-zA-Z0-9_]{0,47}$"}, - helpText = "Name of the Cassandra keyspace inside Astra database") + helpText = "Name of the Cassandra keyspace inside Astra database.") String getAstraKeyspace(); @SuppressWarnings("unused") @@ -82,7 +80,7 @@ interface AstraDbSourceOptions extends PipelineOptions { order = 4, description = "Cassandra table", regexes = {"^[a-zA-Z][a-zA-Z0-9_]*$"}, - helpText = "Name of the table inside the Cassandra database", + helpText = "Name of the table inside the Cassandra database.", example = "my_table") @SuppressWarnings("unused") String getAstraTable(); @@ -94,7 +92,7 @@ interface AstraDbSourceOptions extends PipelineOptions { order = 5, optional = true, description = "Cassandra CQL Query", - helpText = "Query to filter rows instead of reading the whole table") + helpText = "Optional: Query to filter rows instead of reading the whole table.") @SuppressWarnings("unused") String getAstraQuery(); @@ -106,7 +104,7 @@ interface AstraDbSourceOptions extends PipelineOptions { optional = true, description = "Astra Database Region", helpText = - "If not provided, a default is chosen, which is useful with multi-region databases") + "Optional: If not provided, a default is chosen, which is useful with multi-region databases.") @SuppressWarnings("unused") String getAstraDatabaseRegion(); @@ -117,7 +115,7 @@ interface AstraDbSourceOptions extends PipelineOptions { order = 7, optional = true, description = "Token range count", - helpText = "The minimal number of splits to distribute the query") + helpText = "Optional: The minimal number of splits to distribute the query.") Integer getMinTokenRangesCount(); @SuppressWarnings("unused") diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java index b80b68f479..fe9f4217c7 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java @@ -39,8 +39,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Spanner Project ID", helpText = - "Project to read change streams from. The default for this parameter is the project " - + "where the Dataflow pipeline is running.") + "(Optional) Project to read change streams from. This is also the project where the change streams connector metadata table is created. 
The default for this parameter is the project where the Dataflow pipeline is running.") @Default.String("") String getSpannerProjectId(); @@ -49,7 +48,7 @@ public interface SpannerChangeStreamsToGcsOptions @TemplateParameter.Text( order = 2, description = "Spanner instance ID", - helpText = "The Spanner instance to read change streams from.") + helpText = "The Spanner instance ID to read change streams data from.") @Validation.Required String getSpannerInstanceId(); @@ -58,7 +57,7 @@ public interface SpannerChangeStreamsToGcsOptions @TemplateParameter.Text( order = 3, description = "Spanner database", - helpText = "The Spanner database to read change streams from.") + helpText = "The Spanner database to read change streams data from.") @Validation.Required String getSpannerDatabase(); @@ -69,10 +68,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Spanner database role", helpText = - "Database role user assumes while reading from the change stream. The database role" - + " should have required privileges to read from change stream. If a database role is" - + " not specified, the user should have required IAM permissions to read from the" - + " database.") + "(Optional) The Spanner database role to use when running the template. This parameter is required only when the IAM principal who is running the template is a fine-grained access control user. The database role must have the SELECT privilege on the change stream and the EXECUTE privilege on the change stream's read function. For more information, see [Fine-grained access control for change streams](https://cloud.google.com/spanner/docs/fgac-change-streams).") String getSpannerDatabaseRole(); void setSpannerDatabaseRole(String spannerDatabaseRole); @@ -80,7 +76,7 @@ public interface SpannerChangeStreamsToGcsOptions @TemplateParameter.Text( order = 5, description = "Spanner metadata instance ID", - helpText = "The Spanner instance to use for the change streams connector metadata table.") + helpText = "The Spanner instance ID to use for the change streams connector metadata table.") @Validation.Required String getSpannerMetadataInstanceId(); @@ -90,9 +86,7 @@ public interface SpannerChangeStreamsToGcsOptions order = 6, description = "Spanner metadata database", helpText = - "The Spanner database to use for the change streams connector metadata table. For change" - + " streams tracking all tables in a database, we recommend putting the metadata" - + " table in a separate database.") + "The Spanner database to use for the change streams connector metadata table.") @Validation.Required String getSpannerMetadataDatabase(); @@ -103,10 +97,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Cloud Spanner metadata table name", helpText = - "The Cloud Spanner change streams connector metadata table name to use. If not provided," - + " a Cloud Spanner change streams connector metadata table will automatically be" - + " created during the pipeline flow. This parameter must be provided when updating" - + " an existing pipeline and should not be provided otherwise.") + " (Optional) The Spanner change streams connector metadata table name to use. If not provided, a Spanner change streams metadata table will automatically be created during the pipeline flow. 
This parameter must be provided when updating an existing pipeline and shouldn't be provided otherwise.") String getSpannerMetadataTableName(); void setSpannerMetadataTableName(String value); @@ -125,9 +116,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "The timestamp to read change streams from", helpText = - "The starting DateTime, inclusive, to use for reading change streams" - + " (https://tools.ietf.org/html/rfc3339). For example, 2022-05-05T07:59:59Z." - + " Defaults to the timestamp when the pipeline starts.") + "(Optional) The starting DateTime, inclusive, to use for reading change streams. Ex-2021-10-12T07:20:50.52Z. Defaults to the timestamp when the pipeline starts, i.e. the current time.") @Default.String("") String getStartTimestamp(); @@ -138,9 +127,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "The timestamp to read change streams to", helpText = - "The ending DateTime, inclusive, to use for reading change streams" - + " (https://tools.ietf.org/html/rfc3339). Ex-2022-05-05T07:59:59Z. Defaults to an" - + " infinite time in the future.") + "(Optional) The ending DateTime, inclusive, to use for reading change streams. Ex-2021-10-12T07:20:50.52Z. Defaults to an infinite time in the future.") @Default.String("") String getEndTimestamp(); @@ -163,8 +150,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Output file format", helpText = - "The format of the output Cloud Storage file. Allowed formats are TEXT, AVRO. Default is" - + " AVRO.") + " (Optional) The format of the output Cloud Storage file. Allowed formats are TEXT, AVRO. Default is AVRO.") @Default.Enum("AVRO") FileFormat getOutputFileFormat(); @@ -175,9 +161,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Window duration", helpText = - "The window duration/size in which data will be written to Cloud Storage. Allowed formats" - + " are: Ns (for seconds, example: 5s), Nm (for minutes, example: 12m), Nh (for" - + " hours, example: 2h).", + "\t(Optional) The window duration is the interval in which data is written to the output directory. Configure the duration based on the pipeline's throughput. For example, a higher throughput might require smaller window sizes so that the data fits into memory. Defaults to 5m, with a minimum of 1s. Allowed formats are: [int]s (for seconds, example: 5s), [int]m (for minutes, example: 12m), [int]h (for hours, example: 2h).", example = "5m") @Default.String("5m") String getWindowDuration(); @@ -194,8 +178,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Priority for Spanner RPC invocations", helpText = - "The request priority for Cloud Spanner calls. The value must be one of:" - + " [HIGH,MEDIUM,LOW].") + "(Optional) The request priority for Spanner calls. The value must be one of:[HIGH,MEDIUM,LOW]. 
(Default: HIGH)") @Default.Enum("HIGH") RpcPriority getRpcPriority(); diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java index 6304067624..cade509590 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java @@ -183,8 +183,7 @@ public interface Options order = 1, description = "BigQuery output table", helpText = - "BigQuery table location to write the output to. The table’s schema must match the " - + "input JSON objects.") + "The BigQuery output table location, in the format of :.") String getOutputTableSpec(); void setOutputTableSpec(String value); @@ -203,8 +202,7 @@ public interface Options optional = true, description = "Pub/Sub input subscription", helpText = - "Pub/Sub subscription to read the input from, in the format of" - + " 'projects/your-project-id/subscriptions/your-subscription-name'") + "The Pub/Sub input subscription to read from, in the format of projects//subscriptions/.") String getInputSubscription(); void setInputSubscription(String value); @@ -215,9 +213,7 @@ public interface Options description = "Table for messages failed to reach the output table (i.e., Deadletter table)", helpText = - "BigQuery table for failed messages. Messages failed to reach the output table for different reasons " - + "(e.g., mismatched schema, malformed json) are written to this table. If it doesn't exist, it will" - + " be created during pipeline execution. If not specified, \"outputTableSpec_error_records\" is used instead.") + "The BigQuery table for messages that failed to reach the output table, in the format of :.. If it doesn't exist, it is created during pipeline execution. If not specified, OUTPUT_TABLE_SPEC_error_records is used instead.") String getOutputDeadletterTable(); void setOutputDeadletterTable(String value); diff --git a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java index 99b058f4c7..2c4c0e05e5 100644 --- a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java +++ b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java @@ -55,7 +55,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "JDBC connection URL string.", helpText = - "The JDBC connection URL string. For example, `jdbc:mysql://some-host:3306/sampledb`. Can be passed in as a string that's Base64-encoded and then encrypted with a Cloud KMS key. Note the difference between an Oracle non-RAC database connection string (`jdbc:oracle:thin:@some-host::`) and an Oracle RAC database connection string (`jdbc:oracle:thin:@//some-host[:]/`).", + "The JDBC connection URL string. For example, jdbc:mysql://some-host:3306/sampledb. You can pass in this value as a string that's encrypted with a Cloud KMS key and then Base64-encoded. Remove whitespace characters from the Base64-encoded string. 
Note the difference between an Oracle non-RAC database connection string (jdbc:oracle:thin:@some-host::) and an Oracle RAC database connection string (jdbc:oracle:thin:@//some-host[:]/).", example = "jdbc:mysql://some-host:3306/sampledb") String getConnectionURL(); @@ -68,8 +68,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "JDBC connection property string.", helpText = - "Properties string to use for the JDBC connection. Format of the string must be" - + " [propertyName=property;]*.", + "Optional: The properties string to use for the JDBC connection. Use the string format [propertyName=property;]*.", example = "unicode=true;characterEncoding=UTF-8") String getConnectionProperties(); @@ -82,8 +81,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "JDBC connection username.", helpText = - "The username to be used for the JDBC connection. Can be passed in as a Base64-encoded string encrypted " - + "with a Cloud KMS key.") + "Optional: The username to use for the JDBC connection. You can pass in this value encrypted by a Cloud KMS key as a Base64-encoded string.") String getUsername(); void setUsername(String username); @@ -94,8 +92,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "JDBC connection password.", helpText = - "The password to be used for the JDBC connection. Can be passed in as a Base64-encoded string encrypted " - + "with a Cloud KMS key.") + "Optional: The password to use for the JDBC connection. You can pass in this value encrypted by a Cloud KMS key as a Base64-encoded string.") String getPassword(); void setPassword(String password); @@ -107,8 +104,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "JDBC source SQL query", helpText = - "The query to be run on the source to extract the data. Either query OR both table AND " - + "PartitionColumn must be specified.", + "Optional: The query to run on the source to extract the data.", example = "select * from sampledb.sample_table") String getQuery(); @@ -131,7 +127,7 @@ public interface JdbcToBigQueryOptions optional = false, groupName = "Target", description = "Temporary directory for BigQuery loading process", - helpText = "The temporary directory for the BigQuery loading process", + helpText = "The temporary directory for the BigQuery loading process.", example = "gs://your-bucket/your-files/temp_dir") String getBigQueryLoadingTemporaryDirectory(); @@ -143,8 +139,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Google Cloud KMS key", helpText = - "Cloud KMS Encryption Key to decrypt the username, password, and connection string. If Cloud KMS key is " - + "passed in, the username, password, and connection string must all be passed in encrypted.", + "Optional: The Cloud KMS encryption key to use decrypt the username, password, and connection string. If you pass in a Cloud KMS key, the username, password, and connection string must all be passed in encrypted.", example = "projects/your-project/locations/global/keyRings/your-keyring/cryptoKeys/your-key") String getKMSEncryptionKey(); @@ -156,8 +151,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Whether to use column alias to map the rows.", helpText = - "If enabled (set to true) the pipeline will consider column alias (\"AS\") instead of the" - + " column name to map the rows to BigQuery. 
Defaults to false.") + "Optional: If enabled (set to true), the pipeline uses the column alias ("AS") instead of the column name to map the rows to BigQuery. Defaults to false.") @Default.Boolean(false) Boolean getUseColumnAlias(); @@ -169,8 +163,7 @@ public interface JdbcToBigQueryOptions groupName = "Target", description = "Whether to truncate data before writing", helpText = - "If enabled (set to true) the pipeline will truncate before loading data into BigQuery." - + " Defaults to false, which is used to only append data.") + "Optional: If enabled (set to true), the pipeline truncates before loading data into BigQuery. Defaults to false, which causes the pipeline to append data.") @Default.Boolean(false) Boolean getIsTruncate(); @@ -182,10 +175,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "The name of a column of numeric type that will be used for partitioning.", helpText = - "If this parameter is provided (along with `table`), JdbcIO reads the table in parallel " - + "by executing multiple instances of the query on the same table (subquery) using ranges. " - + "Currently, only Long partition columns are supported. Either query OR both table AND " - + "PartitionColumn must be specified.") + " Optional: If this parameter is provided (along with table), JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports Long partition columns.") String getPartitionColumn(); void setPartitionColumn(String partitionColumn); @@ -196,8 +186,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Name of the table in the external database.", helpText = - "Table to read from using partitions. Either query OR both table AND PartitionColumn must be specified. " - + "This parameter also accepts a subquery in parentheses.", + "Optional: The table to read from when using partitions. This parameter also accepts a subquery in parentheses.", example = "(select id, name from Person) as subq") String getTable(); @@ -209,7 +198,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "The number of partitions.", helpText = - "The number of partitions. This, along with the lower and upper bound, form partitions strides for generated WHERE clause expressions used to split the partition column evenly. When the input is less than 1, the number is set to 1.") + "Optional: The number of partitions. With the lower and upper bound, this value forms partition strides for generated WHERE clause expressions that are used to split the partition column evenly. When the input is less than 1, the number is set to 1.") Integer getNumPartitions(); void setNumPartitions(Integer numPartitions); @@ -220,7 +209,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Lower bound of partition column.", helpText = - "Lower bound used in the partition scheme. If not provided, it is automatically inferred by Beam (for the supported types)") + "Optional: The lower bound to use in the partition scheme. If not provided, this value is automatically inferred by Apache Beam for the supported types.") Long getLowerBound(); void setLowerBound(Long lowerBound); @@ -231,7 +220,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Upper bound of partition column", helpText = - "Upper bound used in partition scheme. 
If not provided, it is automatically inferred by Beam (for the supported types)") + "Optional: The upper bound to use in the partition scheme. If not provided, this value is automatically inferred by Apache Beam for the supported types.") Long getUpperBound(); void setUpperBound(Long lowerBound); diff --git a/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java b/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java index 0dee01a74a..c131fa5ced 100644 --- a/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java +++ b/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java @@ -141,8 +141,8 @@ public interface JmsToPubsubOptions extends PipelineOptions { optional = true, regexes = {"[,\\/:a-zA-Z0-9._-]+"}, description = "JMS Host IP", - helpText = "Server IP for JMS Host", - example = "host:5672") + helpText = "The JMS (ActiveMQ) Server IP.", + example = "tcp://10.0.0.1:61616") @Validation.Required String getJmsServer(); @@ -153,7 +153,7 @@ public interface JmsToPubsubOptions extends PipelineOptions { optional = false, regexes = {"[a-zA-Z0-9._-]+"}, description = "JMS Queue/Topic Name to read the input from", - helpText = "JMS Queue/Topic Name to read the input from.", + helpText = "The name of the JMS topic/queue that data is read from.", example = "queue") @Validation.Required String getInputName(); @@ -165,7 +165,7 @@ public interface JmsToPubsubOptions extends PipelineOptions { optional = false, regexes = {"[a-zA-Z0-9._-]+"}, description = "JMS Destination Type to read the input from", - helpText = "JMS Destination Type to read the input from.", + helpText = "The JMS destination type to read data from. Can be a queue or a topic.", example = "queue") @Validation.Required String getInputType(); @@ -176,8 +176,7 @@ public interface JmsToPubsubOptions extends PipelineOptions { order = 4, description = "Output Pub/Sub topic", helpText = - "The name of the topic to which data should published, in the format of" + " 'projects/your-project-id/topics/your-topic-name'", + "The name of the topic to which data should be published, in the format projects/your-project-id/topics/your-topic-name.", example = "projects/your-project-id/topics/your-topic-name") @Validation.Required String getOutputTopic(); @TemplateParameter.Text( order = 5, description = "JMS Username", - helpText = "JMS username for authentication with JMS server", + helpText = "(Optional) The username to use for authentication on the JMS server.", example = "sampleusername") String getUsername(); @TemplateParameter.Text( order = 6, description = "JMS Password", - helpText = "Password for username provided for authentication with JMS server", + helpText = "(Optional) The password associated with the provided username.", example = "samplepassword") String getPassword(); From 0fda2012a3368fdd42c9dc6445e9af88887b39e0 Mon Sep 17 00:00:00 2001 From: Archana Gupta Date: Tue, 30 Apr 2024 17:13:13 +0530 Subject: [PATCH 02/70] Modified to add backquotes to highlight words for all templates --- .../cloud/teleport/bigtable/BigtableToAvro.java | 2 +- .../cloud/teleport/spanner/ExportPipeline.java | 10 +++++----- .../cloud/teleport/spanner/ImportPipeline.java | 12 ++++++------ .../cloud/teleport/templates/PubSubToBigQuery.java | 10 +++-------
.../cloud/teleport/templates/PubsubToPubsub.java | 8 ++++---- .../cloud/teleport/templates/TextToPubsub.java | 2 +- .../v2/astradb/options/AstraDbToBigQueryOptions.java | 2 +- .../v2/options/SpannerChangeStreamsToGcsOptions.java | 2 +- .../teleport/v2/templates/PubSubToBigQuery.java | 6 +++--- .../teleport/v2/options/JdbcToBigQueryOptions.java | 12 ++++++------ .../cloud/teleport/v2/templates/JmsToPubsub.java | 2 +- 11 files changed, 32 insertions(+), 36 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java b/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java index f9ede3e24b..4cb5d2b2ad 100644 --- a/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java +++ b/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java @@ -118,7 +118,7 @@ public interface Options extends PipelineOptions { @TemplateParameter.Text( order = 5, description = "Avro file prefix", - helpText = "The prefix of the Avro filename. For example, output-.") + helpText = "The prefix of the Avro filename. For example, `output-`.") @Default.String("part") ValueProvider getFilenamePrefix(); diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java index 3f68b73767..7165873674 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java @@ -167,7 +167,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Export Timestamps as Timestamp-micros type", helpText = - "(Optional) If true, timestamps are exported as a long type with timestamp-micros logical type. By default, timestamps are exported as ISO-8601 strings at nanosecond precision.") + "(Optional) If true, timestamps are exported as a `long` type with `timestamp-micros` logical type. By default, timestamps are exported as ISO-8601 strings at nanosecond precision.") @Default.Boolean(false) ValueProvider getShouldExportTimestampAsLogicalType(); @@ -179,7 +179,7 @@ public interface ExportPipelineOptions extends PipelineOptions { regexes = {"^[a-zA-Z0-9_]+(,[a-zA-Z0-9_]+)*$"}, description = "Cloud Spanner table name(s).", helpText = - "(Optional) A comma-separated list of tables specifying the subset of the Spanner database to export. If you set this parameter, you must include all of the related tables (parent tables and foreign key referenced tables) or else set the shouldExportRelatedTables parameter to true.") + "(Optional) A comma-separated list of tables specifying the subset of the Spanner database to export. If you set this parameter, you must include all of the related tables (parent tables and foreign key referenced tables) or else set the `shouldExportRelatedTables` parameter to `true`.") @Default.String(value = "") ValueProvider getTableNames(); @@ -190,7 +190,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Export necessary Related Spanner tables.", helpText = - "\t(Optional) Specifies whether to include related tables. This parameter is used in conjunction with the tableNames parameter.") + "\t(Optional) Specifies whether to include related tables. 
This parameter is used in conjunction with the `tableNames` parameter.") @Default.Boolean(false) ValueProvider getShouldExportRelatedTables(); @@ -206,7 +206,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Priority for Spanner RPC invocations", helpText = - "(Optional) The request priority for Spanner calls. Possible values are HIGH, MEDIUM, LOW. The default value is MEDIUM.") + "(Optional) The request priority for Spanner calls. Possible values are `HIGH`, `MEDIUM`, `LOW`. The default value is `MEDIUM`.") ValueProvider getSpannerPriority(); void setSpannerPriority(ValueProvider value); @@ -216,7 +216,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Use independent compute resource (Spanner DataBoost).", helpText = - "(Optional) Set to true to use the compute resources of Spanner Data Boost to run the job with near-zero impact on Spanner OLTP workflows. This requires the spanner.databases.useDataBoost IAM permission. For more information, see [Data Boost overview](https://cloud.google.com/spanner/docs/databoost/databoost-overview).") + "(Optional) Set to `true` to use the compute resources of Spanner Data Boost to run the job with near-zero impact on Spanner OLTP workflows. This requires the `spanner.databases.useDataBoost` IAM permission. For more information, see [Data Boost overview](https://cloud.google.com/spanner/docs/databoost/databoost-overview).") @Default.Boolean(false) ValueProvider getDataBoostEnabled(); diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java index 8470c5c17a..b5556d1d3b 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java @@ -106,7 +106,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Indexes", helpText = - "Optional: If true, the pipeline waits for indexes to be created. If false, the job might complete while indexes are still being created in the background. Default: false.") + "Optional: If `true`, the pipeline waits for indexes to be created. If `false`, the job might complete while indexes are still being created in the background. Default: `false`.") @Default.Boolean(false) ValueProvider getWaitForIndexes(); @@ -117,7 +117,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Foreign Keys", helpText = - "Optional: If true, the pipeline waits for foreign keys to be created. If false, the job might complete while foreign keys are still being created in the background. Default: false.") + "Optional: If `true`, the pipeline waits for foreign keys to be created. If `false`, the job might complete while foreign keys are still being created in the background. Default: `false`.") @Default.Boolean(false) ValueProvider getWaitForForeignKeys(); @@ -128,7 +128,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Change Streams", helpText = - "Optional: If true, the pipeline waits for change streams to be created. If false, the job might complete while change streams are still being created in the background. Default: true.") + "Optional: If `true`, the pipeline waits for change streams to be created. If `false`, the job might complete while change streams are still being created in the background. 
Default: `true`.") @Default.Boolean(true) ValueProvider getWaitForChangeStreams(); @@ -139,7 +139,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Sequences", helpText = - "By default the import pipeline is blocked on sequence creation. If false, it may" + "By default the import pipeline is blocked on sequence creation. If `false`, it may" + " complete with sequences still being created in the background.") @Default.Boolean(true) ValueProvider getWaitForSequences(); @@ -151,7 +151,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Create Indexes early", helpText = - "Optional: Specifies whether to enable early index creation. If the template runs a large number of DDL statements, it's more efficient to create indexes before loading data. Therefore, the default behavior is to create the indexes first when the number of DDL statements exceeds a threshold. To disable this feature, set earlyIndexCreateFlag to false. Default: true.") + "Optional: Specifies whether to enable early index creation. If the template runs a large number of DDL statements, it's more efficient to create indexes before loading data. Therefore, the default behavior is to create the indexes first when the number of DDL statements exceeds a threshold. To disable this feature, `set earlyIndexCreateFlag` to `false`. Default: `true`.") @Default.Boolean(true) ValueProvider getEarlyIndexCreateFlag(); @@ -194,7 +194,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Priority for Spanner RPC invocations", helpText = - "Optional: The request priority for Spanner calls. Possible values are HIGH, MEDIUM, LOW. The default value is MEDIUM.") + "Optional: The request priority for Spanner calls. Possible values are `HIGH`, `MEDIUM`, `LOW`. The default value is `MEDIUM`.") ValueProvider getSpannerPriority(); void setSpannerPriority(ValueProvider value); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java b/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java index a4b136500c..c03c9e7725 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java @@ -166,8 +166,7 @@ public interface Options extends PipelineOptions, JavascriptTextTransformerOptio order = 1, description = "BigQuery output table", helpText = - "BigQuery table location to write the output to. The table’s schema must match the " - + "input JSON objects.") + "The BigQuery output table location, in the format of `:.`") ValueProvider getOutputTableSpec(); void setOutputTableSpec(ValueProvider value); @@ -184,8 +183,7 @@ public interface Options extends PipelineOptions, JavascriptTextTransformerOptio order = 3, description = "Pub/Sub input subscription", helpText = - "Pub/Sub subscription to read the input from, in the format of" - + " 'projects/your-project-id/subscriptions/your-subscription-name'") + "The Pub/Sub input subscription to read from, in the format of `projects//subscriptions/`.") ValueProvider getInputSubscription(); void setInputSubscription(ValueProvider value); @@ -205,9 +203,7 @@ public interface Options extends PipelineOptions, JavascriptTextTransformerOptio description = "Table for messages failed to reach the output table (i.e., Deadletter table)", helpText = - "BigQuery table for failed messages. 
Messages failed to reach the output table for different reasons " - + "(e.g., mismatched schema, malformed json) are written to this table. If it doesn't exist, it will" - + " be created during pipeline execution. If not specified, \"outputTableSpec_error_records\" is used instead.") + "The BigQuery table for messages that failed to reach the output table, in the format of `:.`. If it doesn't exist, it is created during pipeline execution. If not specified, OUTPUT_TABLE_SPEC_error_records is used instead.") ValueProvider getOutputDeadletterTable(); void setOutputDeadletterTable(ValueProvider value); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java index 770cf5c260..cf77cc1825 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java @@ -132,7 +132,7 @@ public interface Options extends PipelineOptions, StreamingOptions { order = 1, description = "Pub/Sub input subscription", helpText = - "Pub/Sub subscription to read the input from. For example, projects//subscriptions/.", + "Pub/Sub subscription to read the input from. For example, `projects//subscriptions/`.", example = "projects/your-project-id/subscriptions/your-subscription-name") @Validation.Required ValueProvider getInputSubscription(); @@ -143,7 +143,7 @@ public interface Options extends PipelineOptions, StreamingOptions { order = 2, description = "Output Pub/Sub topic", helpText = - "Cloud Pub/Sub topic to write the output to. For example, projects//topics/.", + "Cloud Pub/Sub topic to write the output to. For example, `projects//topics/`.", example = "projects/your-project-id/topics/your-topic-name") @Validation.Required ValueProvider getOutputTopic(); @@ -155,7 +155,7 @@ public interface Options extends PipelineOptions, StreamingOptions { optional = true, description = "Event filter key", helpText = - "(Optional) Filter events based on an attribute key. No filters are applied if filterKey is not specified.") + "(Optional) Filter events based on an attribute key. No filters are applied if `filterKey` is not specified.") ValueProvider getFilterKey(); void setFilterKey(ValueProvider filterKey); @@ -165,7 +165,7 @@ public interface Options extends PipelineOptions, StreamingOptions { optional = true, description = "Event filter value", helpText = - "(Optional) Filter attribute value to use in case a filterKey is provided. A null filterValue is used by default.") + "(Optional) Filter attribute value to use in case a filterKey is provided. A null `filterValue` is used by default.") ValueProvider getFilterValue(); void setFilterValue(ValueProvider filterValue); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/TextToPubsub.java b/v1/src/main/java/com/google/cloud/teleport/templates/TextToPubsub.java index b1cd1b8e02..ac07eaa34d 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/TextToPubsub.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/TextToPubsub.java @@ -74,7 +74,7 @@ public interface Options extends PipelineOptions { order = 2, description = "Output Pub/Sub topic", helpText = - "The Pub/Sub input topic to write to. The name must be in the format of projects//topics/.", + "The Pub/Sub input topic to write to. 
The name must be in the format of `projects//topics/`.", example = "projects/your-project-id/topics/your-topic-name") @Required ValueProvider getOutputTopic(); diff --git a/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java b/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java index 80597dbbbf..5e1d5e51ed 100644 --- a/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java +++ b/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java @@ -33,7 +33,7 @@ interface BigQueryWriteOptions extends PipelineOptions, DataflowPipelineOptions description = "BigQuery output table", optional = true, helpText = - "Optional: The BigQuery table location to write the output to. The table should be in the format :.. The table's schema must match the input objects.") + "Optional: The BigQuery table location to write the output to. The table should be in the format `:.`. The table's schema must match the input objects.") String getOutputTableSpec(); @SuppressWarnings("unused") diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java index fe9f4217c7..c8fc5a33ac 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java @@ -68,7 +68,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Spanner database role", helpText = - "(Optional) The Spanner database role to use when running the template. This parameter is required only when the IAM principal who is running the template is a fine-grained access control user. The database role must have the SELECT privilege on the change stream and the EXECUTE privilege on the change stream's read function. For more information, see [Fine-grained access control for change streams](https://cloud.google.com/spanner/docs/fgac-change-streams).") + "(Optional) The Spanner database role to use when running the template. This parameter is required only when the IAM principal who is running the template is a fine-grained access control user. The database role must have the `SELECT` privilege on the change stream and the `EXECUTE` privilege on the change stream's read function. 
For more information, see [Fine-grained access control for change streams](https://cloud.google.com/spanner/docs/fgac-change-streams).") String getSpannerDatabaseRole(); void setSpannerDatabaseRole(String spannerDatabaseRole); diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java index cade509590..c7149bef6c 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java @@ -183,7 +183,7 @@ public interface Options order = 1, description = "BigQuery output table", helpText = - "The BigQuery output table location, in the format of :.") + "The BigQuery output table location, in the format of `:.`") String getOutputTableSpec(); void setOutputTableSpec(String value); @@ -202,7 +202,7 @@ public interface Options optional = true, description = "Pub/Sub input subscription", helpText = - "The Pub/Sub input subscription to read from, in the format of projects//subscriptions/.") + "The Pub/Sub input subscription to read from, in the format of `projects//subscriptions/`.") String getInputSubscription(); void setInputSubscription(String value); @@ -213,7 +213,7 @@ public interface Options description = "Table for messages failed to reach the output table (i.e., Deadletter table)", helpText = - "The BigQuery table for messages that failed to reach the output table, in the format of :.. If it doesn't exist, it is created during pipeline execution. If not specified, OUTPUT_TABLE_SPEC_error_records is used instead.") + "The BigQuery table for messages that failed to reach the output table, in the format of` :.`. If it doesn't exist, it is created during pipeline execution. If not specified, `OUTPUT_TABLE_SPEC_error_records` is used instead.") String getOutputDeadletterTable(); void setOutputDeadletterTable(String value); diff --git a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java index 2c4c0e05e5..f7286b1b95 100644 --- a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java +++ b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java @@ -55,7 +55,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "JDBC connection URL string.", helpText = - "The JDBC connection URL string. For example, jdbc:mysql://some-host:3306/sampledb. You can pass in this value as a string that's encrypted with a Cloud KMS key and then Base64-encoded. Remove whitespace characters from the Base64-encoded string. Note the difference between an Oracle non-RAC database connection string (jdbc:oracle:thin:@some-host::) and an Oracle RAC database connection string (jdbc:oracle:thin:@//some-host[:]/).", + "The JDBC connection URL string. For example, `jdbc:mysql://some-host:3306/sampledb`. You can pass in this value as a string that's encrypted with a Cloud KMS key and then Base64-encoded. Remove whitespace characters from the Base64-encoded string. 
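As a rough illustration of the encoding step described above, the following sketch shows how a value that has already been encrypted with Cloud KMS could be Base64-encoded and stripped of whitespace before being passed as a template parameter. The class name, helper bytes, and values are invented for illustration; the actual Cloud KMS encrypt call is not shown.

import java.util.Base64;

public class EncodeKmsValue {
  public static void main(String[] args) {
    // Placeholder bytes standing in for ciphertext returned by a Cloud KMS encrypt call.
    byte[] kmsCiphertext = new byte[] {0x10, 0x20, 0x30};
    String encoded = Base64.getEncoder().encodeToString(kmsCiphertext);
    // Standard Base64 output contains no whitespace, but if the value was wrapped
    // (for example, by a MIME-style encoder), remove it as the help text requires.
    String parameterValue = encoded.replaceAll("\\s", "");
    System.out.println(parameterValue);
  }
}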
Note the difference between an Oracle non-RAC database connection string (`jdbc:oracle:thin:@some-host::`) and an Oracle RAC database connection string (`jdbc:oracle:thin:@//some-host[:]/`).", example = "jdbc:mysql://some-host:3306/sampledb") String getConnectionURL(); @@ -151,7 +151,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Whether to use column alias to map the rows.", helpText = - "Optional: If enabled (set to true), the pipeline uses the column alias ("AS") instead of the column name to map the rows to BigQuery. Defaults to false.") + "Optional: If enabled (set to `true`), the pipeline uses the column alias ("AS") instead of the column name to map the rows to BigQuery. Defaults to `false`.") @Default.Boolean(false) Boolean getUseColumnAlias(); @@ -163,7 +163,7 @@ public interface JdbcToBigQueryOptions groupName = "Target", description = "Whether to truncate data before writing", helpText = - "Optional: If enabled (set to true), the pipeline truncates before loading data into BigQuery. Defaults to false, which causes the pipeline to append data.") + "Optional: If enabled (set to `true`), the pipeline truncates before loading data into BigQuery. Defaults to `false`, which causes the pipeline to append data.") @Default.Boolean(false) Boolean getIsTruncate(); @@ -175,7 +175,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "The name of a column of numeric type that will be used for partitioning.", helpText = - " Optional: If this parameter is provided (along with table), JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports Long partition columns.") + " Optional: If this parameter is provided (along with `table`), JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports `Long` partition columns.") String getPartitionColumn(); void setPartitionColumn(String partitionColumn); @@ -198,7 +198,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "The number of partitions.", helpText = - "Optional: The number of partitions. With the lower and upper bound, this value forms partition strides for generated WHERE clause expressions that are used to split the partition column evenly. When the input is less than 1, the number is set to 1.") + "Optional: The number of partitions. With the lower and upper bound, this value forms partition strides for generated `WHERE` clause expressions that are used to split the partition column evenly. When the input is less than `1`, the number is set to `1`.") Integer getNumPartitions(); void setNumPartitions(Integer numPartitions); @@ -247,7 +247,7 @@ public interface JdbcToBigQueryOptions }, optional = true, description = "Create Disposition to use for BigQuery", - helpText = "BigQuery CreateDisposition. For example, CREATE_IF_NEEDED, CREATE_NEVER.") + helpText = "BigQuery CreateDisposition. 
For example, `CREATE_IF_NEEDED`, `CREATE_NEVER`.") @Default.String("CREATE_NEVER") String getCreateDisposition(); diff --git a/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java b/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java index c131fa5ced..ee3a87c7c3 100644 --- a/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java +++ b/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java @@ -176,7 +176,7 @@ public interface JmsToPubsubOptions extends PipelineOptions { order = 4, description = "Output Pub/Sub topic", helpText = - "The name of the topic to which data should published, in the format projects/your-project-id/topics/your-topic-name.", + "The name of the topic to which data should published, in the format `projects/your-project-id/topics/your-topic-name`.", example = "projects/your-project-id/topics/your-topic-name") @Validation.Required String getOutputTopic(); From 1a009018b016501b16c2d3038423d07703227369 Mon Sep 17 00:00:00 2001 From: Archana Gupta Date: Wed, 1 May 2024 10:48:57 +0530 Subject: [PATCH 03/70] Modified links and Optional statements --- .../teleport/spanner/ExportPipeline.java | 16 ++++++------- .../teleport/spanner/ImportPipeline.java | 14 +++++------ .../teleport/templates/PubsubToPubsub.java | 8 +++---- .../options/AstraDbToBigQueryOptions.java | 8 +++---- .../SpannerChangeStreamsToGcsOptions.java | 16 ++++++------- .../v2/templates/PubSubToBigQuery.java | 2 +- .../v2/options/JdbcToBigQueryOptions.java | 24 +++++++++---------- .../teleport/v2/templates/JmsToPubsub.java | 4 ++-- 8 files changed, 46 insertions(+), 46 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java index 7165873674..6abfe1f306 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java @@ -108,7 +108,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Cloud Storage temp directory for storing Avro files", helpText = - "(Optional) The Cloud Storage path where temporary Avro files are written.") + "The Cloud Storage path where temporary Avro files are written.") ValueProvider getAvroTempDirectory(); void setAvroTempDirectory(ValueProvider value); @@ -146,7 +146,7 @@ public interface ExportPipelineOptions extends PipelineOptions { }, description = "Snapshot time", helpText = - "(Optional) The timestamp that corresponds to the version of the Spanner database that you want to read. The timestamp must be specified as per RFC 3339 UTC "Zulu" format. The timestamp must be in the past and Maximum timestamp staleness applies.", + "The timestamp that corresponds to the version of the Spanner database that you want to read. The timestamp must be specified as per RFC 3339 UTC "Zulu" format. 
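For concreteness, here is a minimal, illustrative sketch of an RFC 3339 UTC ("Zulu") timestamp of the kind the snapshotTime parameter accepts; the value shown is made up and must lie in the past when the job runs.

import java.time.Instant;

public class SnapshotTimeFormat {
  public static void main(String[] args) {
    // RFC 3339 UTC "Zulu" format: no timezone offset, always ends in 'Z'.
    Instant snapshotTime = Instant.parse("2024-04-30T10:15:30Z");
    // The template requires the snapshot time to be in the past.
    System.out.println(snapshotTime.isBefore(Instant.now()));
  }
}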
The timestamp must be in the past and Maximum timestamp staleness applies.", example = "1990-12-31T23:59:60Z") @Default.String(value = "") ValueProvider getSnapshotTime(); @@ -157,7 +157,7 @@ public interface ExportPipelineOptions extends PipelineOptions { order = 8, optional = true, description = "Cloud Spanner Project Id", - helpText = "(Optional) The Google Cloud Project ID of the Spanner database that you want to read data from.") + helpText = "The Google Cloud Project ID of the Spanner database that you want to read data from.") ValueProvider getSpannerProjectId(); void setSpannerProjectId(ValueProvider value); @@ -167,7 +167,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Export Timestamps as Timestamp-micros type", helpText = - "(Optional) If true, timestamps are exported as a `long` type with `timestamp-micros` logical type. By default, timestamps are exported as ISO-8601 strings at nanosecond precision.") + "If true, timestamps are exported as a `long` type with `timestamp-micros` logical type. By default, timestamps are exported as ISO-8601 strings at nanosecond precision.") @Default.Boolean(false) ValueProvider getShouldExportTimestampAsLogicalType(); @@ -179,7 +179,7 @@ public interface ExportPipelineOptions extends PipelineOptions { regexes = {"^[a-zA-Z0-9_]+(,[a-zA-Z0-9_]+)*$"}, description = "Cloud Spanner table name(s).", helpText = - "(Optional) A comma-separated list of tables specifying the subset of the Spanner database to export. If you set this parameter, you must include all of the related tables (parent tables and foreign key referenced tables) or else set the `shouldExportRelatedTables` parameter to `true`.") + "A comma-separated list of tables specifying the subset of the Spanner database to export. If you set this parameter, you must include all of the related tables (parent tables and foreign key referenced tables) or else set the `shouldExportRelatedTables` parameter to `true`.") @Default.String(value = "") ValueProvider getTableNames(); @@ -190,7 +190,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Export necessary Related Spanner tables.", helpText = - "\t(Optional) Specifies whether to include related tables. This parameter is used in conjunction with the `tableNames` parameter.") + "Specifies whether to include related tables. This parameter is used in conjunction with the `tableNames` parameter.") @Default.Boolean(false) ValueProvider getShouldExportRelatedTables(); @@ -206,7 +206,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Priority for Spanner RPC invocations", helpText = - "(Optional) The request priority for Spanner calls. Possible values are `HIGH`, `MEDIUM`, `LOW`. The default value is `MEDIUM`.") + "The request priority for Spanner calls. Possible values are `HIGH`, `MEDIUM`, `LOW`. The default value is `MEDIUM`.") ValueProvider getSpannerPriority(); void setSpannerPriority(ValueProvider value); @@ -216,7 +216,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Use independent compute resource (Spanner DataBoost).", helpText = - "(Optional) Set to `true` to use the compute resources of Spanner Data Boost to run the job with near-zero impact on Spanner OLTP workflows. This requires the `spanner.databases.useDataBoost` IAM permission. 
For more information, see [Data Boost overview](https://cloud.google.com/spanner/docs/databoost/databoost-overview).") + "Set to `true` to use the compute resources of Spanner Data Boost to run the job with near-zero impact on Spanner OLTP workflows. This requires the `spanner.databases.useDataBoost` IAM permission. For more information, see Data Boost overview(https://cloud.google.com/spanner/docs/databoost/databoost-overview).") @Default.Boolean(false) ValueProvider getDataBoostEnabled(); diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java index b5556d1d3b..a64b0b471b 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java @@ -106,7 +106,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Indexes", helpText = - "Optional: If `true`, the pipeline waits for indexes to be created. If `false`, the job might complete while indexes are still being created in the background. Default: `false`.") + "If `true`, the pipeline waits for indexes to be created. If `false`, the job might complete while indexes are still being created in the background. Default: `false`.") @Default.Boolean(false) ValueProvider getWaitForIndexes(); @@ -117,7 +117,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Foreign Keys", helpText = - "Optional: If `true`, the pipeline waits for foreign keys to be created. If `false`, the job might complete while foreign keys are still being created in the background. Default: `false`.") + "If `true`, the pipeline waits for foreign keys to be created. If `false`, the job might complete while foreign keys are still being created in the background. Default: `false`.") @Default.Boolean(false) ValueProvider getWaitForForeignKeys(); @@ -128,7 +128,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Change Streams", helpText = - "Optional: If `true`, the pipeline waits for change streams to be created. If `false`, the job might complete while change streams are still being created in the background. Default: `true`.") + "If `true`, the pipeline waits for change streams to be created. If `false`, the job might complete while change streams are still being created in the background. Default: `true`.") @Default.Boolean(true) ValueProvider getWaitForChangeStreams(); @@ -151,7 +151,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Create Indexes early", helpText = - "Optional: Specifies whether to enable early index creation. If the template runs a large number of DDL statements, it's more efficient to create indexes before loading data. Therefore, the default behavior is to create the indexes first when the number of DDL statements exceeds a threshold. To disable this feature, `set earlyIndexCreateFlag` to `false`. Default: `true`.") + "Specifies whether to enable early index creation. If the template runs a large number of DDL statements, it's more efficient to create indexes before loading data. Therefore, the default behavior is to create the indexes first when the number of DDL statements exceeds a threshold. To disable this feature, `set earlyIndexCreateFlag` to `false`. 
Default: `true`.") @Default.Boolean(true) ValueProvider getEarlyIndexCreateFlag(); @@ -166,7 +166,7 @@ public interface Options extends PipelineOptions { order = 9, optional = true, description = "Cloud Spanner Project Id", - helpText = "\tOptional: The Google Cloud project ID of the Spanner database. If not set, the default Google Cloud project is used.") + helpText = "The Google Cloud project ID of the Spanner database. If not set, the default Google Cloud project is used.") ValueProvider getSpannerProjectId(); void setSpannerProjectId(ValueProvider value); @@ -178,7 +178,7 @@ public interface Options extends PipelineOptions { optional = true, regexes = {"[0-9]+"}, description = "DDL Creation timeout in minutes", - helpText = "Optional: The timeout, in minutes, for DDL statements performed by the template. The default value is 30 minutes.") + helpText = "The timeout, in minutes, for DDL statements performed by the template. The default value is 30 minutes.") @Default.Integer(30) ValueProvider getDdlCreationTimeoutInMinutes(); @@ -194,7 +194,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Priority for Spanner RPC invocations", helpText = - "Optional: The request priority for Spanner calls. Possible values are `HIGH`, `MEDIUM`, `LOW`. The default value is `MEDIUM`.") + "The request priority for Spanner calls. Possible values are `HIGH`, `MEDIUM`, `LOW`. The default value is `MEDIUM`.") ValueProvider getSpannerPriority(); void setSpannerPriority(ValueProvider value); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java index cf77cc1825..a7bc99181b 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java @@ -132,7 +132,7 @@ public interface Options extends PipelineOptions, StreamingOptions { order = 1, description = "Pub/Sub input subscription", helpText = - "Pub/Sub subscription to read the input from. For example, `projects//subscriptions/`.", + "Pub/Sub subscription to read the input from.", example = "projects/your-project-id/subscriptions/your-subscription-name") @Validation.Required ValueProvider getInputSubscription(); @@ -143,7 +143,7 @@ public interface Options extends PipelineOptions, StreamingOptions { order = 2, description = "Output Pub/Sub topic", helpText = - "Cloud Pub/Sub topic to write the output to. For example, `projects//topics/`.", + "Cloud Pub/Sub topic to write the output to..", example = "projects/your-project-id/topics/your-topic-name") @Validation.Required ValueProvider getOutputTopic(); @@ -155,7 +155,7 @@ public interface Options extends PipelineOptions, StreamingOptions { optional = true, description = "Event filter key", helpText = - "(Optional) Filter events based on an attribute key. No filters are applied if `filterKey` is not specified.") + "Filter events based on an attribute key. No filters are applied if `filterKey` is not specified.") ValueProvider getFilterKey(); void setFilterKey(ValueProvider filterKey); @@ -165,7 +165,7 @@ public interface Options extends PipelineOptions, StreamingOptions { optional = true, description = "Event filter value", helpText = - "(Optional) Filter attribute value to use in case a filterKey is provided. A null `filterValue` is used by default.") + "Filter attribute value to use in case a filterKey is provided. 
A null `filterValue` is used by default.") ValueProvider getFilterValue(); void setFilterValue(ValueProvider filterValue); diff --git a/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java b/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java index 5e1d5e51ed..040fea7c9f 100644 --- a/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java +++ b/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java @@ -33,7 +33,7 @@ interface BigQueryWriteOptions extends PipelineOptions, DataflowPipelineOptions description = "BigQuery output table", optional = true, helpText = - "Optional: The BigQuery table location to write the output to. The table should be in the format `:.`. The table's schema must match the input objects.") + "The BigQuery table location to write the output to. The table should be in the format `:.`. The table's schema must match the input objects.") String getOutputTableSpec(); @SuppressWarnings("unused") @@ -92,7 +92,7 @@ interface AstraDbSourceOptions extends PipelineOptions { order = 5, optional = true, description = "Cassandra CQL Query", - helpText = "Optional: Query to filter rows instead of reading the whole table.") + helpText = "Query to filter rows instead of reading the whole table.") @SuppressWarnings("unused") String getAstraQuery(); @@ -104,7 +104,7 @@ interface AstraDbSourceOptions extends PipelineOptions { optional = true, description = "Astra Database Region", helpText = - "Optional: If not provided, a default is chosen, which is useful with multi-region databases.") + "If not provided, a default is chosen, which is useful with multi-region databases.") @SuppressWarnings("unused") String getAstraDatabaseRegion(); @@ -115,7 +115,7 @@ interface AstraDbSourceOptions extends PipelineOptions { order = 7, optional = true, description = "Token range count", - helpText = "Optional: The minimal number of splits to distribute the query.") + helpText = "The minimal number of splits to distribute the query.") Integer getMinTokenRangesCount(); @SuppressWarnings("unused") diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java index c8fc5a33ac..ecd68de243 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java @@ -39,7 +39,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Spanner Project ID", helpText = - "(Optional) Project to read change streams from. This is also the project where the change streams connector metadata table is created. The default for this parameter is the project where the Dataflow pipeline is running.") + "Project to read change streams from. This is also the project where the change streams connector metadata table is created. 
The default for this parameter is the project where the Dataflow pipeline is running.") @Default.String("") String getSpannerProjectId(); @@ -68,7 +68,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Spanner database role", helpText = - "(Optional) The Spanner database role to use when running the template. This parameter is required only when the IAM principal who is running the template is a fine-grained access control user. The database role must have the `SELECT` privilege on the change stream and the `EXECUTE` privilege on the change stream's read function. For more information, see [Fine-grained access control for change streams](https://cloud.google.com/spanner/docs/fgac-change-streams).") + "The Spanner database role to use when running the template. This parameter is required only when the IAM principal who is running the template is a fine-grained access control user. The database role must have the `SELECT` privilege on the change stream and the `EXECUTE` privilege on the change stream's read function. For more information, see [Fine-grained access control for change streams](https://cloud.google.com/spanner/docs/fgac-change-streams).") String getSpannerDatabaseRole(); void setSpannerDatabaseRole(String spannerDatabaseRole); @@ -97,7 +97,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Cloud Spanner metadata table name", helpText = - " (Optional) The Spanner change streams connector metadata table name to use. If not provided, a Spanner change streams metadata table will automatically be created during the pipeline flow. This parameter must be provided when updating an existing pipeline and shouldn't be provided otherwise.") + "The Spanner change streams connector metadata table name to use. If not provided, a Spanner change streams metadata table will automatically be created during the pipeline flow. This parameter must be provided when updating an existing pipeline and shouldn't be provided otherwise.") String getSpannerMetadataTableName(); void setSpannerMetadataTableName(String value); @@ -116,7 +116,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "The timestamp to read change streams from", helpText = - "(Optional) The starting DateTime, inclusive, to use for reading change streams. Ex-2021-10-12T07:20:50.52Z. Defaults to the timestamp when the pipeline starts, i.e. the current time.") + "The starting DateTime, inclusive, to use for reading change streams. Ex-2021-10-12T07:20:50.52Z. Defaults to the timestamp when the pipeline starts, i.e. the current time.") @Default.String("") String getStartTimestamp(); @@ -127,7 +127,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "The timestamp to read change streams to", helpText = - "(Optional) The ending DateTime, inclusive, to use for reading change streams. Ex-2021-10-12T07:20:50.52Z. Defaults to an infinite time in the future.") + "The ending DateTime, inclusive, to use for reading change streams. Ex-2021-10-12T07:20:50.52Z. Defaults to an infinite time in the future.") @Default.String("") String getEndTimestamp(); @@ -150,7 +150,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Output file format", helpText = - " (Optional) The format of the output Cloud Storage file. Allowed formats are TEXT, AVRO. Default is AVRO.") + "The format of the output Cloud Storage file. Allowed formats are TEXT, AVRO. 
Default is AVRO.") @Default.Enum("AVRO") FileFormat getOutputFileFormat(); @@ -161,7 +161,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Window duration", helpText = - "\t(Optional) The window duration is the interval in which data is written to the output directory. Configure the duration based on the pipeline's throughput. For example, a higher throughput might require smaller window sizes so that the data fits into memory. Defaults to 5m, with a minimum of 1s. Allowed formats are: [int]s (for seconds, example: 5s), [int]m (for minutes, example: 12m), [int]h (for hours, example: 2h).", + "The window duration is the interval in which data is written to the output directory. Configure the duration based on the pipeline's throughput. For example, a higher throughput might require smaller window sizes so that the data fits into memory. Defaults to 5m, with a minimum of 1s. Allowed formats are: [int]s (for seconds, example: 5s), [int]m (for minutes, example: 12m), [int]h (for hours, example: 2h).", example = "5m") @Default.String("5m") String getWindowDuration(); @@ -178,7 +178,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Priority for Spanner RPC invocations", helpText = - "(Optional) The request priority for Spanner calls. The value must be one of:[HIGH,MEDIUM,LOW]. (Default: HIGH)") + "The request priority for Spanner calls. The value must be one of:[HIGH,MEDIUM,LOW]. (Default: HIGH)") @Default.Enum("HIGH") RpcPriority getRpcPriority(); diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java index c7149bef6c..aa8f81ece9 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java @@ -223,7 +223,7 @@ public interface Options optional = true, description = "Use at at-least-once semantics in BigQuery Storage Write API", helpText = - "This parameter takes effect only if \"Use BigQuery Storage Write API\" is enabled. If" + "This parameter takes effect only if `Use BigQuery Storage Write API` is enabled. If" + " enabled the at-least-once semantics will be used for Storage Write API, otherwise" + " exactly-once semantics will be used.", hiddenUi = true) diff --git a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java index f7286b1b95..4e0f0419a6 100644 --- a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java +++ b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java @@ -68,7 +68,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "JDBC connection property string.", helpText = - "Optional: The properties string to use for the JDBC connection. Use the string format [propertyName=property;]*.", + "The properties string to use for the JDBC connection. 
Use the string format `[propertyName=property;]*.`", example = "unicode=true;characterEncoding=UTF-8") String getConnectionProperties(); @@ -81,7 +81,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "JDBC connection username.", helpText = - "Optional: The username to use for the JDBC connection. You can pass in this value encrypted by a Cloud KMS key as a Base64-encoded string.") + "The username to use for the JDBC connection. You can pass in this value encrypted by a Cloud KMS key as a Base64-encoded string.") String getUsername(); void setUsername(String username); @@ -92,7 +92,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "JDBC connection password.", helpText = - "Optional: The password to use for the JDBC connection. You can pass in this value encrypted by a Cloud KMS key as a Base64-encoded string.") + "The password to use for the JDBC connection. You can pass in this value encrypted by a Cloud KMS key as a Base64-encoded string.") String getPassword(); void setPassword(String password); @@ -104,7 +104,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "JDBC source SQL query", helpText = - "Optional: The query to run on the source to extract the data.", + "The query to run on the source to extract the data.", example = "select * from sampledb.sample_table") String getQuery(); @@ -139,7 +139,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Google Cloud KMS key", helpText = - "Optional: The Cloud KMS encryption key to use decrypt the username, password, and connection string. If you pass in a Cloud KMS key, the username, password, and connection string must all be passed in encrypted.", + "The Cloud KMS encryption key to use decrypt the username, password, and connection string. If you pass in a Cloud KMS key, the username, password, and connection string must all be passed in encrypted.", example = "projects/your-project/locations/global/keyRings/your-keyring/cryptoKeys/your-key") String getKMSEncryptionKey(); @@ -151,7 +151,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Whether to use column alias to map the rows.", helpText = - "Optional: If enabled (set to `true`), the pipeline uses the column alias ("AS") instead of the column name to map the rows to BigQuery. Defaults to `false`.") + "If enabled (set to `true`), the pipeline uses the column alias ("AS") instead of the column name to map the rows to BigQuery. Defaults to `false`.") @Default.Boolean(false) Boolean getUseColumnAlias(); @@ -163,7 +163,7 @@ public interface JdbcToBigQueryOptions groupName = "Target", description = "Whether to truncate data before writing", helpText = - "Optional: If enabled (set to `true`), the pipeline truncates before loading data into BigQuery. Defaults to `false`, which causes the pipeline to append data.") + "If enabled (set to `true`), the pipeline truncates before loading data into BigQuery. Defaults to `false`, which causes the pipeline to append data.") @Default.Boolean(false) Boolean getIsTruncate(); @@ -175,7 +175,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "The name of a column of numeric type that will be used for partitioning.", helpText = - " Optional: If this parameter is provided (along with `table`), JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. 
Currently, only supports `Long` partition columns.") + "If this parameter is provided (along with `table`), JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports `Long` partition columns.") String getPartitionColumn(); void setPartitionColumn(String partitionColumn); @@ -186,7 +186,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Name of the table in the external database.", helpText = - "Optional: The table to read from when using partitions. This parameter also accepts a subquery in parentheses.", + "The table to read from when using partitions. This parameter also accepts a subquery in parentheses.", example = "(select id, name from Person) as subq") String getTable(); @@ -198,7 +198,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "The number of partitions.", helpText = - "Optional: The number of partitions. With the lower and upper bound, this value forms partition strides for generated `WHERE` clause expressions that are used to split the partition column evenly. When the input is less than `1`, the number is set to `1`.") + "The number of partitions. With the lower and upper bound, this value forms partition strides for generated `WHERE` clause expressions that are used to split the partition column evenly. When the input is less than `1`, the number is set to `1`.") Integer getNumPartitions(); void setNumPartitions(Integer numPartitions); @@ -209,7 +209,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Lower bound of partition column.", helpText = - "Optional: The lower bound to use in the partition scheme. If not provided, this value is automatically inferred by Apache Beam for the supported types.") + "The lower bound to use in the partition scheme. If not provided, this value is automatically inferred by Apache Beam for the supported types.") Long getLowerBound(); void setLowerBound(Long lowerBound); @@ -220,7 +220,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Upper bound of partition column", helpText = - "Optional: The upper bound to use in the partition scheme. If not provided, this value is automatically inferred by Apache Beam for the supported types.") + "The upper bound to use in the partition scheme. 
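To illustrate the partition strides mentioned above, the following sketch shows how a lower bound, upper bound, and partition count could be expanded into range predicates over the partition column. The column name and bounds are illustrative, and this is a simplified sketch rather than JdbcIO's actual implementation.

public class PartitionStrideSketch {
  public static void main(String[] args) {
    String partitionColumn = "id"; // illustrative column name
    long lowerBound = 0L;
    long upperBound = 1_000L;
    int numPartitions = 4;

    // Each partition reads one contiguous range of the partition column.
    long stride = (upperBound - lowerBound) / numPartitions + 1;
    for (long start = lowerBound; start <= upperBound; start += stride) {
      long end = Math.min(start + stride - 1, upperBound);
      System.out.printf("WHERE %s BETWEEN %d AND %d%n", partitionColumn, start, end);
    }
  }
}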
If not provided, this value is automatically inferred by Apache Beam for the supported types.") Long getUpperBound(); void setUpperBound(Long lowerBound); diff --git a/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java b/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java index ee3a87c7c3..c258588dcd 100644 --- a/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java +++ b/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java @@ -186,7 +186,7 @@ public interface JmsToPubsubOptions extends PipelineOptions { @TemplateParameter.Text( order = 5, description = "JMS Username", - helpText = "\t(Optional) The username to use for authentication on the JMS server.", + helpText = "The username to use for authentication on the JMS server.", example = "sampleusername") String getUsername(); @@ -195,7 +195,7 @@ public interface JmsToPubsubOptions extends PipelineOptions { @TemplateParameter.Text( order = 6, description = "JMS Password", - helpText = "(Optional) The password associated with the provided username.", + helpText = "The password associated with the provided username.", example = "samplepassword") String getPassword(); From d8ba7dc55071a2bd136e6f176b5c9d8d83175199 Mon Sep 17 00:00:00 2001 From: Archana Gupta Date: Wed, 1 May 2024 12:59:03 +0530 Subject: [PATCH 04/70] Fixed typos --- .../java/com/google/cloud/teleport/spanner/ExportPipeline.java | 2 +- .../com/google/cloud/teleport/templates/PubSubToBigQuery.java | 2 +- .../com/google/cloud/teleport/templates/PubsubToPubsub.java | 2 +- .../teleport/v2/options/SpannerChangeStreamsToGcsOptions.java | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java index 6abfe1f306..4bde7eef7a 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java @@ -216,7 +216,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Use independent compute resource (Spanner DataBoost).", helpText = - "Set to `true` to use the compute resources of Spanner Data Boost to run the job with near-zero impact on Spanner OLTP workflows. This requires the `spanner.databases.useDataBoost` IAM permission. For more information, see Data Boost overview(https://cloud.google.com/spanner/docs/databoost/databoost-overview).") + "Set to `true` to use the compute resources of Spanner Data Boost to run the job with near-zero impact on Spanner OLTP workflows. This requires the `spanner.databases.useDataBoost` IAM permission. 
For more information, see Data Boost overview (https://cloud.google.com/spanner/docs/databoost/databoost-overview).") @Default.Boolean(false) ValueProvider getDataBoostEnabled(); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java b/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java index c03c9e7725..ec763ab5f4 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java @@ -203,7 +203,7 @@ public interface Options extends PipelineOptions, JavascriptTextTransformerOptio description = "Table for messages failed to reach the output table (i.e., Deadletter table)", helpText = - "The BigQuery table for messages that failed to reach the output table, in the format of `:.`. If it doesn't exist, it is created during pipeline execution. If not specified, OUTPUT_TABLE_SPEC_error_records is used instead.") + "The BigQuery table for messages that failed to reach the output table, in the format of `:.`. If it doesn't exist, it is created during pipeline execution. If not specified, `OUTPUT_TABLE_SPEC_error_records` is used instead.") ValueProvider getOutputDeadletterTable(); void setOutputDeadletterTable(ValueProvider value); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java index a7bc99181b..baff969afc 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java @@ -143,7 +143,7 @@ public interface Options extends PipelineOptions, StreamingOptions { order = 2, description = "Output Pub/Sub topic", helpText = - "Cloud Pub/Sub topic to write the output to..", + "Cloud Pub/Sub topic to write the output to.", example = "projects/your-project-id/topics/your-topic-name") @Validation.Required ValueProvider getOutputTopic(); diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java index ecd68de243..a819e51c48 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java @@ -68,7 +68,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Spanner database role", helpText = - "The Spanner database role to use when running the template. This parameter is required only when the IAM principal who is running the template is a fine-grained access control user. The database role must have the `SELECT` privilege on the change stream and the `EXECUTE` privilege on the change stream's read function. For more information, see [Fine-grained access control for change streams](https://cloud.google.com/spanner/docs/fgac-change-streams).") + "The Spanner database role to use when running the template. This parameter is required only when the IAM principal who is running the template is a fine-grained access control user. The database role must have the `SELECT` privilege on the change stream and the `EXECUTE` privilege on the change stream's read function. 
For more information, see Fine-grained access control for change streams (https://cloud.google.com/spanner/docs/fgac-change-streams).") String getSpannerDatabaseRole(); void setSpannerDatabaseRole(String spannerDatabaseRole); From 199fa612676178379366597662da7aacd66f4f23 Mon Sep 17 00:00:00 2001 From: Archana Gupta Date: Mon, 6 May 2024 10:17:34 +0530 Subject: [PATCH 05/70] Apply suggestions from code review Changes made as recommended by technical writer Rebecca Co-authored-by: Rebecca Szper <98840847+rszper@users.noreply.github.com> --- .../cloud/teleport/bigtable/BigtableToAvro.java | 2 +- .../cloud/teleport/spanner/ExportPipeline.java | 12 ++++++------ .../cloud/teleport/spanner/ImportPipeline.java | 16 ++++++++-------- .../teleport/templates/PubSubToBigQuery.java | 6 +++--- .../cloud/teleport/templates/PubsubToPubsub.java | 8 ++++---- .../cloud/teleport/templates/TextToPubsub.java | 2 +- .../options/AstraDbToBigQueryOptions.java | 14 +++++++------- .../SpannerChangeStreamsToGcsOptions.java | 14 +++++++------- .../teleport/v2/templates/PubSubToBigQuery.java | 10 +++++----- .../v2/options/JdbcToBigQueryOptions.java | 12 ++++++------ .../cloud/teleport/v2/templates/JmsToPubsub.java | 6 +++--- 11 files changed, 51 insertions(+), 51 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java b/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java index 4cb5d2b2ad..3b55dd29fe 100644 --- a/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java +++ b/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java @@ -78,7 +78,7 @@ public interface Options extends PipelineOptions { order = 1, description = "Project ID", helpText = - "The ID of the Google Cloud project of the Bigtable instance that you want to read data from.") + "The ID of the Google Cloud project that contains the Bigtable instance that you want to read data from.") ValueProvider getBigtableProjectId(); @SuppressWarnings("unused") diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java index 4bde7eef7a..26a7b55993 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java @@ -97,7 +97,7 @@ public interface ExportPipelineOptions extends PipelineOptions { order = 3, description = "Cloud Storage output directory", helpText = - "The Cloud Storage path you want to export Avro files to. The export job creates a new directory under this path that contains the exported files.", + "The Cloud Storage path to export Avro files to. The export job creates a new directory under this path that contains the exported files.", example = "gs://your-bucket/your-path") ValueProvider getOutputDir(); @@ -146,7 +146,7 @@ public interface ExportPipelineOptions extends PipelineOptions { }, description = "Snapshot time", helpText = - "The timestamp that corresponds to the version of the Spanner database that you want to read. The timestamp must be specified as per RFC 3339 UTC "Zulu" format. The timestamp must be in the past and Maximum timestamp staleness applies.", + "The timestamp that corresponds to the version of the Spanner database that you want to read. The timestamp must be specified as per RFC 3339 UTC "Zulu" format. 
The timestamp must be in the past, and maximum timestamp staleness applies.", example = "1990-12-31T23:59:60Z") @Default.String(value = "") ValueProvider getSnapshotTime(); @@ -157,7 +157,7 @@ public interface ExportPipelineOptions extends PipelineOptions { order = 8, optional = true, description = "Cloud Spanner Project Id", - helpText = "The Google Cloud Project ID of the Spanner database that you want to read data from.") + helpText = "The ID of the Google Cloud project that contains the Spanner database that you want to read data from.") ValueProvider getSpannerProjectId(); void setSpannerProjectId(ValueProvider value); @@ -179,7 +179,7 @@ public interface ExportPipelineOptions extends PipelineOptions { regexes = {"^[a-zA-Z0-9_]+(,[a-zA-Z0-9_]+)*$"}, description = "Cloud Spanner table name(s).", helpText = - "A comma-separated list of tables specifying the subset of the Spanner database to export. If you set this parameter, you must include all of the related tables (parent tables and foreign key referenced tables) or else set the `shouldExportRelatedTables` parameter to `true`.") + "A comma-separated list of tables specifying the subset of the Spanner database to export. If you set this parameter, you must either include all of the related tables (parent tables and foreign key referenced tables) or set the `shouldExportRelatedTables` parameter to `true`.") @Default.String(value = "") ValueProvider getTableNames(); @@ -206,7 +206,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Priority for Spanner RPC invocations", helpText = - "The request priority for Spanner calls. Possible values are `HIGH`, `MEDIUM`, `LOW`. The default value is `MEDIUM`.") + "The request priority for Spanner calls. Possible values are `HIGH`, `MEDIUM`, and `LOW`. The default value is `MEDIUM`.") ValueProvider getSpannerPriority(); void setSpannerPriority(ValueProvider value); @@ -216,7 +216,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Use independent compute resource (Spanner DataBoost).", helpText = - "Set to `true` to use the compute resources of Spanner Data Boost to run the job with near-zero impact on Spanner OLTP workflows. This requires the `spanner.databases.useDataBoost` IAM permission. For more information, see Data Boost overview (https://cloud.google.com/spanner/docs/databoost/databoost-overview).") + "Set to `true` to use the compute resources of Spanner Data Boost to run the job with near-zero impact on Spanner OLTP workflows. When set to `true`, you also need the `spanner.databases.useDataBoost` IAM permission. For more information, see the Data Boost overview (https://cloud.google.com/spanner/docs/databoost/databoost-overview).") @Default.Boolean(false) ValueProvider getDataBoostEnabled(); diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java index a64b0b471b..2a5fc96327 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java @@ -106,7 +106,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Indexes", helpText = - "If `true`, the pipeline waits for indexes to be created. If `false`, the job might complete while indexes are still being created in the background. Default: `false`.") + "If `true`, the pipeline waits for indexes to be created. 
If `false`, the job might complete while indexes are still being created in the background. The default value is `false`.") @Default.Boolean(false) ValueProvider getWaitForIndexes(); @@ -117,7 +117,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Foreign Keys", helpText = - "If `true`, the pipeline waits for foreign keys to be created. If `false`, the job might complete while foreign keys are still being created in the background. Default: `false`.") + "If `true`, the pipeline waits for foreign keys to be created. If `false`, the job might complete while foreign keys are still being created in the background. The default value is `false`.") @Default.Boolean(false) ValueProvider getWaitForForeignKeys(); @@ -128,7 +128,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Change Streams", helpText = - "If `true`, the pipeline waits for change streams to be created. If `false`, the job might complete while change streams are still being created in the background. Default: `true`.") + "If `true`, the pipeline waits for change streams to be created. If `false`, the job might complete while change streams are still being created in the background. The default value is `true`.") @Default.Boolean(true) ValueProvider getWaitForChangeStreams(); @@ -139,7 +139,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Wait for Sequences", helpText = - "By default the import pipeline is blocked on sequence creation. If `false`, it may" + "By default, the import pipeline is blocked on sequence creation. If `false`, the import pipeline might" + " complete with sequences still being created in the background.") @Default.Boolean(true) ValueProvider getWaitForSequences(); @@ -151,7 +151,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Create Indexes early", helpText = - "Specifies whether to enable early index creation. If the template runs a large number of DDL statements, it's more efficient to create indexes before loading data. Therefore, the default behavior is to create the indexes first when the number of DDL statements exceeds a threshold. To disable this feature, `set earlyIndexCreateFlag` to `false`. Default: `true`.") + "Specifies whether early index creation is enabled. If the template runs a large number of DDL statements, it's more efficient to create indexes before loading data. Therefore, the default behavior is to create the indexes first when the number of DDL statements exceeds a threshold. To disable this feature, set `earlyIndexCreateFlag` to `false`. The default value is `true`.") @Default.Boolean(true) ValueProvider getEarlyIndexCreateFlag(); @@ -166,7 +166,7 @@ public interface Options extends PipelineOptions { order = 9, optional = true, description = "Cloud Spanner Project Id", - helpText = "The Google Cloud project ID of the Spanner database. If not set, the default Google Cloud project is used.") + helpText = "The ID of the Google Cloud project that contains the Spanner database. If not set, the default Google Cloud project is used.") ValueProvider getSpannerProjectId(); void setSpannerProjectId(ValueProvider value); @@ -178,7 +178,7 @@ public interface Options extends PipelineOptions { optional = true, regexes = {"[0-9]+"}, description = "DDL Creation timeout in minutes", - helpText = "The timeout, in minutes, for DDL statements performed by the template. 
The default value is 30 minutes.") + helpText = "The timeout in minutes for DDL statements performed by the template. The default value is 30 minutes.") @Default.Integer(30) ValueProvider getDdlCreationTimeoutInMinutes(); @@ -194,7 +194,7 @@ public interface Options extends PipelineOptions { optional = true, description = "Priority for Spanner RPC invocations", helpText = - "The request priority for Spanner calls. Possible values are `HIGH`, `MEDIUM`, `LOW`. The default value is `MEDIUM`.") + "The request priority for Spanner calls. Possible values are `HIGH`, `MEDIUM`, and `LOW`. The default value is `MEDIUM`.") ValueProvider getSpannerPriority(); void setSpannerPriority(ValueProvider value); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java b/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java index ec763ab5f4..99bb755ec5 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java @@ -166,7 +166,7 @@ public interface Options extends PipelineOptions, JavascriptTextTransformerOptio order = 1, description = "BigQuery output table", helpText = - "The BigQuery output table location, in the format of `:.`") + "The BigQuery output table location, in the format `:.`") ValueProvider getOutputTableSpec(); void setOutputTableSpec(ValueProvider value); @@ -183,7 +183,7 @@ public interface Options extends PipelineOptions, JavascriptTextTransformerOptio order = 3, description = "Pub/Sub input subscription", helpText = - "The Pub/Sub input subscription to read from, in the format of `projects//subscriptions/`.") + "The Pub/Sub input subscription to read from, in the format `projects//subscriptions/`.") ValueProvider getInputSubscription(); void setInputSubscription(ValueProvider value); @@ -203,7 +203,7 @@ public interface Options extends PipelineOptions, JavascriptTextTransformerOptio description = "Table for messages failed to reach the output table (i.e., Deadletter table)", helpText = - "The BigQuery table for messages that failed to reach the output table, in the format of `:.`. If it doesn't exist, it is created during pipeline execution. If not specified, `OUTPUT_TABLE_SPEC_error_records` is used instead.") + "The BigQuery table to use for messages that failed to reach the output table, in the format of `:.`. If the table doesn't exist, it is created during pipeline execution. 
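The fallback described for the dead-letter table can be sketched as follows; the table spec value is invented, and the helper simply mirrors the documented default of appending _error_records to the output table spec.

public class DeadletterDefaultSketch {
  static String deadletterTable(String outputTableSpec, String outputDeadletterTable) {
    if (outputDeadletterTable != null && !outputDeadletterTable.isEmpty()) {
      return outputDeadletterTable;
    }
    // Documented default: derive the table name from the output table spec.
    return outputTableSpec + "_error_records";
  }

  public static void main(String[] args) {
    System.out.println(deadletterTable("my-project:my_dataset.events", null));
    // prints my-project:my_dataset.events_error_records
  }
}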
If not specified, `OUTPUT_TABLE_SPEC_error_records` is used.") ValueProvider getOutputDeadletterTable(); void setOutputDeadletterTable(ValueProvider value); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java index baff969afc..658580b607 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java @@ -132,7 +132,7 @@ public interface Options extends PipelineOptions, StreamingOptions { order = 1, description = "Pub/Sub input subscription", helpText = - "Pub/Sub subscription to read the input from.", + "The Pub/Sub subscription to read the input from.", example = "projects/your-project-id/subscriptions/your-subscription-name") @Validation.Required ValueProvider getInputSubscription(); @@ -143,7 +143,7 @@ public interface Options extends PipelineOptions, StreamingOptions { order = 2, description = "Output Pub/Sub topic", helpText = - "Cloud Pub/Sub topic to write the output to.", + "The Pub/Sub topic to write the output to.", example = "projects/your-project-id/topics/your-topic-name") @Validation.Required ValueProvider getOutputTopic(); @@ -155,7 +155,7 @@ public interface Options extends PipelineOptions, StreamingOptions { optional = true, description = "Event filter key", helpText = - "Filter events based on an attribute key. No filters are applied if `filterKey` is not specified.") + "The attribute key to use to filter events based. No filters are applied if `filterKey` is not specified.") ValueProvider getFilterKey(); void setFilterKey(ValueProvider filterKey); @@ -165,7 +165,7 @@ public interface Options extends PipelineOptions, StreamingOptions { optional = true, description = "Event filter value", helpText = - "Filter attribute value to use in case a filterKey is provided. A null `filterValue` is used by default.") + "The attribute value to use to filter events when a `filterKey` is provided. By default, a null `filterValue` is used.") ValueProvider getFilterValue(); void setFilterValue(ValueProvider filterValue); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/TextToPubsub.java b/v1/src/main/java/com/google/cloud/teleport/templates/TextToPubsub.java index ac07eaa34d..c1e7a9d2ee 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/TextToPubsub.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/TextToPubsub.java @@ -74,7 +74,7 @@ public interface Options extends PipelineOptions { order = 2, description = "Output Pub/Sub topic", helpText = - "The Pub/Sub input topic to write to. The name must be in the format of `projects//topics/`.", + "The Pub/Sub input topic to write to. 
The name must be in the format `projects//topics/`.", example = "projects/your-project-id/topics/your-topic-name") @Required ValueProvider getOutputTopic(); diff --git a/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java b/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java index 040fea7c9f..f5ee4cdbc9 100644 --- a/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java +++ b/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java @@ -33,7 +33,7 @@ interface BigQueryWriteOptions extends PipelineOptions, DataflowPipelineOptions description = "BigQuery output table", optional = true, helpText = - "The BigQuery table location to write the output to. The table should be in the format `:.`. The table's schema must match the input objects.") + "The BigQuery table location to write the output to. Use the format `:.`. The table's schema must match the input objects.") String getOutputTableSpec(); @SuppressWarnings("unused") @@ -45,7 +45,7 @@ interface AstraDbSourceOptions extends PipelineOptions { @TemplateParameter.Text( order = 1, description = "Astra token", - helpText = "Token value or secret resource ID.", + helpText = "The token value or secret resource ID.", example = "AstraCS:abcdefghij") @Validation.Required @SuppressWarnings("unused") @@ -57,7 +57,7 @@ interface AstraDbSourceOptions extends PipelineOptions { @TemplateParameter.Text( order = 2, description = "Database identifier", - helpText = "Database unique identifier (uuid).", + helpText = "The database unique identifier (uuid).", example = "cf7af129-d33a-498f-ad06-d97a6ee6eb7") @Validation.Required @SuppressWarnings("unused") @@ -70,7 +70,7 @@ interface AstraDbSourceOptions extends PipelineOptions { order = 3, description = "Cassandra keyspace", regexes = {"^[a-zA-Z0-9][a-zA-Z0-9_]{0,47}$"}, - helpText = "Name of the Cassandra keyspace inside Astra database.") + helpText = "The name of the Cassandra keyspace inside of the Astra database.") String getAstraKeyspace(); @SuppressWarnings("unused") @@ -80,7 +80,7 @@ interface AstraDbSourceOptions extends PipelineOptions { order = 4, description = "Cassandra table", regexes = {"^[a-zA-Z][a-zA-Z0-9_]*$"}, - helpText = "Name of the table inside the Cassandra database.", + helpText = "The name of the table inside of the Cassandra database.", example = "my_table") @SuppressWarnings("unused") String getAstraTable(); @@ -92,7 +92,7 @@ interface AstraDbSourceOptions extends PipelineOptions { order = 5, optional = true, description = "Cassandra CQL Query", - helpText = "Query to filter rows instead of reading the whole table.") + helpText = "The query to use to filter rows instead of reading the whole table.") @SuppressWarnings("unused") String getAstraQuery(); @@ -115,7 +115,7 @@ interface AstraDbSourceOptions extends PipelineOptions { order = 7, optional = true, description = "Token range count", - helpText = "The minimal number of splits to distribute the query.") + helpText = "The minimal number of splits to use to distribute the query.") Integer getMinTokenRangesCount(); @SuppressWarnings("unused") diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java index 
a819e51c48..e1646cfc21 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java @@ -39,7 +39,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Spanner Project ID", helpText = - "Project to read change streams from. This is also the project where the change streams connector metadata table is created. The default for this parameter is the project where the Dataflow pipeline is running.") + "The ID of the Google Cloud project that contains the Spanner database to read change streams from. This project is also where the change streams connector metadata table is created. The default for this parameter is the project where the Dataflow pipeline is running.") @Default.String("") String getSpannerProjectId(); @@ -97,7 +97,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Cloud Spanner metadata table name", helpText = - "The Spanner change streams connector metadata table name to use. If not provided, a Spanner change streams metadata table will automatically be created during the pipeline flow. This parameter must be provided when updating an existing pipeline and shouldn't be provided otherwise.") + "The Spanner change streams connector metadata table name to use. If not provided, a Spanner change streams metadata table is automatically created during the pipeline flow. You must provide a value for this parameter when updating an existing pipeline. Otherwise, don't use this parameter.") String getSpannerMetadataTableName(); void setSpannerMetadataTableName(String value); @@ -116,7 +116,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "The timestamp to read change streams from", helpText = - "The starting DateTime, inclusive, to use for reading change streams. Ex-2021-10-12T07:20:50.52Z. Defaults to the timestamp when the pipeline starts, i.e. the current time.") + "The starting DateTime, inclusive, to use for reading change streams, in the format Ex-2021-10-12T07:20:50.52Z. Defaults to the timestamp when the pipeline starts, that is, the current time.") @Default.String("") String getStartTimestamp(); @@ -127,7 +127,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "The timestamp to read change streams to", helpText = - "The ending DateTime, inclusive, to use for reading change streams. Ex-2021-10-12T07:20:50.52Z. Defaults to an infinite time in the future.") + "The ending DateTime, inclusive, to use for reading change streams. For example, Ex-2021-10-12T07:20:50.52Z. Defaults to an infinite time in the future.") @Default.String("") String getEndTimestamp(); @@ -150,7 +150,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Output file format", helpText = - "The format of the output Cloud Storage file. Allowed formats are TEXT, AVRO. Default is AVRO.") + "The format of the output Cloud Storage file. Allowed formats are TEXT and AVRO. Defaults to AVRO.") @Default.Enum("AVRO") FileFormat getOutputFileFormat(); @@ -161,7 +161,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Window duration", helpText = - "The window duration is the interval in which data is written to the output directory. Configure the duration based on the pipeline's throughput. 
For example, a higher throughput might require smaller window sizes so that the data fits into memory. Defaults to 5m, with a minimum of 1s. Allowed formats are: [int]s (for seconds, example: 5s), [int]m (for minutes, example: 12m), [int]h (for hours, example: 2h).", + "The window duration is the interval in which data is written to the output directory. Configure the duration based on the pipeline's throughput. For example, a higher throughput might require smaller window sizes so that the data fits into memory. Defaults to 5m (five minutes), with a minimum of 1s (one second). Allowed formats are: [int]s (for seconds, example: 5s), [int]m (for minutes, example: 12m), [int]h (for hours, example: 2h).", example = "5m") @Default.String("5m") String getWindowDuration(); @@ -178,7 +178,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Priority for Spanner RPC invocations", helpText = - "The request priority for Spanner calls. The value must be one of:[HIGH,MEDIUM,LOW]. (Default: HIGH)") + "The request priority for Spanner calls. The value must be HIGH, MEDIUM, or LOW. Defaults to HIGH.") @Default.Enum("HIGH") RpcPriority getRpcPriority(); diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java index aa8f81ece9..44d7ae02c8 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToBigQuery.java @@ -183,7 +183,7 @@ public interface Options order = 1, description = "BigQuery output table", helpText = - "The BigQuery output table location, in the format of `:.`") + "The BigQuery output table location, in the format `:.`.") String getOutputTableSpec(); void setOutputTableSpec(String value); @@ -202,7 +202,7 @@ public interface Options optional = true, description = "Pub/Sub input subscription", helpText = - "The Pub/Sub input subscription to read from, in the format of `projects//subscriptions/`.") + "The Pub/Sub input subscription to read from, in the format `projects//subscriptions/`.") String getInputSubscription(); void setInputSubscription(String value); @@ -213,7 +213,7 @@ public interface Options description = "Table for messages failed to reach the output table (i.e., Deadletter table)", helpText = - "The BigQuery table for messages that failed to reach the output table, in the format of` :.`. If it doesn't exist, it is created during pipeline execution. If not specified, `OUTPUT_TABLE_SPEC_error_records` is used instead.") + "The BigQuery table to use for messages that failed to reach the output table, in the format `:.`. If the table doesn't exist, it's created during pipeline execution. If not specified, `OUTPUT_TABLE_SPEC_error_records` is used.") String getOutputDeadletterTable(); void setOutputDeadletterTable(String value); @@ -224,8 +224,8 @@ public interface Options description = "Use at at-least-once semantics in BigQuery Storage Write API", helpText = "This parameter takes effect only if `Use BigQuery Storage Write API` is enabled. If" - + " enabled the at-least-once semantics will be used for Storage Write API, otherwise" - + " exactly-once semantics will be used.", + + " enabled, at-least-once semantics are be used for Storage Write API. 
Otherwise," + + " exactly-once semantics are used.", hiddenUi = true) @Default.Boolean(false) @Override diff --git a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java index 4e0f0419a6..73e509898f 100644 --- a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java +++ b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java @@ -81,7 +81,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "JDBC connection username.", helpText = - "The username to use for the JDBC connection. You can pass in this value encrypted by a Cloud KMS key as a Base64-encoded string.") + "The username to use for the JDBC connection. You can pass in this value as a string that's encrypted with a Cloud KMS key and then Base64-encoded. Remove whitespace characters from the Base64-encoded string.") String getUsername(); void setUsername(String username); @@ -92,7 +92,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "JDBC connection password.", helpText = - "The password to use for the JDBC connection. You can pass in this value encrypted by a Cloud KMS key as a Base64-encoded string.") + "The password to use for the JDBC connection. You can pass in this value as a string that's encrypted with a Cloud KMS key and then Base64-encoded. Remove whitespace characters from the Base64-encoded string.") String getPassword(); void setPassword(String password); @@ -151,7 +151,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Whether to use column alias to map the rows.", helpText = - "If enabled (set to `true`), the pipeline uses the column alias ("AS") instead of the column name to map the rows to BigQuery. Defaults to `false`.") + "If set to `true`, the pipeline uses the column alias ("AS") instead of the column name to map the rows to BigQuery. Defaults to `false`.") @Default.Boolean(false) Boolean getUseColumnAlias(); @@ -163,7 +163,7 @@ public interface JdbcToBigQueryOptions groupName = "Target", description = "Whether to truncate data before writing", helpText = - "If enabled (set to `true`), the pipeline truncates before loading data into BigQuery. Defaults to `false`, which causes the pipeline to append data.") + "If set to `true`, the pipeline truncates before loading data into BigQuery. Defaults to `false`, which causes the pipeline to append data.") @Default.Boolean(false) Boolean getIsTruncate(); @@ -175,7 +175,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "The name of a column of numeric type that will be used for partitioning.", helpText = - "If this parameter is provided (along with `table`), JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports `Long` partition columns.") + "If this parameter is provided with `table`, JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports `Long` partition columns.") String getPartitionColumn(); void setPartitionColumn(String partitionColumn); @@ -247,7 +247,7 @@ public interface JdbcToBigQueryOptions }, optional = true, description = "Create Disposition to use for BigQuery", - helpText = "BigQuery CreateDisposition. 
For example, `CREATE_IF_NEEDED`, `CREATE_NEVER`.") + helpText = "The BigQuery CreateDisposition to use. For example, `CREATE_IF_NEEDED` or `CREATE_NEVER`.") @Default.String("CREATE_NEVER") String getCreateDisposition(); diff --git a/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java b/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java index c258588dcd..304a2295f4 100644 --- a/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java +++ b/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java @@ -153,7 +153,7 @@ public interface JmsToPubsubOptions extends PipelineOptions { optional = false, regexes = {"[a-zA-Z0-9._-]+"}, description = "JMS Queue/Topic Name to read the input from", - helpText = "The name of the JMS topic/queue that data is read from.", + helpText = "The name of the JMS topic or queue that data is read from.", example = "queue") @Validation.Required String getInputName(); @@ -165,7 +165,7 @@ public interface JmsToPubsubOptions extends PipelineOptions { optional = false, regexes = {"[a-zA-Z0-9._-]+"}, description = "JMS Destination Type to read the input from", - helpText = "The JMS destination type to read data from , can be queue or topic.", + helpText = "The JMS destination type to read data from. Can be a queue or a topic.", example = "queue") @Validation.Required String getInputType(); @@ -176,7 +176,7 @@ public interface JmsToPubsubOptions extends PipelineOptions { order = 4, description = "Output Pub/Sub topic", helpText = - "The name of the topic to which data should published, in the format `projects/your-project-id/topics/your-topic-name`.", + "The name of the Pub/Sub topic to publish data to, in the format `projects//topics/`.", example = "projects/your-project-id/topics/your-topic-name") @Validation.Required String getOutputTopic(); From 52e9ca53b3a31e8c3ccf3e92bfc7b904d2009b99 Mon Sep 17 00:00:00 2001 From: Archana Gupta Date: Mon, 6 May 2024 10:47:23 +0530 Subject: [PATCH 06/70] Made changes to the suggestions recommended --- .../com/google/cloud/teleport/spanner/ExportPipeline.java | 2 +- .../cloud/teleport/v2/options/JdbcToBigQueryOptions.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java index 4bde7eef7a..09b3d32789 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java @@ -167,7 +167,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Export Timestamps as Timestamp-micros type", helpText = - "If true, timestamps are exported as a `long` type with `timestamp-micros` logical type. By default, timestamps are exported as ISO-8601 strings at nanosecond precision.") + "If true, timestamps are exported as a `long` type with `timestamp-micros` logical type. 
By default, this is set to `false` and timestamps are exported as ISO-8601 strings at nanosecond precision.") @Default.Boolean(false) ValueProvider getShouldExportTimestampAsLogicalType(); diff --git a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java index 4e0f0419a6..73499570aa 100644 --- a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java +++ b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java @@ -139,7 +139,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Google Cloud KMS key", helpText = - "The Cloud KMS encryption key to use decrypt the username, password, and connection string. If you pass in a Cloud KMS key, the username, password, and connection string must all be passed in encrypted.", + "The Cloud KMS encryption key to decrypt the username, password, and connection string. If you pass in a Cloud KMS key, the username, password, and connection string must all be passed in encrypted.", example = "projects/your-project/locations/global/keyRings/your-keyring/cryptoKeys/your-key") String getKMSEncryptionKey(); @@ -175,7 +175,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "The name of a column of numeric type that will be used for partitioning.", helpText = - "If this parameter is provided (along with `table`), JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports `Long` partition columns.") + "If this parameter is provided (along with the name of the `table` defined as an optional parameter), JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports `Long` partition columns.") String getPartitionColumn(); void setPartitionColumn(String partitionColumn); From 18dc7928013d8efe0a8f120593c0166f96042dd8 Mon Sep 17 00:00:00 2001 From: gauravjainn Date: Mon, 6 May 2024 13:06:41 +0530 Subject: [PATCH 07/70] "Reverting the JdbcToBigQueryOptions java file" --- .../v2/options/JdbcToBigQueryOptions.java | 270 ++++++++++++++++++ 1 file changed, 270 insertions(+) create mode 100644 v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java diff --git a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java new file mode 100644 index 0000000000..06c1178c23 --- /dev/null +++ b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java @@ -0,0 +1,270 @@ +/* + * Copyright (C) 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.teleport.v2.options; + +import com.google.cloud.teleport.metadata.TemplateParameter; +import org.apache.beam.sdk.options.Default; + +/** Interface used by the JdbcToBigQuery pipeline to accept user input. */ +public interface JdbcToBigQueryOptions + extends CommonTemplateOptions, BigQueryStorageApiBatchOptions { + + @TemplateParameter.Text( + order = 1, + optional = false, + regexes = {"^.+$"}, + groupName = "Source", + description = "Comma-separated Cloud Storage path(s) of the JDBC driver(s)", + helpText = "The comma-separated list of driver JAR files.", + example = "gs://your-bucket/driver_jar1.jar,gs://your-bucket/driver_jar2.jar") + String getDriverJars(); + + void setDriverJars(String driverJar); + + @TemplateParameter.Text( + order = 2, + optional = false, + regexes = {"^.+$"}, + groupName = "Source", + description = "JDBC driver class name", + helpText = "The JDBC driver class name.", + example = "com.mysql.jdbc.Driver") + String getDriverClassName(); + + void setDriverClassName(String driverClassName); + + @TemplateParameter.Text( + order = 3, + optional = false, + regexes = { + "(^jdbc:[a-zA-Z0-9/:@.?_+!*=&-;]+$)|(^([A-Za-z0-9+/]{4}){1,}([A-Za-z0-9+/]{0,3})={0,3})" + }, + groupName = "Source", + description = "JDBC connection URL string.", + helpText = + "The JDBC connection URL string. For example, `jdbc:mysql://some-host:3306/sampledb`. You can pass in this value as a string that's encrypted with a Cloud KMS key and then Base64-encoded. Remove whitespace characters from the Base64-encoded string. Note the difference between an Oracle non-RAC database connection string (`jdbc:oracle:thin:@some-host::`) and an Oracle RAC database connection string (`jdbc:oracle:thin:@//some-host[:]/`).", + example = "jdbc:mysql://some-host:3306/sampledb") + String getConnectionURL(); + + void setConnectionURL(String connectionURL); + + @TemplateParameter.Text( + order = 4, + optional = true, + regexes = {"^[a-zA-Z0-9_;!*&=@#-:\\/]+$"}, + groupName = "Source", + description = "JDBC connection property string.", + helpText = + "The properties string to use for the JDBC connection. Use the string format `[propertyName=property;]*.`", + example = "unicode=true;characterEncoding=UTF-8") + String getConnectionProperties(); + + void setConnectionProperties(String connectionProperties); + + @TemplateParameter.Text( + order = 5, + optional = true, + regexes = {"^.+$"}, + groupName = "Source", + description = "JDBC connection username.", + helpText = + "The username to use for the JDBC connection. You can pass in this value as a string that's encrypted with a Cloud KMS key and then Base64-encoded. Remove whitespace characters from the Base64-encoded string.") + String getUsername(); + + void setUsername(String username); + + @TemplateParameter.Password( + order = 6, + optional = true, + groupName = "Source", + description = "JDBC connection password.", + helpText = + "The password to use for the JDBC connection. You can pass in this value as a string that's encrypted with a Cloud KMS key and then Base64-encoded. 
Remove whitespace characters from the Base64-encoded string.") + String getPassword(); + + void setPassword(String password); + + @TemplateParameter.Text( + order = 7, + optional = true, + regexes = {"^.+$"}, + groupName = "Source", + description = "JDBC source SQL query", + helpText = + "The query to run on the source to extract the data.", + example = "select * from sampledb.sample_table") + String getQuery(); + + void setQuery(String query); + + void setOutputTable(String value); + + @TemplateParameter.BigQueryTable( + order = 8, + groupName = "Target", + description = "BigQuery output table", + helpText = + "BigQuery table location to write the output to. The name should be in the format" + + " `:.`. The table's schema must match input objects.", + example = ":.") + String getOutputTable(); + + @TemplateParameter.GcsWriteFolder( + order = 9, + optional = false, + groupName = "Target", + description = "Temporary directory for BigQuery loading process", + helpText = "The temporary directory for the BigQuery loading process.", + example = "gs://your-bucket/your-files/temp_dir") + String getBigQueryLoadingTemporaryDirectory(); + + void setBigQueryLoadingTemporaryDirectory(String directory); + + @TemplateParameter.KmsEncryptionKey( + order = 10, + optional = true, + groupName = "Source", + description = "Google Cloud KMS key", + helpText = + "The Cloud KMS encryption key to decrypt the username, password, and connection string. If you pass in a Cloud KMS key, the username, password, and connection string must all be passed in encrypted.", + example = "projects/your-project/locations/global/keyRings/your-keyring/cryptoKeys/your-key") + String getKMSEncryptionKey(); + + void setKMSEncryptionKey(String keyName); + + @TemplateParameter.Boolean( + order = 11, + optional = true, + groupName = "Source", + description = "Whether to use column alias to map the rows.", + helpText = + "If set to `true`, the pipeline uses the column alias ("AS") instead of the column name to map the rows to BigQuery. Defaults to `false`.") + @Default.Boolean(false) + Boolean getUseColumnAlias(); + + void setUseColumnAlias(Boolean useColumnAlias); + + @TemplateParameter.Boolean( + order = 12, + optional = true, + groupName = "Target", + description = "Whether to truncate data before writing", + helpText = + "If set to `true`, the pipeline truncates before loading data into BigQuery. Defaults to `false`, which causes the pipeline to append data.") + @Default.Boolean(false) + Boolean getIsTruncate(); + + void setIsTruncate(Boolean isTruncate); + + @TemplateParameter.Text( + order = 13, + optional = true, + groupName = "Source", + description = "The name of a column of numeric type that will be used for partitioning.", + helpText = +<<<<<<< HEAD + "If this parameter is provided (along with the name of the `table` defined as an optional parameter), JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports `Long` partition columns.") +======= + "If this parameter is provided with `table`, JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. 
Currently, only supports `Long` partition columns.") +>>>>>>> 199fa612676178379366597662da7aacd66f4f23 + String getPartitionColumn(); + + void setPartitionColumn(String partitionColumn); + + @TemplateParameter.Text( + order = 14, + optional = true, + groupName = "Source", + description = "Name of the table in the external database.", + helpText = + "The table to read from when using partitions. This parameter also accepts a subquery in parentheses.", + example = "(select id, name from Person) as subq") + String getTable(); + + void setTable(String table); + + @TemplateParameter.Integer( + order = 15, + optional = true, + groupName = "Source", + description = "The number of partitions.", + helpText = + "The number of partitions. With the lower and upper bound, this value forms partition strides for generated `WHERE` clause expressions that are used to split the partition column evenly. When the input is less than `1`, the number is set to `1`.") + Integer getNumPartitions(); + + void setNumPartitions(Integer numPartitions); + + @TemplateParameter.Long( + order = 16, + optional = true, + groupName = "Source", + description = "Lower bound of partition column.", + helpText = + "The lower bound to use in the partition scheme. If not provided, this value is automatically inferred by Apache Beam for the supported types.") + Long getLowerBound(); + + void setLowerBound(Long lowerBound); + + @TemplateParameter.Long( + order = 17, + optional = true, + groupName = "Source", + description = "Upper bound of partition column", + helpText = + "The upper bound to use in the partition scheme. If not provided, this value is automatically inferred by Apache Beam for the supported types.") + Long getUpperBound(); + + void setUpperBound(Long lowerBound); + + @TemplateParameter.Integer( + order = 18, + optional = true, + groupName = "Source", + description = "Fetch Size", + // TODO: remove the "Not used for partitioned reads" once + // https://github.com/apache/beam/pull/28999 is released. + helpText = + "The number of rows to be fetched from database at a time. Not used for partitioned reads.") + @Default.Integer(50000) + Integer getFetchSize(); + + void setFetchSize(Integer fetchSize); + + @TemplateParameter.Enum( + order = 19, + enumOptions = { + @TemplateParameter.TemplateEnumOption("CREATE_IF_NEEDED"), + @TemplateParameter.TemplateEnumOption("CREATE_NEVER") + }, + optional = true, + description = "Create Disposition to use for BigQuery", + helpText = "The BigQuery CreateDisposition to use. For example, `CREATE_IF_NEEDED` or `CREATE_NEVER`.") + @Default.String("CREATE_NEVER") + String getCreateDisposition(); + + void setCreateDisposition(String createDisposition); + + @TemplateParameter.GcsReadFile( + order = 20, + optional = true, + description = "Cloud Storage path to BigQuery JSON schema", + helpText = + "The Cloud Storage path for the BigQuery JSON schema. 
If `createDisposition` is set to CREATE_IF_NEEDED, this parameter must be specified.", + example = "gs://your-bucket/your-schema.json") + String getBigQuerySchemaPath(); + + void setBigQuerySchemaPath(String path); +} From 743ebbec8a170ceb45fd30d25eb96d473f834226 Mon Sep 17 00:00:00 2001 From: Archana Gupta Date: Tue, 7 May 2024 10:56:32 +0530 Subject: [PATCH 08/70] Apply suggestions from code review Updating recommendations provided Co-authored-by: Rebecca Szper <98840847+rszper@users.noreply.github.com> --- .../com/google/cloud/teleport/spanner/ExportPipeline.java | 2 +- .../google/cloud/teleport/templates/PubSubToBigQuery.java | 2 +- .../google/cloud/teleport/templates/PubsubToPubsub.java | 2 +- .../v2/astradb/options/AstraDbToBigQueryOptions.java | 2 +- .../v2/options/SpannerChangeStreamsToGcsOptions.java | 4 ++-- .../cloud/teleport/v2/options/JdbcToBigQueryOptions.java | 8 ++++---- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java index 98164c3076..8d77654c9f 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java @@ -146,7 +146,7 @@ public interface ExportPipelineOptions extends PipelineOptions { }, description = "Snapshot time", helpText = - "The timestamp that corresponds to the version of the Spanner database that you want to read. The timestamp must be specified as per RFC 3339 UTC "Zulu" format. The timestamp must be in the past, and maximum timestamp staleness applies.", + "The timestamp that corresponds to the version of the Spanner database that you want to read. The timestamp must be specified by using RFC 3339 UTC "Zulu" format. The timestamp must be in the past, and maximum timestamp staleness applies.", example = "1990-12-31T23:59:60Z") @Default.String(value = "") ValueProvider getSnapshotTime(); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java b/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java index 99bb755ec5..14d33a06a0 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/PubSubToBigQuery.java @@ -203,7 +203,7 @@ public interface Options extends PipelineOptions, JavascriptTextTransformerOptio description = "Table for messages failed to reach the output table (i.e., Deadletter table)", helpText = - "The BigQuery table to use for messages that failed to reach the output table, in the format of `:.`. If the table doesn't exist, it is created during pipeline execution. If not specified, `OUTPUT_TABLE_SPEC_error_records` is used.") + "The BigQuery table to use for messages that fail to reach the output table, in the format of `:.`. If the table doesn't exist, it is created during pipeline execution. 
If not specified, `OUTPUT_TABLE_SPEC_error_records` is used.") ValueProvider getOutputDeadletterTable(); void setOutputDeadletterTable(ValueProvider value); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java index 658580b607..a090f3bba9 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java @@ -155,7 +155,7 @@ public interface Options extends PipelineOptions, StreamingOptions { optional = true, description = "Event filter key", helpText = - "The attribute key to use to filter events based. No filters are applied if `filterKey` is not specified.") + "The attribute key to use to filter events. No filters are applied if `filterKey` is not specified.") ValueProvider getFilterKey(); void setFilterKey(ValueProvider filterKey); diff --git a/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java b/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java index f5ee4cdbc9..aee2942ca1 100644 --- a/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java +++ b/v2/astradb-to-bigquery/src/main/java/com/google/cloud/teleport/v2/astradb/options/AstraDbToBigQueryOptions.java @@ -57,7 +57,7 @@ interface AstraDbSourceOptions extends PipelineOptions { @TemplateParameter.Text( order = 2, description = "Database identifier", - helpText = "The database unique identifier (uuid).", + helpText = "The database unique identifier (UUID).", example = "cf7af129-d33a-498f-ad06-d97a6ee6eb7") @Validation.Required @SuppressWarnings("unused") diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java index e1646cfc21..dab4820842 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java @@ -39,7 +39,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Spanner Project ID", helpText = - "The ID of the Google Cloud project that contains the Spanner database to read change streams from. This project is also where the change streams connector metadata table is created. The default for this parameter is the project where the Dataflow pipeline is running.") + "The ID of the Google Cloud project that contains the Spanner database to read change streams from. This project is also where the change streams connector metadata table is created. The default for this parameter is the project where the Dataflow pipeline is running.") @Default.String("") String getSpannerProjectId(); @@ -97,7 +97,7 @@ public interface SpannerChangeStreamsToGcsOptions optional = true, description = "Cloud Spanner metadata table name", helpText = - "The Spanner change streams connector metadata table name to use. If not provided, a Spanner change streams metadata table is automatically created during the pipeline flow. You must provide a value for this parameter when updating an existing pipeline. 
Otherwise, don't use this parameter.") + "The Spanner change streams connector metadata table name to use. If not provided, a Spanner change streams metadata table is automatically created during pipeline execution. You must provide a value for this parameter when updating an existing pipeline. Otherwise, don't use this parameter.") String getSpannerMetadataTableName(); void setSpannerMetadataTableName(String value); diff --git a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java index 06c1178c23..da1bc74743 100644 --- a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java +++ b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java @@ -117,8 +117,8 @@ public interface JdbcToBigQueryOptions groupName = "Target", description = "BigQuery output table", helpText = - "BigQuery table location to write the output to. The name should be in the format" - + " `:.`. The table's schema must match input objects.", + "The BigQuery table location to write the output to. The name should be in the format" + + " `:.`. The table's schema must match input objects.", example = ":.") String getOutputTable(); @@ -139,7 +139,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Google Cloud KMS key", helpText = - "The Cloud KMS encryption key to decrypt the username, password, and connection string. If you pass in a Cloud KMS key, the username, password, and connection string must all be passed in encrypted.", + "The Cloud KMS encryption key to use to decrypt the username, password, and connection string. If you pass in a Cloud KMS key, the username, password, and connection string must all be passed in encrypted.", example = "projects/your-project/locations/global/keyRings/your-keyring/cryptoKeys/your-key") String getKMSEncryptionKey(); @@ -178,7 +178,7 @@ public interface JdbcToBigQueryOptions <<<<<<< HEAD "If this parameter is provided (along with the name of the `table` defined as an optional parameter), JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports `Long` partition columns.") ======= - "If this parameter is provided with `table`, JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports `Long` partition columns.") + "If this parameter is provided with the `table` parameter, JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. 
Currently, only supports `Long` partition columns.") >>>>>>> 199fa612676178379366597662da7aacd66f4f23 String getPartitionColumn(); From 98c9d4e8b9e21cceffe3af373c129a6a6c615272 Mon Sep 17 00:00:00 2001 From: archanagupta03 Date: Tue, 7 May 2024 11:27:51 +0530 Subject: [PATCH 09/70] Resolving HEAD comments --- .../cloud/teleport/v2/options/JdbcToBigQueryOptions.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java index da1bc74743..3478a00404 100644 --- a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java +++ b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java @@ -175,11 +175,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "The name of a column of numeric type that will be used for partitioning.", helpText = -<<<<<<< HEAD "If this parameter is provided (along with the name of the `table` defined as an optional parameter), JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports `Long` partition columns.") -======= - "If this parameter is provided with the `table` parameter, JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports `Long` partition columns.") ->>>>>>> 199fa612676178379366597662da7aacd66f4f23 String getPartitionColumn(); void setPartitionColumn(String partitionColumn); From 82bb215fd0122309c6555dce5b6e046e8e6af97c Mon Sep 17 00:00:00 2001 From: archanagupta03 Date: Tue, 7 May 2024 12:18:22 +0530 Subject: [PATCH 10/70] Pushing changes after running mvn spotless:apply --- .../google/cloud/teleport/bigtable/BigtableToAvro.java | 3 +-- .../google/cloud/teleport/spanner/ExportPipeline.java | 8 ++++---- .../google/cloud/teleport/spanner/ImportPipeline.java | 9 +++++---- .../google/cloud/teleport/templates/PubsubToPubsub.java | 6 ++---- .../v2/options/SpannerChangeStreamsToGcsOptions.java | 3 +-- .../cloud/teleport/v2/options/JdbcToBigQueryOptions.java | 8 ++++---- 6 files changed, 17 insertions(+), 20 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java b/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java index 3b55dd29fe..cc7db434ad 100644 --- a/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java +++ b/v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableToAvro.java @@ -107,8 +107,7 @@ public interface Options extends PipelineOptions { @TemplateParameter.GcsWriteFolder( order = 4, description = "Output file directory in Cloud Storage", - helpText = - "The Cloud Storage path where data is written.", + helpText = "The Cloud Storage path where data is written.", example = "gs://mybucket/somefolder") ValueProvider getOutputDirectory(); diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java index 8d77654c9f..e40317d257 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java @@ -107,8 +107,7 @@ public interface ExportPipelineOptions extends PipelineOptions { order = 4, optional = 
true, description = "Cloud Storage temp directory for storing Avro files", - helpText = - "The Cloud Storage path where temporary Avro files are written.") + helpText = "The Cloud Storage path where temporary Avro files are written.") ValueProvider getAvroTempDirectory(); void setAvroTempDirectory(ValueProvider value); @@ -146,7 +145,7 @@ public interface ExportPipelineOptions extends PipelineOptions { }, description = "Snapshot time", helpText = - "The timestamp that corresponds to the version of the Spanner database that you want to read. The timestamp must be specified by using RFC 3339 UTC "Zulu" format. The timestamp must be in the past, and maximum timestamp staleness applies.", + "The timestamp that corresponds to the version of the Spanner database that you want to read. The timestamp must be specified by using RFC 3339 UTC `Zulu` format. The timestamp must be in the past, and maximum timestamp staleness applies.", example = "1990-12-31T23:59:60Z") @Default.String(value = "") ValueProvider getSnapshotTime(); @@ -157,7 +156,8 @@ public interface ExportPipelineOptions extends PipelineOptions { order = 8, optional = true, description = "Cloud Spanner Project Id", - helpText = "The ID of the Google Cloud project that contains the Spanner database that you want to read data from.") + helpText = + "The ID of the Google Cloud project that contains the Spanner database that you want to read data from.") ValueProvider getSpannerProjectId(); void setSpannerProjectId(ValueProvider value); diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java index 2a5fc96327..4376c1d5ea 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java @@ -76,8 +76,7 @@ public interface Options extends PipelineOptions { order = 2, regexes = {"^[a-z_0-9\\-]+$"}, description = "Cloud Spanner database ID", - helpText = - "The database ID of the Spanner database.") + helpText = "The database ID of the Spanner database.") ValueProvider getDatabaseId(); void setDatabaseId(ValueProvider value); @@ -166,7 +165,8 @@ public interface Options extends PipelineOptions { order = 9, optional = true, description = "Cloud Spanner Project Id", - helpText = "The ID of the Google Cloud project that contains the Spanner database. If not set, the default Google Cloud project is used.") + helpText = + "The ID of the Google Cloud project that contains the Spanner database. If not set, the default Google Cloud project is used.") ValueProvider getSpannerProjectId(); void setSpannerProjectId(ValueProvider value); @@ -178,7 +178,8 @@ public interface Options extends PipelineOptions { optional = true, regexes = {"[0-9]+"}, description = "DDL Creation timeout in minutes", - helpText = "The timeout in minutes for DDL statements performed by the template. The default value is 30 minutes.") + helpText = + "The timeout in minutes for DDL statements performed by the template. 
The default value is 30 minutes.") @Default.Integer(30) ValueProvider getDdlCreationTimeoutInMinutes(); diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java index a090f3bba9..2bd1f94de5 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/PubsubToPubsub.java @@ -131,8 +131,7 @@ public interface Options extends PipelineOptions, StreamingOptions { @TemplateParameter.PubsubSubscription( order = 1, description = "Pub/Sub input subscription", - helpText = - "The Pub/Sub subscription to read the input from.", + helpText = "The Pub/Sub subscription to read the input from.", example = "projects/your-project-id/subscriptions/your-subscription-name") @Validation.Required ValueProvider getInputSubscription(); @@ -142,8 +141,7 @@ public interface Options extends PipelineOptions, StreamingOptions { @TemplateParameter.PubsubTopic( order = 2, description = "Output Pub/Sub topic", - helpText = - "The Pub/Sub topic to write the output to.", + helpText = "The Pub/Sub topic to write the output to.", example = "projects/your-project-id/topics/your-topic-name") @Validation.Required ValueProvider getOutputTopic(); diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java index dab4820842..bc06022508 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToGcsOptions.java @@ -85,8 +85,7 @@ public interface SpannerChangeStreamsToGcsOptions @TemplateParameter.Text( order = 6, description = "Spanner metadata database", - helpText = - "The Spanner database to use for the change streams connector metadata table.") + helpText = "The Spanner database to use for the change streams connector metadata table.") @Validation.Required String getSpannerMetadataDatabase(); diff --git a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java index 3478a00404..8d6b0e96d8 100644 --- a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java +++ b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java @@ -103,8 +103,7 @@ public interface JdbcToBigQueryOptions regexes = {"^.+$"}, groupName = "Source", description = "JDBC source SQL query", - helpText = - "The query to run on the source to extract the data.", + helpText = "The query to run on the source to extract the data.", example = "select * from sampledb.sample_table") String getQuery(); @@ -151,7 +150,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "Whether to use column alias to map the rows.", helpText = - "If set to `true`, the pipeline uses the column alias ("AS") instead of the column name to map the rows to BigQuery. Defaults to `false`.") + "If set to `true`, the pipeline uses the column alias (`AS`) instead of the column name to map the rows to BigQuery. 
Defaults to `false`.") @Default.Boolean(false) Boolean getUseColumnAlias(); @@ -247,7 +246,8 @@ public interface JdbcToBigQueryOptions }, optional = true, description = "Create Disposition to use for BigQuery", - helpText = "The BigQuery CreateDisposition to use. For example, `CREATE_IF_NEEDED` or `CREATE_NEVER`.") + helpText = + "The BigQuery CreateDisposition to use. For example, `CREATE_IF_NEEDED` or `CREATE_NEVER`.") @Default.String("CREATE_NEVER") String getCreateDisposition(); From 3954bf219bc78ae70d176394d3cb819c1869ee4a Mon Sep 17 00:00:00 2001 From: Archana Gupta Date: Tue, 7 May 2024 23:25:06 +0530 Subject: [PATCH 11/70] Apply suggestions from code review Finishing touches as per recommendations suggested Co-authored-by: Rebecca Szper <98840847+rszper@users.noreply.github.com> --- .../com/google/cloud/teleport/spanner/ExportPipeline.java | 4 ++-- .../cloud/teleport/v2/options/JdbcToBigQueryOptions.java | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java index e40317d257..949201edac 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java @@ -167,7 +167,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Export Timestamps as Timestamp-micros type", helpText = - "If true, timestamps are exported as a `long` type with `timestamp-micros` logical type. By default, this is set to `false` and timestamps are exported as ISO-8601 strings at nanosecond precision.") + "If true, timestamps are exported as a `long` type with `timestamp-micros` logical type. By default, this parameter is set to `false` and timestamps are exported as ISO-8601 strings at nanosecond precision.") @Default.Boolean(false) ValueProvider getShouldExportTimestampAsLogicalType(); @@ -190,7 +190,7 @@ public interface ExportPipelineOptions extends PipelineOptions { optional = true, description = "Export necessary Related Spanner tables.", helpText = - "Specifies whether to include related tables. This parameter is used in conjunction with the `tableNames` parameter.") + "Whether to include related tables. This parameter is used in conjunction with the `tableNames` parameter.") @Default.Boolean(false) ValueProvider getShouldExportRelatedTables(); diff --git a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java index 8d6b0e96d8..3b51908f9f 100644 --- a/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java +++ b/v2/jdbc-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/JdbcToBigQueryOptions.java @@ -116,7 +116,7 @@ public interface JdbcToBigQueryOptions groupName = "Target", description = "BigQuery output table", helpText = - "The BigQuery table location to write the output to. The name should be in the format" + "The BigQuery table location to write the output to. Use the format" + " `:.`. 
The table's schema must match input objects.", example = ":.") String getOutputTable(); @@ -174,7 +174,7 @@ public interface JdbcToBigQueryOptions groupName = "Source", description = "The name of a column of numeric type that will be used for partitioning.", helpText = - "If this parameter is provided (along with the name of the `table` defined as an optional parameter), JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports `Long` partition columns.") + "If this parameter is provided with the name of the `table` defined as an optional parameter, JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only supports `Long` partition columns.") String getPartitionColumn(); void setPartitionColumn(String partitionColumn); From 1d34d0e688f608c1ad5470207b7946298a061837 Mon Sep 17 00:00:00 2001 From: an2x <52892974+an2x@users.noreply.github.com> Date: Thu, 9 May 2024 10:42:43 -0400 Subject: [PATCH 12/70] Add support for KAFKA_TOPIC template parameters. --- .../teleport/metadata/TemplateParameter.java | 40 +++++++++++++++++++ .../teleport/metadata/util/MetadataUtils.java | 1 + .../plugin/model/ImageSpecParameter.java | 17 ++++++++ .../plugin/model/ImageSpecParameterType.java | 5 ++- .../plugin/model/TemplateDefinitionsTest.java | 3 ++ .../cloud/teleport/plugin/sample/AtoBOk.java | 10 ++++- 6 files changed, 74 insertions(+), 2 deletions(-) diff --git a/metadata/src/main/java/com/google/cloud/teleport/metadata/TemplateParameter.java b/metadata/src/main/java/com/google/cloud/teleport/metadata/TemplateParameter.java index 24e1e67891..15c5d78efb 100644 --- a/metadata/src/main/java/com/google/cloud/teleport/metadata/TemplateParameter.java +++ b/metadata/src/main/java/com/google/cloud/teleport/metadata/TemplateParameter.java @@ -701,4 +701,44 @@ public final class TemplateParameter { /** Parameter visibility in the UI. */ boolean hiddenUi() default false; } + + /** + * Template parameter containing a Kafka Topic. + * + *
The parameter specifies the fully-qualified name of an Apache Kafka topic. This can be + * either a Google Managed Kafka topic or a non-managed Kafka topic. + */ + @Retention(RetentionPolicy.RUNTIME) + @Target({ElementType.FIELD, ElementType.METHOD}) + public @interface KafkaTopic { + /** Order of appearance. */ + int order() default 999; + + /** Name of the parameter. */ + String name() default ""; + + /** Group Name of the parameter. */ + String groupName() default ""; + + /** Parent Name of the parameter. */ + String parentName() default ""; + + /** List of parent trigger values. */ + String[] parentTriggerValues() default ""; + + /** If parameter is optional. */ + boolean optional() default false; + + /** Description of the parameter. */ + String description(); + + /** Help text of the parameter. */ + String helpText(); + + /** Example of the parameter. */ + String example() default ""; + + /** Parameter visibility in the UI. */ + boolean hiddenUi() default false; + } } diff --git a/metadata/src/main/java/com/google/cloud/teleport/metadata/util/MetadataUtils.java b/metadata/src/main/java/com/google/cloud/teleport/metadata/util/MetadataUtils.java index 9eaf664589..da1ced4fb5 100644 --- a/metadata/src/main/java/com/google/cloud/teleport/metadata/util/MetadataUtils.java +++ b/metadata/src/main/java/com/google/cloud/teleport/metadata/util/MetadataUtils.java @@ -42,6 +42,7 @@ public final class MetadataUtils { TemplateParameter.GcsWriteFile.class, TemplateParameter.GcsWriteFolder.class, TemplateParameter.Integer.class, + TemplateParameter.KafkaTopic.class, TemplateParameter.KmsEncryptionKey.class, TemplateParameter.Long.class, TemplateParameter.Password.class, diff --git a/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/model/ImageSpecParameter.java b/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/model/ImageSpecParameter.java index 1774e215f8..4bc677dccf 100644 --- a/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/model/ImageSpecParameter.java +++ b/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/model/ImageSpecParameter.java @@ -468,6 +468,23 @@ public void processParamType(Annotation parameterAnnotation) { this.setHiddenUi(durationParam.hiddenUi()); this.setParamType(ImageSpecParameterType.TEXT); break; + case "KafkaTopic": + TemplateParameter.KafkaTopic kafkaTopic = + (TemplateParameter.KafkaTopic) parameterAnnotation; + if (!kafkaTopic.name().isEmpty()) { + this.setName(kafkaTopic.name()); + } + processDescriptions( + kafkaTopic.groupName(), + kafkaTopic.description(), + kafkaTopic.helpText(), + kafkaTopic.example()); + this.setParentName(kafkaTopic.parentName()); + this.setParentTriggerValues(kafkaTopic.parentTriggerValues()); + this.setOptional(kafkaTopic.optional()); + this.setHiddenUi(kafkaTopic.hiddenUi()); + this.setParamType(ImageSpecParameterType.KAFKA_TOPIC); + break; default: throw new IllegalArgumentException("Invalid type " + parameterAnnotation); } diff --git a/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/model/ImageSpecParameterType.java b/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/model/ImageSpecParameterType.java index 7f12cc959e..0d1fd972d6 100644 --- a/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/model/ImageSpecParameterType.java +++ b/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/model/ImageSpecParameterType.java @@ -49,5 +49,8 @@ public enum ImageSpecParameterType { ENUM, /** Number parameter. 
*/ - NUMBER; + NUMBER, + + /** Kafka Topic parameter. */ + KAFKA_TOPIC; } diff --git a/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/model/TemplateDefinitionsTest.java b/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/model/TemplateDefinitionsTest.java index 6864e74012..181ff10366 100644 --- a/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/model/TemplateDefinitionsTest.java +++ b/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/model/TemplateDefinitionsTest.java @@ -63,6 +63,9 @@ public void testSampleAtoBOk() { ImageSpecParameter to = metadata.getParameter("to").get(); assertEquals(ImageSpecParameterType.BIGQUERY_TABLE, to.getParamType()); + ImageSpecParameter inputKafkaTopic = metadata.getParameter("inputKafkaTopic").get(); + assertEquals(ImageSpecParameterType.KAFKA_TOPIC, inputKafkaTopic.getParamType()); + ImageSpecParameter logical = metadata.getParameter("logical").get(); assertEquals(ImageSpecParameterType.BOOLEAN, logical.getParamType()); assertEquals("^(true|false)$", logical.getRegexes().get(0)); diff --git a/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/sample/AtoBOk.java b/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/sample/AtoBOk.java index 491d09f776..f0ea357288 100644 --- a/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/sample/AtoBOk.java +++ b/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/sample/AtoBOk.java @@ -99,12 +99,20 @@ public interface AtoBOptions { Boolean getParamWithGroupName(); @TemplateParameter.Text( - order = 8, + order = 9, parentName = "paramWithGroupName", parentTriggerValues = {"true"}, description = "N/A", helpText = "Text that has parent name and parent trigger value") @Default.Boolean(false) Boolean getParamWithParentName(); + + @TemplateParameter.KafkaTopic( + order = 10, + description = "Kafka input topic", + helpText = "Kafka topic to trad from", + example = + "projects/project-foo/locations/us-central1/clusters/cluster-bar/topics/topic-baz") + String getInputKafkaTopic(); } } From c42a7713530700e0e847d5ac8a974d49601557d5 Mon Sep 17 00:00:00 2001 From: Shreya Khajanchi Date: Thu, 9 May 2024 22:23:39 +0530 Subject: [PATCH 13/70] added unit tests to Spanner transaction writer --- .../SpannerTransactionWriterDoFn.java | 13 ++ .../SpannerTransactionWriterDoFnTest.java | 203 ++++++++++++++++++ 2 files changed, 216 insertions(+) create mode 100644 v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java diff --git a/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFn.java b/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFn.java index ec21ac4cee..6c7f9e303c 100644 --- a/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFn.java +++ b/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFn.java @@ -301,4 +301,17 @@ String getTxnTag(PipelineOptions options) { } return txnTag; } + + public void setMapper(ObjectMapper mapper) { + this.mapper = mapper; + } + + public void setSpannerAccessor(SpannerAccessor spannerAccessor) { + this.spannerAccessor = spannerAccessor; + } + + public void setChangeEventSessionConvertor( + ChangeEventSessionConvertor changeEventSessionConvertor) { + this.changeEventSessionConvertor = 
changeEventSessionConvertor; + } } diff --git a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java new file mode 100644 index 0000000000..c9803eba7f --- /dev/null +++ b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java @@ -0,0 +1,203 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.v2.templates; + +import static org.junit.Assert.assertEquals; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.cloud.spanner.DatabaseClient; +import com.google.cloud.spanner.Options; +import com.google.cloud.spanner.TransactionRunner; +import com.google.cloud.teleport.v2.spanner.ddl.Ddl; +import com.google.cloud.teleport.v2.spanner.migrations.constants.Constants; +import com.google.cloud.teleport.v2.spanner.migrations.convertors.ChangeEventSessionConvertor; +import com.google.cloud.teleport.v2.spanner.migrations.schema.Schema; +import com.google.cloud.teleport.v2.templates.datastream.DatastreamConstants; +import com.google.cloud.teleport.v2.values.FailsafeElement; +import org.apache.beam.runners.dataflow.options.DataflowWorkerHarnessOptions; +import org.apache.beam.sdk.io.gcp.spanner.SpannerAccessor; +import org.apache.beam.sdk.io.gcp.spanner.SpannerConfig; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.options.ValueProvider; +import org.apache.beam.sdk.transforms.DoFn; +import org.apache.beam.sdk.values.PCollectionView; +import org.junit.jupiter.api.Test; +import org.mockito.ArgumentCaptor; + +public class SpannerTransactionWriterDoFnTest { + @Test + void testGetTxnTag() { + String[] args = new String[] {"--jobId=123"}; + SpannerConfig spannerConfig = mock(SpannerConfig.class); + DataflowWorkerHarnessOptions options = + PipelineOptionsFactory.fromArgs(args) + .withValidation() + .as(DataflowWorkerHarnessOptions.class); + SpannerTransactionWriterDoFn spannerTransactionWriterDoFn = + new SpannerTransactionWriterDoFn(spannerConfig, null, null, null, "", "mysql", false, true); + String result = spannerTransactionWriterDoFn.getTxnTag(options); + assertEquals(result, "txBy=123"); + } + + Ddl getTestDdl() { + Ddl ddl = + Ddl.builder() + .createTable("Users") + .column("first_name") + .string() + .max() + .endColumn() + .column("last_name") + .string() + .size(5) + .endColumn() + .column("age") + .int64() + .endColumn() + .primaryKey() + 
.asc("first_name") + .desc("last_name") + .end() + .endTable() + .createTable("shadow_Users") + .column("first_name") + .string() + .max() + .endColumn() + .column("last_name") + .string() + .size(5) + .endColumn() + .column("version") + .int64() + .endColumn() + .primaryKey() + .asc("first_name") + .desc("last_name") + .end() + .endTable() + .build(); + return ddl; + } + + @Test + void testProcessElement() throws Exception { + ObjectMapper mapper = new ObjectMapper(); + mapper.enable(DeserializationFeature.USE_BIG_DECIMAL_FOR_FLOATS); + SpannerConfig spannerConfig = mock(SpannerConfig.class); + SpannerAccessor spannerAccessor = mock(SpannerAccessor.class); + PCollectionView ddlView = mock(PCollectionView.class); + Schema schema = mock(Schema.class); + DoFn.ProcessContext processContextMock = mock(DoFn.ProcessContext.class); + DatabaseClient databaseClientMock = mock(DatabaseClient.class); + TransactionRunner transactionCallableMock = mock(TransactionRunner.class); + ValueProvider rpcPriorityValueProviderMock = mock(ValueProvider.class); + ChangeEventSessionConvertor changeEventSessionConvertor = + mock(ChangeEventSessionConvertor.class); + + String[] args = new String[] {"--jobId=123"}; + DataflowWorkerHarnessOptions options = + PipelineOptionsFactory.fromArgs(args) + .withValidation() + .as(DataflowWorkerHarnessOptions.class); + + ObjectNode outputObject = mapper.createObjectNode(); + outputObject.put(DatastreamConstants.EVENT_SOURCE_TYPE_KEY, Constants.MYSQL_SOURCE_TYPE); + outputObject.put(DatastreamConstants.EVENT_TABLE_NAME_KEY, "Users"); + outputObject.put("first_name", "Johny"); + outputObject.put("last_name", "Depp"); + outputObject.put("age", 13); + outputObject.put(DatastreamConstants.MYSQL_TIMESTAMP_KEY, 123); + FailsafeElement failsafeElement = + FailsafeElement.of(outputObject.toString(), outputObject.toString()); + Ddl ddl = getTestDdl(); + + when(processContextMock.element()).thenReturn(failsafeElement); + when(processContextMock.sideInput(any())).thenReturn(ddl); + when(processContextMock.getPipelineOptions()).thenReturn(options); + when(schema.isEmpty()).thenReturn(true); + when(rpcPriorityValueProviderMock.get()).thenReturn(Options.RpcPriority.LOW); + when(spannerConfig.getRpcPriority()).thenReturn(rpcPriorityValueProviderMock); + when(spannerAccessor.getDatabaseClient()).thenReturn(databaseClientMock); + when(changeEventSessionConvertor.transformChangeEventData(eq(outputObject), any(), eq(ddl))) + .thenReturn(outputObject); + when(transactionCallableMock.run(any())).thenReturn(null); + when(databaseClientMock.readWriteTransaction(any(), any())).thenReturn(transactionCallableMock); + + SpannerTransactionWriterDoFn spannerTransactionWriterDoFn = + new SpannerTransactionWriterDoFn( + spannerConfig, ddlView, schema, null, "shadow", "mysql", false, true); + spannerTransactionWriterDoFn.setMapper(mapper); + spannerTransactionWriterDoFn.setChangeEventSessionConvertor(changeEventSessionConvertor); + spannerTransactionWriterDoFn.setSpannerAccessor(spannerAccessor); + spannerTransactionWriterDoFn.processElement(processContextMock); + + verify(processContextMock, times(1)).output(any(com.google.cloud.Timestamp.class)); + } + + @Test + void testProcessElementWithInvalidChangeEvent() throws Exception { + ObjectMapper mapper = new ObjectMapper(); + mapper.enable(DeserializationFeature.USE_BIG_DECIMAL_FOR_FLOATS); + SpannerConfig spannerConfig = mock(SpannerConfig.class); + SpannerAccessor spannerAccessor = mock(SpannerAccessor.class); + PCollectionView ddlView = 
mock(PCollectionView.class); + Schema schema = mock(Schema.class); + DoFn.ProcessContext processContextMock = mock(DoFn.ProcessContext.class); + ChangeEventSessionConvertor changeEventSessionConvertor = + mock(ChangeEventSessionConvertor.class); + + ObjectNode outputObject = mapper.createObjectNode(); + outputObject.put(DatastreamConstants.EVENT_SOURCE_TYPE_KEY, Constants.MYSQL_SOURCE_TYPE); + outputObject.put(DatastreamConstants.EVENT_TABLE_NAME_KEY, "Users1"); + outputObject.put("first_name", "Johny"); + outputObject.put("last_name", "Depp"); + outputObject.put("age", 13); + outputObject.put(DatastreamConstants.MYSQL_TIMESTAMP_KEY, 123); + FailsafeElement failsafeElement = + FailsafeElement.of(outputObject.toString(), outputObject.toString()); + Ddl ddl = getTestDdl(); + + when(processContextMock.element()).thenReturn(failsafeElement); + when(processContextMock.sideInput(any())).thenReturn(ddl); + when(schema.isEmpty()).thenReturn(true); + when(changeEventSessionConvertor.transformChangeEventData(eq(outputObject), any(), eq(ddl))) + .thenReturn(outputObject); + + SpannerTransactionWriterDoFn spannerTransactionWriterDoFn = + new SpannerTransactionWriterDoFn( + spannerConfig, ddlView, schema, null, "shadow", "mysql", false, true); + spannerTransactionWriterDoFn.setMapper(mapper); + spannerTransactionWriterDoFn.setChangeEventSessionConvertor(changeEventSessionConvertor); + spannerTransactionWriterDoFn.setSpannerAccessor(spannerAccessor); + spannerTransactionWriterDoFn.processElement(processContextMock); + + ArgumentCaptor argument = ArgumentCaptor.forClass(FailsafeElement.class); + verify(processContextMock, times(1)) + .output(eq(SpannerTransactionWriter.PERMANENT_ERROR_TAG), argument.capture()); + assertEquals( + "Table from change event does not exist in Spanner. 
table=Users1",
+        argument.getValue().getErrorMessage());
+  }
+}

From f24e8b35efaf6d9da9abde35eea8966b0e6f5972 Mon Sep 17 00:00:00 2001
From: Shreya Khajanchi
Date: Fri, 10 May 2024 10:34:22 +0530
Subject: [PATCH 14/70] fix test issue

---
 .../v2/templates/SpannerTransactionWriterDoFnTest.java | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java
index c9803eba7f..1109173472 100644
--- a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java
+++ b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java
@@ -42,12 +42,12 @@
 import org.apache.beam.sdk.options.ValueProvider;
 import org.apache.beam.sdk.transforms.DoFn;
 import org.apache.beam.sdk.values.PCollectionView;
-import org.junit.jupiter.api.Test;
+import org.junit.Test;
 import org.mockito.ArgumentCaptor;
 
 public class SpannerTransactionWriterDoFnTest {
   @Test
-  void testGetTxnTag() {
+  public void testGetTxnTag() {
     String[] args = new String[] {"--jobId=123"};
     SpannerConfig spannerConfig = mock(SpannerConfig.class);
     DataflowWorkerHarnessOptions options =
@@ -102,7 +102,7 @@ Ddl getTestDdl() {
   }
 
   @Test
-  void testProcessElement() throws Exception {
+  public void testProcessElement() throws Exception {
     ObjectMapper mapper = new ObjectMapper();
     mapper.enable(DeserializationFeature.USE_BIG_DECIMAL_FOR_FLOATS);
     SpannerConfig spannerConfig = mock(SpannerConfig.class);
@@ -157,7 +157,7 @@ void testProcessElement() throws Exception {
   }
 
   @Test
-  void testProcessElementWithInvalidChangeEvent() throws Exception {
+  public void testProcessElementWithInvalidChangeEvent() throws Exception {
     ObjectMapper mapper = new ObjectMapper();
     mapper.enable(DeserializationFeature.USE_BIG_DECIMAL_FOR_FLOATS);
     SpannerConfig spannerConfig = mock(SpannerConfig.class);

From e8ab30af193e0803d4f73fbb3e1e622e8f0b875e Mon Sep 17 00:00:00 2001
From: Shreya Khajanchi
Date: Fri, 10 May 2024 15:27:15 +0530
Subject: [PATCH 15/70] removed validation

---
 .../v2/templates/SpannerTransactionWriterDoFnTest.java | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java
index 1109173472..7b4acb6646 100644
--- a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java
+++ b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java
@@ -45,15 +45,14 @@ import org.junit.Test;
 import org.mockito.ArgumentCaptor;
 
+/** Unit tests for SpannerTransactionWriterDoFn class. */
 public class SpannerTransactionWriterDoFnTest {
   @Test
   public void testGetTxnTag() {
     String[] args = new String[] {"--jobId=123"};
     SpannerConfig spannerConfig = mock(SpannerConfig.class);
     DataflowWorkerHarnessOptions options =
-        PipelineOptionsFactory.fromArgs(args)
-            .withValidation()
-            .as(DataflowWorkerHarnessOptions.class);
+        PipelineOptionsFactory.fromArgs(args).as(DataflowWorkerHarnessOptions.class);
     SpannerTransactionWriterDoFn spannerTransactionWriterDoFn =
         new SpannerTransactionWriterDoFn(spannerConfig, null, null, null, "", "mysql", false, true);
     String result = spannerTransactionWriterDoFn.getTxnTag(options);
     assertEquals(result, "txBy=123");
@@ -118,9 +117,7 @@ public void testProcessElement() throws Exception {
     String[] args = new String[] {"--jobId=123"};
     DataflowWorkerHarnessOptions options =
-        PipelineOptionsFactory.fromArgs(args)
-            .withValidation()
-            .as(DataflowWorkerHarnessOptions.class);
+        PipelineOptionsFactory.fromArgs(args).as(DataflowWorkerHarnessOptions.class);
 
     ObjectNode outputObject = mapper.createObjectNode();
     outputObject.put(DatastreamConstants.EVENT_SOURCE_TYPE_KEY, Constants.MYSQL_SOURCE_TYPE);
     outputObject.put(DatastreamConstants.EVENT_TABLE_NAME_KEY, "Users");

From 5667766d2964db8c3a883807a4b29317e87d8837 Mon Sep 17 00:00:00 2001
From: Shreya Khajanchi
Date: Fri, 10 May 2024 16:30:50 +0530
Subject: [PATCH 16/70] addressing comments

---
 .../SpannerTransactionWriterDoFnTest.java | 49 +++++++++++++++++--
 1 file changed, 45 insertions(+), 4 deletions(-)

diff --git a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java
index 7b4acb6646..f8d3d0b378 100644
--- a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java
+++ b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/SpannerTransactionWriterDoFnTest.java
@@ -27,7 +27,9 @@
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import com.google.cloud.spanner.DatabaseClient;
+import com.google.cloud.spanner.Mutation;
 import com.google.cloud.spanner.Options;
+import com.google.cloud.spanner.TransactionContext;
 import com.google.cloud.spanner.TransactionRunner;
 import com.google.cloud.teleport.v2.spanner.ddl.Ddl;
 import com.google.cloud.teleport.v2.spanner.migrations.constants.Constants;
 import com.google.cloud.teleport.v2.spanner.migrations.convertors.ChangeEventSessionConvertor;
 import com.google.cloud.teleport.v2.spanner.migrations.schema.Schema;
 import com.google.cloud.teleport.v2.templates.datastream.DatastreamConstants;
 import com.google.cloud.teleport.v2.values.FailsafeElement;
+import java.util.Iterator;
 import org.apache.beam.runners.dataflow.options.DataflowWorkerHarnessOptions;
 import org.apache.beam.sdk.io.gcp.spanner.SpannerAccessor;
 import org.apache.beam.sdk.io.gcp.spanner.SpannerConfig;
@@ -111,6 +114,7 @@ public void testProcessElement() throws Exception {
     DoFn.ProcessContext processContextMock = mock(DoFn.ProcessContext.class);
     DatabaseClient databaseClientMock = mock(DatabaseClient.class);
     TransactionRunner transactionCallableMock = mock(TransactionRunner.class);
+    TransactionContext transactionContext = mock(TransactionContext.class);
     ValueProvider rpcPriorityValueProviderMock = mock(ValueProvider.class);
     ChangeEventSessionConvertor changeEventSessionConvertor =
         mock(ChangeEventSessionConvertor.class);
@@ -122,10 +126,10 @@ public void testProcessElement() throws Exception {
     ObjectNode outputObject = mapper.createObjectNode();
outputObject.put(DatastreamConstants.EVENT_SOURCE_TYPE_KEY, Constants.MYSQL_SOURCE_TYPE); outputObject.put(DatastreamConstants.EVENT_TABLE_NAME_KEY, "Users"); - outputObject.put("first_name", "Johny"); + outputObject.put("first_name", "Johnny"); outputObject.put("last_name", "Depp"); outputObject.put("age", 13); - outputObject.put(DatastreamConstants.MYSQL_TIMESTAMP_KEY, 123); + outputObject.put(DatastreamConstants.MYSQL_TIMESTAMP_KEY, 12345); FailsafeElement failsafeElement = FailsafeElement.of(outputObject.toString(), outputObject.toString()); Ddl ddl = getTestDdl(); @@ -139,7 +143,12 @@ public void testProcessElement() throws Exception { when(spannerAccessor.getDatabaseClient()).thenReturn(databaseClientMock); when(changeEventSessionConvertor.transformChangeEventData(eq(outputObject), any(), eq(ddl))) .thenReturn(outputObject); - when(transactionCallableMock.run(any())).thenReturn(null); + when(transactionCallableMock.run(any())) + .thenAnswer( + invocation -> { + TransactionRunner.TransactionCallable callable = invocation.getArgument(0); + return callable.run(transactionContext); + }); when(databaseClientMock.readWriteTransaction(any(), any())).thenReturn(transactionCallableMock); SpannerTransactionWriterDoFn spannerTransactionWriterDoFn = @@ -149,6 +158,38 @@ public void testProcessElement() throws Exception { spannerTransactionWriterDoFn.setChangeEventSessionConvertor(changeEventSessionConvertor); spannerTransactionWriterDoFn.setSpannerAccessor(spannerAccessor); spannerTransactionWriterDoFn.processElement(processContextMock); + ArgumentCaptor> argument = ArgumentCaptor.forClass(Iterable.class); + verify(transactionContext, times(1)).buffer(argument.capture()); + Iterable capturedMutations = argument.getValue(); + Iterator mutationIterator = capturedMutations.iterator(); + Mutation actualDataMutation = null; + Mutation actualShadowTableMutation = null; + + if (mutationIterator.hasNext()) { + // Get the first mutation + actualDataMutation = mutationIterator.next(); + + if (mutationIterator.hasNext()) { + // Get the second mutation + actualShadowTableMutation = mutationIterator.next(); + } + } + + Mutation.WriteBuilder dataBuilder = Mutation.newInsertOrUpdateBuilder("Users"); + dataBuilder.set("first_name").to("Johnny"); + dataBuilder.set("last_name").to("Depp"); + dataBuilder.set("age").to(13); + Mutation expectedDataMutation = dataBuilder.build(); + assertEquals(actualDataMutation, expectedDataMutation); + + Mutation.WriteBuilder shadowBuilder = Mutation.newInsertOrUpdateBuilder("shadow_Users"); + shadowBuilder.set("first_name").to("Johnny"); + shadowBuilder.set("last_name").to("Depp"); + shadowBuilder.set("timestamp").to(12345); + shadowBuilder.set("log_file").to(""); + shadowBuilder.set("log_position").to(-1); + Mutation expectedShadowMutation = shadowBuilder.build(); + assertEquals(actualShadowTableMutation, expectedShadowMutation); verify(processContextMock, times(1)).output(any(com.google.cloud.Timestamp.class)); } @@ -168,7 +209,7 @@ public void testProcessElementWithInvalidChangeEvent() throws Exception { ObjectNode outputObject = mapper.createObjectNode(); outputObject.put(DatastreamConstants.EVENT_SOURCE_TYPE_KEY, Constants.MYSQL_SOURCE_TYPE); outputObject.put(DatastreamConstants.EVENT_TABLE_NAME_KEY, "Users1"); - outputObject.put("first_name", "Johny"); + outputObject.put("first_name", "Johnny"); outputObject.put("last_name", "Depp"); outputObject.put("age", 13); outputObject.put(DatastreamConstants.MYSQL_TIMESTAMP_KEY, 123); From 46ada98784d20568e348c5e775902c78b1e636a3 Mon 
Sep 17 00:00:00 2001 From: Jeffrey Kinard Date: Fri, 10 May 2024 12:26:55 -0400 Subject: [PATCH 17/70] Add removed files from bad merge 1490 Signed-off-by: Jeffrey Kinard --- .github/actions/setup-env/action.yml | 5 +- .github/actions/setup-java-env/action.yml | 4 +- .github/scripts/configure-runners.sh | 17 +- .github/scripts/startup-script.sh | 26 +- .github/workflows/go-pr.yml | 8 +- .github/workflows/java-pr.yml | 4 + .github/workflows/prepare-java-cache.yml | 2 +- .github/workflows/release.yml | 2 +- .github/workflows/scorecards-analysis.yml | 2 +- contributor-docs/maintainers-guide.md | 71 ++++- .../BigQueryResourceManagerUtils.java | 30 +- .../BigtableResourceManagerUtils.java | 86 +++++- .../DatastreamResourceManagerUtils.java | 30 +- .../utils/SpannerResourceManagerUtils.java | 53 +++- .../bigquery/BigQueryResourceManagerTest.java | 3 +- .../bigtable/BigtableResourceManagerTest.java | 2 +- .../BigtableResourceManagerUtilsTest.java | 5 +- .../spanner/SpannerResourceManagerTest.java | 4 +- .../SpannerResourceManagerUtilsTest.java | 38 ++- .../plugin/PythonDockerfileGenerator.java | 76 ----- .../plugin/XlangDockerfileGenerator.java | 116 +++++++ .../main/resources/Dockerfile-template-xlang | 72 +++++ .../main/resources/Dockerfile-xlang-template | 51 ---- .../plugin/XlangDockerfileGeneratorTest.java | 58 ++++ .../plugin/maven/TemplatesStageMojo.java | 11 +- pom.xml | 1 + .../common/JavascriptTextTransformer.java | 2 +- .../v2/templates/DataStreamToSpanner.java | 4 +- .../datastream/ChangeEventContextFactory.java | 11 +- .../spanner/ProcessInformationSchema.java | 14 + .../DataStreamToSpannerDatatypeIT.java | 131 ++++---- ...igrationWithMigrationShardIdColumnIT.java} | 42 +-- ...rationWithoutMigrationShardIdColumnIT.java | 286 ++++++++++++++++++ .../v2/templates/DataStreamToSpannerTest.java | 67 ++++ .../ChangeEventContextFactoryTest.java | 81 +++++ .../spanner/ProcessInformationSchemaTest.java | 63 ++++ .../AllDatatypeColumns-statements.sql | 73 +++++ .../AllDatatypeColumns2-statements.sql | 70 +++++ .../mysql-backfill-AllDatatypeColumns.avro | Bin 0 -> 4398 bytes .../mysql-backfill-AllDatatypeColumns.jsonl | 2 - .../mysql-backfill-AllDatatypeColumns2.avro | Bin 0 -> 4317 bytes .../mysql-backfill-AllDatatypeColumns2.jsonl | 2 - .../mysql-cdc-AllDatatypeColumns.jsonl | 2 - .../mysql-cdc-AllDatatypeColumns2.avro | Bin 0 -> 4382 bytes .../mysql-cdc-AllDatatypeColumns2.jsonl | 2 - .../mysql-cdc1-AllDatatypeColumns.avro | Bin 0 -> 3780 bytes .../mysql-cdc2-AllDatatypeColumns.avro | Bin 0 -> 3791 bytes .../Movie-shard1.jsonl | 0 .../Movie-shard2.jsonl | 0 .../Users-backfill-logical-shard1.jsonl | 0 .../Users-backfill-logical-shard2.jsonl | 0 .../Users-backfill-logical-shard3.jsonl | 0 .../Users-backfill-logical-shard4.jsonl | 0 .../Users-cdc-shard1.jsonl | 0 .../Users-cdc-shard2.jsonl | 0 .../mysql-schema.sql | 0 .../mysql-session.json | 0 .../spanner-schema.sql | 0 .../transformation-context-shard1.json | 0 .../transformation-context-shard2.json | 0 .../Users-backfill-logical-shard1.jsonl | 3 + .../Users-backfill-logical-shard2.jsonl | 3 + .../Users-backfill-logical-shard3.jsonl | 3 + .../Users-backfill-logical-shard4.jsonl | 3 + .../Users-cdc-shard1.jsonl | 6 + .../Users-cdc-shard2.jsonl | 6 + .../mysql-schema.sql | 6 + .../spanner-schema.sql | 5 + .../src/main/resources/requirements.txt | 1 - .../src/main/resources/requirements.txt | 1 - .../neo4j/templates/ConstraintsIndicesIT.java | 2 + .../teleport/v2/templates/GCSToSplunk.java | 100 ++++-- .../transforms/BinaryAvroDeserializer.java | 
10 +- .../transforms/BinaryAvroSerializer.java | 13 +- .../v2/templates/KafkaToBigQueryFlex.java | 11 +- .../teleport/v2/transforms/AvroTransform.java | 21 +- v2/kafka-to-gcs/pom.xml | 11 + .../v2/transforms/AvroWriteTransform.java | 2 + .../teleport/v2/templates/KafkaToGcsIT.java | 178 +++++++++++ .../resources/KafkaToGcsIT/avro_schema.avsc | 15 + .../src/main/resources/requirements.txt | 1 - .../src/main/resources/requirements.txt | 1 - v2/sourcedb-to-spanner/pom.xml | 11 + .../v2/options/OptionsToConfigBuilder.java | 98 ++++++ .../v2/options/SourceDbToSpannerOptions.java | 120 ++++++-- .../v2/source/DataSourceProvider.java | 145 --------- .../teleport/v2/source/reader/ReaderImpl.java | 55 ++++ .../v2/source/reader/auth/dbauth/DbAuth.java | 40 +++ .../dbauth/GuardedStringValueProvider.java | 80 +++++ .../auth/dbauth/LocalCredentialsProvider.java | 59 ++++ .../reader/auth/dbauth/package-info.java | 17 ++ .../v2/source/reader/auth/package-info.java | 18 ++ .../v2/source/reader/io/IoWrapper.java | 30 ++ .../mysql/MysqlDialectAdapter.java | 2 +- .../io/jdbc/iowrapper/JdbcIoWrapper.java | 201 ++++++++++++ .../iowrapper/config/JdbcIOWrapperConfig.java | 149 +++++++++ .../io/jdbc/iowrapper/config/TableConfig.java | 81 +++++ .../config/defaults/MySqlConfigDefaults.java | 58 ++++ .../config/defaults/package-info.java | 18 ++ .../jdbc/iowrapper/config/package-info.java | 18 ++ .../io/jdbc/iowrapper/package-info.java | 18 ++ .../io/jdbc/rowmapper/JdbcValueMapper.java | 3 +- .../rowmapper/JdbcValueMappingsProvider.java | 3 +- .../rowmapper/ResultSetValueExtractor.java | 3 +- .../jdbc/rowmapper/ResultSetValueMapper.java | 3 +- .../reader/io/schema/SourceTableSchema.java | 15 +- .../io/transform/AccumulatingTableReader.java | 6 +- .../reader/io/transform/ReaderTransform.java | 9 +- .../v2/templates/SourceDbToSpanner.java | 136 +++++---- .../transformer/SourceRowToMutationDoFn.java | 7 +- .../options/OptionsToConfigBuilderTest.java | 68 +++++ .../v2/source/DataSourceProviderTest.java | 51 ---- .../GuardedStringValueProviderTest.java | 34 +++ .../dbauth/LocalCredentialsProviderTest.java | 39 +++ .../mysql/MysqlDialectAdapterTest.java | 2 +- .../iowrapper/config/TableConfigTest.java | 74 +++++ .../avro/GenericRecordTypeConvertor.java | 96 ++++-- .../avro/GenericRecordTypeConvertorTest.java | 203 +++++++++++-- .../resources/avro/all-spanner-types.avsc | 62 ---- .../resources/avro/logical-types-schema.avsc | 51 ---- 120 files changed, 3321 insertions(+), 793 deletions(-) create mode 100644 plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/XlangDockerfileGenerator.java create mode 100644 plugins/core-plugin/src/main/resources/Dockerfile-template-xlang delete mode 100644 plugins/core-plugin/src/main/resources/Dockerfile-xlang-template create mode 100644 plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/XlangDockerfileGeneratorTest.java rename v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/{DataStreamToSpannerShardedMigrationIT.java => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT.java} (85%) create mode 100644 v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT.java create mode 100644 v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerTest.java create mode 100644 v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/datastream/ChangeEventContextFactoryTest.java 
create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/AllDatatypeColumns-statements.sql create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/AllDatatypeColumns2-statements.sql create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-backfill-AllDatatypeColumns.avro delete mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-backfill-AllDatatypeColumns.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-backfill-AllDatatypeColumns2.avro delete mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-backfill-AllDatatypeColumns2.jsonl delete mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-cdc-AllDatatypeColumns.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-cdc-AllDatatypeColumns2.avro delete mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-cdc-AllDatatypeColumns2.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-cdc1-AllDatatypeColumns.avro create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-cdc2-AllDatatypeColumns.avro rename v2/datastream-to-spanner/src/test/resources/{DataStreamToSpannerShardedMigrationIT => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT}/Movie-shard1.jsonl (100%) rename v2/datastream-to-spanner/src/test/resources/{DataStreamToSpannerShardedMigrationIT => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT}/Movie-shard2.jsonl (100%) rename v2/datastream-to-spanner/src/test/resources/{DataStreamToSpannerShardedMigrationIT => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT}/Users-backfill-logical-shard1.jsonl (100%) rename v2/datastream-to-spanner/src/test/resources/{DataStreamToSpannerShardedMigrationIT => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT}/Users-backfill-logical-shard2.jsonl (100%) rename v2/datastream-to-spanner/src/test/resources/{DataStreamToSpannerShardedMigrationIT => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT}/Users-backfill-logical-shard3.jsonl (100%) rename v2/datastream-to-spanner/src/test/resources/{DataStreamToSpannerShardedMigrationIT => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT}/Users-backfill-logical-shard4.jsonl (100%) rename v2/datastream-to-spanner/src/test/resources/{DataStreamToSpannerShardedMigrationIT => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT}/Users-cdc-shard1.jsonl (100%) rename v2/datastream-to-spanner/src/test/resources/{DataStreamToSpannerShardedMigrationIT => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT}/Users-cdc-shard2.jsonl (100%) rename v2/datastream-to-spanner/src/test/resources/{DataStreamToSpannerShardedMigrationIT => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT}/mysql-schema.sql (100%) rename v2/datastream-to-spanner/src/test/resources/{DataStreamToSpannerShardedMigrationIT => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT}/mysql-session.json (100%) rename v2/datastream-to-spanner/src/test/resources/{DataStreamToSpannerShardedMigrationIT => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT}/spanner-schema.sql (100%) rename 
v2/datastream-to-spanner/src/test/resources/{DataStreamToSpannerShardedMigrationIT => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT}/transformation-context-shard1.json (100%) rename v2/datastream-to-spanner/src/test/resources/{DataStreamToSpannerShardedMigrationIT => DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT}/transformation-context-shard2.json (100%) create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard1.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard2.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard3.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard4.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-cdc-shard1.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-cdc-shard2.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/mysql-schema.sql create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/spanner-schema.sql delete mode 100644 v2/googlecloud-to-elasticsearch/src/main/resources/requirements.txt delete mode 100644 v2/googlecloud-to-googlecloud/src/main/resources/requirements.txt create mode 100644 v2/kafka-to-gcs/src/test/java/com/google/cloud/teleport/v2/templates/KafkaToGcsIT.java create mode 100644 v2/kafka-to-gcs/src/test/resources/KafkaToGcsIT/avro_schema.avsc delete mode 100644 v2/pubsub-binary-to-bigquery/src/main/resources/requirements.txt delete mode 100644 v2/pubsub-to-mongodb/src/main/resources/requirements.txt create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/options/OptionsToConfigBuilder.java delete mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/DataSourceProvider.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/ReaderImpl.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/DbAuth.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/GuardedStringValueProvider.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/LocalCredentialsProvider.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/package-info.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/package-info.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/IoWrapper.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/JdbcIoWrapper.java create mode 100644 
v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/JdbcIOWrapperConfig.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/TableConfig.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/defaults/MySqlConfigDefaults.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/defaults/package-info.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/package-info.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/package-info.java create mode 100644 v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/options/OptionsToConfigBuilderTest.java delete mode 100644 v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/DataSourceProviderTest.java create mode 100644 v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/GuardedStringValueProviderTest.java create mode 100644 v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/LocalCredentialsProviderTest.java create mode 100644 v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/TableConfigTest.java delete mode 100644 v2/spanner-common/src/test/resources/avro/all-spanner-types.avsc delete mode 100644 v2/spanner-common/src/test/resources/avro/logical-types-schema.avsc diff --git a/.github/actions/setup-env/action.yml b/.github/actions/setup-env/action.yml index 69ba7f1575..4f2cb143e2 100644 --- a/.github/actions/setup-env/action.yml +++ b/.github/actions/setup-env/action.yml @@ -55,12 +55,13 @@ runs: using: 'composite' steps: - name: Setup Go - uses: actions/setup-go@44e221478fc6847752e5c574fc7a7b3247b00fbf + uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 with: go-version: ${{ inputs.go-version }} + cache: false - name: Get Changed Files id: changed-files - uses: tj-actions/changed-files@61ee456a9d0f512e7ecfdf28863634c97dae2d16 + uses: tj-actions/changed-files@4c5f5d698fbf2d763d5f13815ac7c2ccbef1ff7f # v44.2.0 with: separator: ',' # It shouldn't take too long to build all of this, and it will at least diff --git a/.github/actions/setup-java-env/action.yml b/.github/actions/setup-java-env/action.yml index d13a82281a..c3b50c7158 100644 --- a/.github/actions/setup-java-env/action.yml +++ b/.github/actions/setup-java-env/action.yml @@ -40,7 +40,7 @@ runs: using: 'composite' steps: - name: Setup Java - uses: actions/setup-java@a12e082d834968c1847f782019214fadd20719f6 + uses: actions/setup-java@5896cecc08fd8a1fbdfaf517e29b571164b031f7 # v4.2.0 with: distribution: 'temurin' java-version: ${{ inputs.java-version }} @@ -57,7 +57,7 @@ runs: echo "YESTERDAY=$KEY" >> $GITHUB_ENV fi - name: Setup Cache - uses: actions/cache@72d1e4fdff0ff7b1b6e86b415f2d4f5941e5c006 + uses: actions/cache@ab5e6d0c87105b4c9c2047343972218f562e4319 # v4.0.1 id: setup-cache with: path: | diff --git a/.github/scripts/configure-runners.sh b/.github/scripts/configure-runners.sh index eeee825d19..a51b9aaf7e 100755 --- a/.github/scripts/configure-runners.sh +++ b/.github/scripts/configure-runners.sh @@ -13,7 +13,8 @@ # See the License for the specific language governing permissions and # 
limitations under the License. -# For running the script, see go/templates-gitactions-script +# For running the script, see +# https://github.com/GoogleCloudPlatform/DataflowTemplates/blob/main/contributor-docs/maintainers-guide.md#provision-new-runners # Defaults NAME_SUFFIX="it" @@ -22,6 +23,10 @@ BASE_NAME="gitactions-runner" REPO_NAME="DataflowTemplates" REPO_OWNER="GoogleCloudPlatform" GH_RUNNER_VERSION="2.299.1" + +MACHINE_TYPE="n1-highmem-32" +BOOT_DISK_SIZE="200GB" + VERBOSE=0 ############################################################ @@ -44,6 +49,8 @@ Help() echo "o (optional) Set the owner of the GitHub repo. Default '$REPO_OWNER'" echo "s (optional) Set the number of runners. Default $SIZE" echo "v (optional) Set the gitactions runner version. Default $GH_RUNNER_VERSION" + echo "m (optional) Set the machine type for the GCE VM runner. $MACHINE_TYPE" + echo "b (optional) Set the boot disk size for the GCE VM runner. $BOOT_DISK_SIZE" echo "V Verbose mode." echo "h Print this Help." echo @@ -79,6 +86,10 @@ while getopts ":h:Vp:a:t:n:S:r:o:s:v:" option; do SIZE=$OPTARG;; v) # Enter a version GH_RUNNER_VERSION=$OPTARG;; + m) # Enter a machine type + MACHINE_TYPE=$OPTARG;; + b) # Enter a boot disk size + BOOT_DISK_SIZE=$OPTARG;; V) # Verbose VERBOSE=1;; \?) # Invalid option @@ -163,8 +174,6 @@ gcloud secrets add-iam-policy-binding $SECRET_NAME \ IMAGE_FAMILY="ubuntu-2004-lts" IMAGE_PROJECT="ubuntu-os-cloud" BOOT_DISK_TYPE="pd-balanced" -BOOT_DISK_SIZE="200GB" -MACHINE_TYPE="n1-highmem-16" SCOPE="cloud-platform" if [ $VERBOSE -eq 1 ]; then echo; echo "Creating instance template: $INSTANCE_TEMPLATE_NAME..."; fi if [ $VERBOSE -eq 1 ]; then @@ -181,7 +190,7 @@ gcloud compute instance-templates create $INSTANCE_TEMPLATE_NAME \ --image-project=$IMAGE_PROJECT \ --boot-disk-type=$BOOT_DISK_TYPE \ --boot-disk-size=$BOOT_DISK_SIZE \ - --machine-type="MACHINE_TYPE" \ + --machine-type=$MACHINE_TYPE \ --scopes=$SCOPE \ --service-account=${SA_EMAIL} \ --metadata-from-file=startup-script=startup-script-${NAME_SUFFIX}.sh,shutdown-script=shutdown-script-${NAME_SUFFIX}.sh diff --git a/.github/scripts/startup-script.sh b/.github/scripts/startup-script.sh index 5ee8470b18..fa782dae38 100644 --- a/.github/scripts/startup-script.sh +++ b/.github/scripts/startup-script.sh @@ -22,30 +22,28 @@ ulimit -n 65536 # increase max virtual memory sudo sysctl -w vm.max_map_count=262144 +# update git +sudo add-apt-repository ppa:git-core/ppa -y +sudo apt update +sudo apt install git -y + # install jq -apt-get update -apt-get -y install jq +sudo apt install jq -y # install maven -sudo apt update sudo apt install git maven -y -# update git -sudo add-apt-repository ppa:git-core/ppa -y -sudo apt-get update -sudo apt-get install git -y - # install gh -sudo type -p curl >/dev/null || (sudo apt update && sudo apt install curl -y) -sudo curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg \ +sudo apt install curl -y \ +&& sudo curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg \ && sudo chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg \ && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null \ && sudo apt update \ && sudo apt install gh -y # install docker -sudo apt-get update -sudo apt-get 
install \ +sudo apt update +sudo apt install \ ca-certificates \ curl \ gnupg \ @@ -55,8 +53,8 @@ curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o echo \ "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null -sudo apt-get update -sudo apt-get install docker-ce docker-ce-cli containerd.io docker-compose-plugin -y +sudo apt update +sudo apt install docker-ce docker-ce-cli containerd.io docker-compose-plugin -y # add user to docker group sudo groupadd docker diff --git a/.github/workflows/go-pr.yml b/.github/workflows/go-pr.yml index a7b73b1d06..7e1573f6fd 100644 --- a/.github/workflows/go-pr.yml +++ b/.github/workflows/go-pr.yml @@ -35,9 +35,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout Code - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 # v4.0.0 - name: Setup Go - uses: actions/setup-go@44e221478fc6847752e5c574fc7a7b3247b00fbf + uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 with: go-version: '1.21' - name: Run Fmt @@ -53,9 +53,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout Code - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 # v4.0.0 - name: Setup Go - uses: actions/setup-go@44e221478fc6847752e5c574fc7a7b3247b00fbf + uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 with: go-version: '1.21' # By nature, this also makes sure that everything builds diff --git a/.github/workflows/java-pr.yml b/.github/workflows/java-pr.yml index 887b3b2ab7..ec617ebae7 100644 --- a/.github/workflows/java-pr.yml +++ b/.github/workflows/java-pr.yml @@ -31,6 +31,10 @@ on: - cron: "0 */12 * * *" workflow_dispatch: +concurrency: + group: java-pr-${{ github.event.issue.number || github.run_id }} + cancel-in-progress: true + env: MAVEN_OPTS: -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.shade=error diff --git a/.github/workflows/prepare-java-cache.yml b/.github/workflows/prepare-java-cache.yml index 399a3e8bd1..33af88c88b 100644 --- a/.github/workflows/prepare-java-cache.yml +++ b/.github/workflows/prepare-java-cache.yml @@ -58,7 +58,7 @@ jobs: run: | echo "CACHE_KEY=''" >> $GITHUB_ENV - name: Checkout Code - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 # v4.0.0 - name: Setup Java id: setup-java uses: ./.github/actions/setup-java-env diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 01dd51cd82..4d4657c743 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -23,7 +23,7 @@ permissions: jobs: release: name: Create Release - runs-on: [self-hosted, it] + runs-on: [self-hosted, release] steps: - name: Get releaser identity run: | diff --git a/.github/workflows/scorecards-analysis.yml b/.github/workflows/scorecards-analysis.yml index e066e6bbc0..f7cbc7b10e 100644 --- a/.github/workflows/scorecards-analysis.yml +++ b/.github/workflows/scorecards-analysis.yml @@ -22,7 +22,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 # v4.0.0 with: persist-credentials: false diff --git 
a/contributor-docs/maintainers-guide.md b/contributor-docs/maintainers-guide.md index 6099961058..f6cdbe7be5 100644 --- a/contributor-docs/maintainers-guide.md +++ b/contributor-docs/maintainers-guide.md @@ -56,4 +56,73 @@ This repo's code currently is mirrored in Google's internal source control syste 6) Approve the change internally. At this point, the change will get automatically merged internally and externally. We are actively working to deprecate this process and use GitHub as the only source of truth. -If you encounter unresolvable issues with this flow, please reach out to the Dataflow team directly. \ No newline at end of file +If you encounter unresolvable issues with this flow, please reach out to the Dataflow team directly. + +## GitHub actions + +There are several workflows that leverage GitHub actions to keep the repo healthy. Of these workflows, there are +currently 2 that are run on self-hosted runners on GCP - [Java PR](../.github/workflows/java-pr.yml) which is used to +test PR's and [Release](../.github/workflows/release.yml) which is the workflow used for releasing new templates each +week. + +### Provision new runners + +There are instances where we may need to re-provision self-hosted runners, due to unexpected failures, updating +dependencies, increasing memory, etc. In these cases, there are helper scripts to aid in redeployment of the GitHub +actions runners. + +There are 3 scripts: [configure-runners.sh](../.github/scripts/configure-runners.sh), +[startup-script.sh](../.github/scripts/startup-script.sh) and +[shutdown-script.sh](../.github/scripts/shutdown-script.sh). The first is the main script used to provision the runners +themselves. The startup script is what will be invoked by the GCE VM as it is booted up for the first time and will +install all necessary packages needed by IT's, unit tests, Release, etc. as well as link the machine as a runner for the +repo. Likewise, the shutdown script is run when the VM is shutdown. + +To provision GitHub actions runners, there are a couple prerequisites +- Must be a maintainer of the repo +- Must have access to GCP project cloud-teleport-testing + +Things to remember: +- Running the script will tear down existing runners and provision new ones. This will kill any actions currently +running on any of the runners. Failure to spin up new runner correctly will block PR's and Releases, so use carefully. +- After running the script, it is likely the old runners will still be listed under +https://github.com/GoogleCloudPlatform/DataflowTemplates/settings/actions/runners. Simply force remove these to keep the +repo clean +- The commands below will demonstrate how to provision runners for use with our workflows as they exist today. If there +arises a need to provision runners in a different manner, feel free to modify the scripts directly and open a PR with +the necessary changes. + +To run the configuration script: + +1. Set gcloud project to cloud-teleport-testing if not already set + ``` + gcloud config set project cloud-teleport-testing + ``` + +2. Export the GitHub actions token + ``` + GITACTIONS_TOKEN=$(gcloud secrets versions access latest --secret=gitactions-runner-secret) + ``` + +3. 
Run the script + + * For IT runners: + + ``` + ./configure-runners.sh \ + -p cloud-teleport-testing \ + -a 269744978479-compute@developer.gserviceaccount.com \ + -t $GITACTIONS_TOKEN + ``` + + * For Performance Test Runner + ``` + ./configure-runners.sh \ + -p cloud-teleport-testing \ + -a 269744978479-compute@developer.gserviceaccount.com \ + -t $GITACTIONS_TOKEN \ + -S perf \ + -s 1 + ``` + +**Note**: To see optional configurable parameters, run `./configure-runners.sh -h` \ No newline at end of file diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManagerUtils.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManagerUtils.java index f2b6849caa..76be7ffcdf 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManagerUtils.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManagerUtils.java @@ -22,17 +22,18 @@ import com.google.cloud.bigquery.TableId; import java.time.format.DateTimeFormatter; import java.util.regex.Pattern; +import org.apache.commons.lang3.RandomStringUtils; /** Utilities for {@link BigQueryResourceManager} implementations. */ public final class BigQueryResourceManagerUtils { private static final int MAX_DATASET_ID_LENGTH = 1024; private static final Pattern ILLEGAL_DATASET_ID_CHARS = Pattern.compile("[^a-zA-Z0-9_]"); + private static final String REPLACE_CHAR = "_"; private static final int MIN_TABLE_ID_LENGTH = 1; private static final int MAX_TABLE_ID_LENGTH = 1024; private static final Pattern ILLEGAL_TABLE_CHARS = Pattern.compile("[^a-zA-Z0-9-_]"); - private static final DateTimeFormatter TIME_FORMAT = - DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss_SSSSSS"); + private static final String TIME_FORMAT = "yyyyMMdd_HHmmss"; private BigQueryResourceManagerUtils() {} @@ -46,8 +47,31 @@ private BigQueryResourceManagerUtils() {} * @return a BigQuery compatible dataset name. 
*/ static String generateDatasetId(String datasetName) { + + // Take substring of datasetName to account for random suffix + // TODO(polber) - remove with Beam 2.57.0 + int randomSuffixLength = 6; + datasetName = + datasetName.substring( + 0, + Math.min( + datasetName.length(), + MAX_DATASET_ID_LENGTH + - REPLACE_CHAR.length() + - TIME_FORMAT.length() + - REPLACE_CHAR.length() + - randomSuffixLength)); + + // Add random suffix to avoid collision + // TODO(polber) - remove with Beam 2.57.0 return generateResourceId( - datasetName, ILLEGAL_DATASET_ID_CHARS, "_", MAX_DATASET_ID_LENGTH, TIME_FORMAT); + datasetName, + ILLEGAL_DATASET_ID_CHARS, + REPLACE_CHAR, + MAX_DATASET_ID_LENGTH, + DateTimeFormatter.ofPattern(TIME_FORMAT)) + + REPLACE_CHAR + + RandomStringUtils.randomAlphanumeric(6).toLowerCase(); } /** diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerUtils.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerUtils.java index 28f1f5bf60..a719805884 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerUtils.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerUtils.java @@ -24,6 +24,7 @@ import java.util.List; import java.util.regex.Pattern; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; +import org.apache.commons.lang3.RandomStringUtils; /** Utilities for {@link BigtableResourceManager} implementations. */ public final class BigtableResourceManagerUtils { @@ -39,8 +40,7 @@ public final class BigtableResourceManagerUtils { private static final Pattern ILLEGAL_TABLE_CHARS = Pattern.compile("[^a-zA-Z0-9-_.]"); private static final String REPLACE_TABLE_ID_CHAR = "-"; - private static final DateTimeFormatter TIME_FORMAT = - DateTimeFormatter.ofPattern("yyyyMMdd-HHmmss-SSSSSS"); + private static final String TIME_FORMAT = "yyyyMMdd-HHmmss"; private BigtableResourceManagerUtils() {} @@ -57,13 +57,35 @@ private BigtableResourceManagerUtils() {} static List generateDefaultClusters( String baseString, String zone, int numNodes, StorageType storageType) { + // Take substring of baseString to account for random suffix + // TODO(polber) - remove with Beam 2.57.0 + int randomSuffixLength = 6; + baseString = + baseString + .toLowerCase() + .substring( + 0, + Math.min( + baseString.length(), + MAX_CLUSTER_ID_LENGTH + - REPLACE_CLUSTER_CHAR.length() + - TIME_FORMAT.length() + - REPLACE_CLUSTER_CHAR.length() + - randomSuffixLength)); + String clusterId = generateResourceId( baseString.toLowerCase(), ILLEGAL_CLUSTER_CHARS, REPLACE_CLUSTER_CHAR, MAX_CLUSTER_ID_LENGTH, - TIME_FORMAT); + DateTimeFormatter.ofPattern(TIME_FORMAT)); + + // Add random suffix to avoid collision + // TODO(polber) - remove with Beam 2.57.0 + clusterId = + clusterId + REPLACE_CLUSTER_CHAR + RandomStringUtils.randomAlphanumeric(6).toLowerCase(); + BigtableResourceManagerCluster cluster = BigtableResourceManagerCluster.create(clusterId, zone, numNodes, storageType); @@ -77,12 +99,31 @@ static List generateDefaultClusters( * @return The instance id string. 
*/ static String generateInstanceId(String baseString) { + + // Take substring of baseString to account for random suffix + // TODO(polber) - remove with Beam 2.57.0 + int randomSuffixLength = 6; + baseString = + baseString.substring( + 0, + Math.min( + baseString.length(), + MAX_INSTANCE_ID_LENGTH + - REPLACE_INSTANCE_ID_CHAR.length() + - TIME_FORMAT.length() + - REPLACE_INSTANCE_ID_CHAR.length() + - randomSuffixLength)); + + // Add random suffix to avoid collision + // TODO(polber) - remove with Beam 2.57.0 return generateResourceId( - baseString.toLowerCase(), - ILLEGAL_INSTANCE_ID_CHARS, - REPLACE_INSTANCE_ID_CHAR, - MAX_INSTANCE_ID_LENGTH, - TIME_FORMAT); + baseString.toLowerCase(), + ILLEGAL_INSTANCE_ID_CHARS, + REPLACE_INSTANCE_ID_CHAR, + MAX_INSTANCE_ID_LENGTH, + DateTimeFormatter.ofPattern(TIME_FORMAT)) + + REPLACE_INSTANCE_ID_CHAR + + RandomStringUtils.randomAlphanumeric(6).toLowerCase(); } /** @@ -92,12 +133,31 @@ static String generateInstanceId(String baseString) { * @return The instance id string. */ public static String generateTableId(String baseString) { + + // Take substring of baseString to account for random suffix + // TODO(polber) - remove with Beam 2.57.0 + int randomSuffixLength = 6; + baseString = + baseString.substring( + 0, + Math.min( + baseString.length(), + MAX_TABLE_ID_LENGTH + - REPLACE_TABLE_ID_CHAR.length() + - TIME_FORMAT.length() + - REPLACE_TABLE_ID_CHAR.length() + - randomSuffixLength)); + + // Add random suffix to avoid collision + // TODO(polber) - remove with Beam 2.57.0 return generateResourceId( - baseString.toLowerCase(), - ILLEGAL_TABLE_CHARS, - REPLACE_TABLE_ID_CHAR, - MAX_TABLE_ID_LENGTH, - TIME_FORMAT); + baseString.toLowerCase(), + ILLEGAL_TABLE_CHARS, + REPLACE_TABLE_ID_CHAR, + MAX_TABLE_ID_LENGTH, + DateTimeFormatter.ofPattern(TIME_FORMAT)) + + REPLACE_TABLE_ID_CHAR + + RandomStringUtils.randomAlphanumeric(6).toLowerCase(); } /** diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/datastream/DatastreamResourceManagerUtils.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/datastream/DatastreamResourceManagerUtils.java index ec9dd9c256..24b5269f85 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/datastream/DatastreamResourceManagerUtils.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/datastream/DatastreamResourceManagerUtils.java @@ -21,13 +21,14 @@ import java.time.format.DateTimeFormatter; import java.util.regex.Pattern; +import org.apache.commons.lang3.RandomStringUtils; public class DatastreamResourceManagerUtils { private static final int MAX_RESOURCE_ID_LENGTH = 60; private static final Pattern ILLEGAL_RESOURCE_ID_CHARS = Pattern.compile("[^a-zA-Z0-9- ]"); - private static final DateTimeFormatter TIME_FORMAT = - DateTimeFormatter.ofPattern("yyyyMMdd-HHmmss-SSSSSS"); + private static final String REPLACE_CHAR = "_"; + private static final String TIME_FORMAT = "yyyyMMdd-HHmmss"; private DatastreamResourceManagerUtils() {} @@ -41,7 +42,30 @@ private DatastreamResourceManagerUtils() {} * @return a Datastream compatible resource ID. 
*/ static String generateDatastreamId(String resourceId) { + + // Take substring of baseString to account for random suffix + // TODO(polber) - remove with Beam 2.57.0 + int randomSuffixLength = 6; + resourceId = + resourceId.substring( + 0, + Math.min( + resourceId.length(), + MAX_RESOURCE_ID_LENGTH + - REPLACE_CHAR.length() + - TIME_FORMAT.length() + - REPLACE_CHAR.length() + - randomSuffixLength)); + + // Add random suffix to avoid collision + // TODO(polber) - remove with Beam 2.57.0 return generateResourceId( - resourceId, ILLEGAL_RESOURCE_ID_CHARS, "-", MAX_RESOURCE_ID_LENGTH, TIME_FORMAT); + resourceId, + ILLEGAL_RESOURCE_ID_CHARS, + REPLACE_CHAR, + MAX_RESOURCE_ID_LENGTH, + DateTimeFormatter.ofPattern(TIME_FORMAT)) + + REPLACE_CHAR + + RandomStringUtils.randomAlphanumeric(6).toLowerCase(); } } diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/spanner/utils/SpannerResourceManagerUtils.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/spanner/utils/SpannerResourceManagerUtils.java index 3dce40511a..9e40637e18 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/spanner/utils/SpannerResourceManagerUtils.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/spanner/utils/SpannerResourceManagerUtils.java @@ -25,6 +25,7 @@ import java.util.regex.Pattern; import org.apache.beam.it.gcp.spanner.SpannerResourceManager; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.CharMatcher; +import org.apache.commons.lang3.RandomStringUtils; /** Utilities for {@link SpannerResourceManager} implementations. */ public final class SpannerResourceManagerUtils { @@ -34,10 +35,8 @@ public final class SpannerResourceManagerUtils { private static final String REPLACE_DATABASE_CHAR = "_"; public static final int MAX_INSTANCE_ID_LENGTH = 30; public static final int MAX_DATABASE_ID_LENGTH = 30; - private static final DateTimeFormatter INSTANCE_TIME_FORMAT = - DateTimeFormatter.ofPattern("yyyyMMdd-HHmmss-SSSSSS"); - private static final DateTimeFormatter DATABASE_TIME_FORMAT = - DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss_SSSSSS"); + private static final String INSTANCE_TIME_FORMAT = "yyyyMMdd-HHmmss"; + private static final String DATABASE_TIME_FORMAT = "yyyyMMdd_HHmmss"; private SpannerResourceManagerUtils() {} @@ -50,13 +49,27 @@ private SpannerResourceManagerUtils() {} public static String generateDatabaseId(String baseString) { checkArgument(baseString.length() != 0, "baseString cannot be empty!"); + // Take substring of baseString to account for random suffix + // TODO(polber) - remove with Beam 2.57.0 + int randomSuffixLength = 6; + baseString = + baseString.substring( + 0, + Math.min( + baseString.length(), + MAX_DATABASE_ID_LENGTH + - REPLACE_DATABASE_CHAR.length() + - DATABASE_TIME_FORMAT.length() + - REPLACE_DATABASE_CHAR.length() + - randomSuffixLength)); + String databaseId = generateResourceId( baseString, ILLEGAL_DATABASE_CHARS, REPLACE_DATABASE_CHAR, MAX_DATABASE_ID_LENGTH, - DATABASE_TIME_FORMAT); + DateTimeFormatter.ofPattern(DATABASE_TIME_FORMAT)); // replace hyphen with underscore, so there's no need for backticks String trimmed = CharMatcher.is('_').trimTrailingFrom(databaseId); @@ -71,6 +84,14 @@ public static String generateDatabaseId(String baseString) { if (!Character.isLetter(trimmed.charAt(0))) { trimmed = padding + trimmed.substring(1); } + + // Add random suffix to avoid collision + // TODO(polber) - remove with Beam 2.57.0 + trimmed = + trimmed + + REPLACE_DATABASE_CHAR + + 
RandomStringUtils.randomAlphanumeric(randomSuffixLength).toLowerCase(); + return trimmed; } @@ -81,13 +102,28 @@ public static String generateDatabaseId(String baseString) { * @return The instance id string. */ public static String generateInstanceId(String baseString) { + + // Take substring of baseString to account for random suffix + // TODO(polber) - remove with Beam 2.57.0 + int randomSuffixLength = 6; + baseString = + baseString.substring( + 0, + Math.min( + baseString.length(), + MAX_INSTANCE_ID_LENGTH + - REPLACE_INSTANCE_CHAR.length() + - INSTANCE_TIME_FORMAT.length() + - REPLACE_INSTANCE_CHAR.length() + - randomSuffixLength)); + String instanceId = generateResourceId( baseString, ILLEGAL_INSTANCE_CHARS, REPLACE_INSTANCE_CHAR, MAX_INSTANCE_ID_LENGTH, - INSTANCE_TIME_FORMAT); + DateTimeFormatter.ofPattern(INSTANCE_TIME_FORMAT)); // if first char is not a letter, replace with letter, so it doesn't // violate spanner's instance naming rules @@ -96,6 +132,11 @@ public static String generateInstanceId(String baseString) { instanceId = padding + instanceId.substring(1); } + // Add random suffix to avoid collision + // TODO(polber) - remove with Beam 2.57.0 + instanceId = + instanceId + REPLACE_INSTANCE_CHAR + RandomStringUtils.randomAlphanumeric(6).toLowerCase(); + return instanceId; } } diff --git a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManagerTest.java b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManagerTest.java index e9cd252387..039c63b33f 100644 --- a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManagerTest.java +++ b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManagerTest.java @@ -87,7 +87,8 @@ public void testGetProjectIdReturnsCorrectValue() { public void testGetDatasetIdReturnsCorrectValue() { BigQueryResourceManager tm = BigQueryResourceManager.builder(TEST_ID, PROJECT_ID, null).build(); - assertThat(tm.getDatasetId()).matches(TEST_ID.replace('-', '_') + "_\\d{8}_\\d{6}_\\d{6}"); + assertThat(tm.getDatasetId()) + .matches(TEST_ID.replace('-', '_') + "_\\d{8}_\\d{6}_[a-zA-Z0-9]{6}"); } @Test diff --git a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerTest.java b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerTest.java index 74b25e84c6..7842b99d29 100644 --- a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerTest.java +++ b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerTest.java @@ -95,7 +95,7 @@ public void testCreateResourceManagerCreatesCorrectIdValues() throws IOException BigtableResourceManager.builder(TEST_ID, PROJECT_ID, null), bigtableResourceManagerClientFactory); - assertThat(rm.getInstanceId()).matches(TEST_ID + "-\\d{8}-\\d{6}-\\d{6}"); + assertThat(rm.getInstanceId()).matches(TEST_ID + "-\\d{8}-\\d{6}-[a-zA-Z0-9]{6}"); assertThat(rm.getProjectId()).matches(PROJECT_ID); } diff --git a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerUtilsTest.java b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerUtilsTest.java index 9634dc2d04..d0e8c32917 100644 --- a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerUtilsTest.java +++ 
b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerUtilsTest.java @@ -41,7 +41,7 @@ public void testGenerateDefaultClustersShouldWorkWhenAllParametersValid() { generateDefaultClusters(TEST_ID, ZONE, NUM_NODES, STORAGE_TYPE); BigtableResourceManagerCluster thisCluster = cluster.iterator().next(); - assertThat(thisCluster.clusterId()).matches(TEST_ID + "-\\d{8}-\\d{6}-\\d{6}"); + assertThat(thisCluster.clusterId()).matches(TEST_ID + "-\\d{8}-\\d{6}-[a-zA-Z0-9]{6}"); assertThat(thisCluster.zone()).isEqualTo(ZONE); assertThat(thisCluster.numNodes()).isEqualTo(NUM_NODES); assertThat(thisCluster.storageType()).isEqualTo(STORAGE_TYPE); @@ -58,7 +58,8 @@ public void testGenerateDefaultClustersShouldThrowErrorWhenTestIdIsEmpty() { public void testGenerateDefaultClustersShouldShortenTestIdWhenTooLong() { Iterable cluster = generateDefaultClusters("longer-id", ZONE, NUM_NODES, STORAGE_TYPE); - assertThat(cluster.iterator().next().clusterId()).matches("longer--\\d{8}-\\d{6}-\\d{6}"); + assertThat(cluster.iterator().next().clusterId()) + .matches("longer--\\d{8}-\\d{6}-[a-zA-Z0-9]{6}"); } @Test diff --git a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/spanner/SpannerResourceManagerTest.java b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/spanner/SpannerResourceManagerTest.java index b361f13cfd..7f79db449a 100644 --- a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/spanner/SpannerResourceManagerTest.java +++ b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/spanner/SpannerResourceManagerTest.java @@ -185,9 +185,9 @@ public void testExecuteDdlStatementShouldWorkWhenSpannerDoesntThrowAnyError() String actualDatabaseId = databaseIdCaptor.getValue(); Iterable actualStatement = statementCaptor.getValue(); - assertThat(actualInstanceId).matches(TEST_ID + "-\\d{8}-\\d{6}-\\d{6}"); + assertThat(actualInstanceId).matches(TEST_ID + "-\\d{8}-\\d{6}-[a-zA-Z0-9]{6}"); - assertThat(actualDatabaseId).matches(TEST_ID + "_\\d{8}_\\d{6}_\\d{6}"); + assertThat(actualDatabaseId).matches(TEST_ID + "_\\d{8}_\\d{6}_[a-zA-Z0-9]{6}"); assertThat(actualStatement).containsExactlyElementsIn(ImmutableList.of(statement)); } diff --git a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/spanner/utils/SpannerResourceManagerUtilsTest.java b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/spanner/utils/SpannerResourceManagerUtilsTest.java index cd1a418a18..4381befc3a 100644 --- a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/spanner/utils/SpannerResourceManagerUtilsTest.java +++ b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/spanner/utils/SpannerResourceManagerUtilsTest.java @@ -18,6 +18,8 @@ package org.apache.beam.it.gcp.spanner.utils; import static com.google.common.truth.Truth.assertThat; +import static org.apache.beam.it.gcp.spanner.utils.SpannerResourceManagerUtils.MAX_DATABASE_ID_LENGTH; +import static org.apache.beam.it.gcp.spanner.utils.SpannerResourceManagerUtils.MAX_INSTANCE_ID_LENGTH; import static org.apache.beam.it.gcp.spanner.utils.SpannerResourceManagerUtils.generateDatabaseId; import static org.apache.beam.it.gcp.spanner.utils.SpannerResourceManagerUtils.generateInstanceId; import static org.junit.Assert.assertThrows; @@ -36,7 +38,17 @@ public void testGenerateInstanceIdShouldReplaceNonLetterFirstCharWithLetter() { String actual = generateInstanceId(testBaseString); - assertThat(actual).matches("[a-z]-test-\\d{8}-\\d{6}-\\d{6}"); + 
assertThat(actual).matches("[a-z]-test-\\d{8}-\\d{6}-[a-zA-Z0-9]{6}"); + } + + @Test + public void testGenerateInstanceIdShouldConcatLongId() { + String testBaseString = "test-really_long-database-id"; + + String actual = generateInstanceId(testBaseString); + + assertThat(actual).matches("test-re-\\d{8}-\\d{6}-[a-zA-Z0-9]{6}"); + assertThat(actual.length()).isEqualTo(MAX_INSTANCE_ID_LENGTH); } @Test @@ -45,7 +57,7 @@ public void testGenerateDatabaseIdShouldNotReplaceDigitLastCharWithLetter() { String actual = generateDatabaseId(testBaseString); - assertThat(actual).matches("db_0_\\d{8}_\\d{6}_\\d{6}"); + assertThat(actual).matches("db_0_\\d{8}_\\d{6}_[a-zA-Z0-9]{6}"); } @Test @@ -54,7 +66,7 @@ public void testGenerateDatabaseIdShouldReplaceDollarSignWithUnderscore() { String actual = generateDatabaseId(testBaseString); - assertThat(actual).matches("t_db_\\d{8}_\\d{6}_\\d{6}"); + assertThat(actual).matches("t_db_\\d{8}_\\d{6}_[a-zA-Z0-9]{6}"); } @Test @@ -63,7 +75,7 @@ public void testGenerateDatabaseIdShouldReplaceDotWithUnderscore() { String actual = generateDatabaseId(testBaseString); - assertThat(actual).matches("test_da_\\d{8}_\\d{6}_\\d{6}"); + assertThat(actual).matches("test_da_\\d{8}_\\d{6}_[a-zA-Z0-9]{6}"); } @Test @@ -72,7 +84,7 @@ public void testGenerateDatabaseIdShouldReplaceHyphenWithUnderscore() { String actual = generateDatabaseId(testBaseString); - assertThat(actual).matches("test_da_\\d{8}_\\d{6}_\\d{6}"); + assertThat(actual).matches("test_da_\\d{8}_\\d{6}_[a-zA-Z0-9]{6}"); } @Test @@ -81,7 +93,7 @@ public void testGenerateDatabaseIdShouldReplaceNonLetterFirstCharWithLetter() { String actual = generateDatabaseId(testBaseString); - assertThat(actual).matches("[a-z]_datab_\\d{8}_\\d{6}_\\d{6}"); + assertThat(actual).matches("[a-z]_datab_\\d{8}_\\d{6}_[a-zA-Z0-9]{6}"); } @Test @@ -90,7 +102,7 @@ public void testGenerateDatabaseIdShouldReplaceUpperCaseLettersWithLowerCase() { String actual = generateDatabaseId(testBaseString); - assertThat(actual).matches("tda_\\d{8}_\\d{6}_\\d{6}"); + assertThat(actual).matches("tda_\\d{8}_\\d{6}_[a-zA-Z0-9]{6}"); } @Test @@ -99,7 +111,7 @@ public void testGenerateDatabaseIdShouldTrimTrailingUnderscore() { String actual = generateDatabaseId(testBaseString); - assertThat(actual).matches("test_da_\\d{8}_\\d{6}_\\d{6}"); + assertThat(actual).matches("test_da_\\d{8}_\\d{6}_[a-zA-Z0-9]{6}"); } @Test @@ -108,4 +120,14 @@ public void testGenerateDatabaseIdShouldThrowErrorWithEmptyInput() { assertThrows(IllegalArgumentException.class, () -> generateDatabaseId(testBaseString)); } + + @Test + public void testGenerateDatabaseIdShouldConcatLongId() { + String testBaseString = "test_really_long_database_id"; + + String actual = generateDatabaseId(testBaseString); + + assertThat(actual).matches("test_re_\\d{8}_\\d{6}_[a-zA-Z0-9]{6}"); + assertThat(actual.length()).isEqualTo(MAX_DATABASE_ID_LENGTH); + } } diff --git a/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/PythonDockerfileGenerator.java b/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/PythonDockerfileGenerator.java index 2487650331..6bbb8d58f5 100644 --- a/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/PythonDockerfileGenerator.java +++ b/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/PythonDockerfileGenerator.java @@ -26,7 +26,6 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import java.util.HashMap; import java.util.Map; import 
java.util.logging.Logger; @@ -67,79 +66,4 @@ public static void generateDockerfile( throw e; } } - - public static void generateXlangDockerfile( - String basePythonContainerImage, - String containerName, - File targetDirectory, - File artifactFile, - String commandSpec, - String beamVersion) - throws IOException, TemplateException { - Configuration freemarkerConfig = new Configuration(Configuration.VERSION_2_3_32); - freemarkerConfig.setDefaultEncoding("UTF-8"); - freemarkerConfig.setTemplateExceptionHandler(TemplateExceptionHandler.RETHROW_HANDLER); - freemarkerConfig.setLogTemplateExceptions(true); - freemarkerConfig.setClassForTemplateLoading(PythonDockerfileGenerator.class, "/"); - - String classesDirectory = targetDirectory.getPath() + "/classes"; - Map parameters = new HashMap<>(); - parameters.put("baseContainerImage", basePythonContainerImage); - parameters.put("commandSpec", commandSpec); - parameters.put("containerName", containerName); - parameters.put("beamVersion", beamVersion); - - Template template = freemarkerConfig.getTemplate("Dockerfile-xlang-template"); - - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - OutputStreamWriter writer = new OutputStreamWriter(baos); - - try { - template.process(parameters, writer); - writer.flush(); - - Files.createDirectories(Path.of(classesDirectory + "/" + containerName)); - - Files.write( - Path.of(classesDirectory + "/" + containerName + "/Dockerfile"), - baos.toString(StandardCharsets.UTF_8).getBytes()); - - } catch (Exception e) { - LOG.warning("Unable to generate Dockerfile for " + containerName); - throw e; - } - - try { - Files.createDirectories(Path.of(classesDirectory + "/" + containerName + "/classpath")); - Files.createDirectories(Path.of(classesDirectory + "/" + containerName + "/libs")); - - String artifactPath = artifactFile.getPath(); - String targetArtifactPath = - artifactPath.substring(artifactPath.lastIndexOf("/"), artifactPath.length()); - - Files.copy( - Path.of(targetDirectory.getPath() + targetArtifactPath), - Path.of(classesDirectory + "/" + containerName + "/classpath" + targetArtifactPath)); - String sourceLibsDirectory = targetDirectory.getPath() + "/extra_libs"; - String destLibsDirectory = classesDirectory + "/" + containerName + "/libs/"; - Files.walk(Paths.get(sourceLibsDirectory)) - .forEach( - source -> { - LOG.warning("current source: " + source.toString()); - LOG.warning("current source libs directory: " + sourceLibsDirectory); - Path dest = - Paths.get( - destLibsDirectory, - source.toString().substring(sourceLibsDirectory.length())); - try { - Files.copy(source, dest); - } catch (IOException e) { - LOG.warning("Unable to copy contents of " + sourceLibsDirectory); - } - }); - } catch (Exception e) { - LOG.warning("unable to copy jar files"); - throw e; - } - } } diff --git a/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/XlangDockerfileGenerator.java b/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/XlangDockerfileGenerator.java new file mode 100644 index 0000000000..a8738a0732 --- /dev/null +++ b/plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/XlangDockerfileGenerator.java @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.plugin; + +import freemarker.template.Configuration; +import freemarker.template.Template; +import freemarker.template.TemplateException; +import freemarker.template.TemplateExceptionHandler; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.HashMap; +import java.util.Map; +import java.util.logging.Logger; + +/** Utility class that generates a simple Dockerfile for Python templates. */ +public class XlangDockerfileGenerator { + private static final Logger LOG = Logger.getLogger(XlangDockerfileGenerator.class.getName()); + + private XlangDockerfileGenerator() {} + + public static void generateDockerfile( + String baseJavaContainerImage, + String beamVersion, + String pythonVersion, + String containerName, + File targetDirectory, + File artifactFile, + String commandSpec) + throws IOException, TemplateException { + Configuration freemarkerConfig = new Configuration(Configuration.VERSION_2_3_32); + freemarkerConfig.setDefaultEncoding("UTF-8"); + freemarkerConfig.setTemplateExceptionHandler(TemplateExceptionHandler.RETHROW_HANDLER); + freemarkerConfig.setLogTemplateExceptions(true); + freemarkerConfig.setClassForTemplateLoading(PythonDockerfileGenerator.class, "/"); + + String classesDirectory = targetDirectory.getPath() + "/classes"; + Map parameters = new HashMap<>(); + parameters.put("baseJavaContainerImage", baseJavaContainerImage); + parameters.put("beamVersion", beamVersion); + parameters.put("pythonVersion", pythonVersion); + parameters.put("commandSpec", commandSpec); + parameters.put("containerName", containerName); + + Template template = freemarkerConfig.getTemplate("Dockerfile-template-xlang"); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + OutputStreamWriter writer = new OutputStreamWriter(baos); + + try { + template.process(parameters, writer); + writer.flush(); + + Files.createDirectories(Path.of(classesDirectory + "/" + containerName)); + + Files.write( + Path.of(classesDirectory + "/" + containerName + "/Dockerfile"), + baos.toString(StandardCharsets.UTF_8).getBytes()); + + } catch (Exception e) { + LOG.warning("Unable to generate Dockerfile for " + containerName); + throw e; + } + + try { + Files.createDirectories(Path.of(classesDirectory + "/" + containerName + "/classpath")); + Files.createDirectories(Path.of(classesDirectory + "/" + containerName + "/libs")); + + String artifactPath = artifactFile.getPath(); + String targetArtifactPath = + artifactPath.substring(artifactPath.lastIndexOf("/"), artifactPath.length()); + + Files.copy( + Path.of(targetDirectory.getPath() + targetArtifactPath), + Path.of(classesDirectory + "/" + containerName + "/classpath" + targetArtifactPath)); + String sourceLibsDirectory = targetDirectory.getPath() + "/extra_libs"; + String destLibsDirectory = classesDirectory + "/" + containerName + "/libs/"; + Files.walk(Paths.get(sourceLibsDirectory)) + 
.forEach( + source -> { + LOG.warning("current source: " + source.toString()); + LOG.warning("current source libs directory: " + sourceLibsDirectory); + Path dest = + Paths.get( + destLibsDirectory, + source.toString().substring(sourceLibsDirectory.length())); + try { + Files.copy(source, dest); + } catch (IOException e) { + LOG.warning("Unable to copy contents of " + sourceLibsDirectory); + } + }); + } catch (Exception e) { + LOG.warning("unable to copy jar files"); + throw e; + } + } +} diff --git a/plugins/core-plugin/src/main/resources/Dockerfile-template-xlang b/plugins/core-plugin/src/main/resources/Dockerfile-template-xlang new file mode 100644 index 0000000000..1bee40db15 --- /dev/null +++ b/plugins/core-plugin/src/main/resources/Dockerfile-template-xlang @@ -0,0 +1,72 @@ +#===================================================================# +# Create build environment from base Python template launcher image # +#===================================================================# +FROM gcr.io/dataflow-templates-base/python311-template-launcher-base:latest as python-base + +# Build args +ARG WORKDIR=/template +ARG REQUIREMENTS_FILE=requirements.txt +ARG BEAM_VERSION=${beamVersion} +ARG BEAM_PACKAGE=apache-beam[dataframe,gcp,test,yaml]==$BEAM_VERSION +ARG PY_VERSION=${pythonVersion} + +# Copy template files to /template +RUN mkdir -p $WORKDIR +COPY ${containerName}-generated-metadata.json $REQUIREMENTS_FILE* /template/ +COPY ${containerName}/ /template/${containerName}/ +WORKDIR $WORKDIR + +# Create requirements.txt file if not provided +RUN if ! [ -f requirements.txt ] ; then echo "$BEAM_PACKAGE" > requirements.txt ; fi + +# Install dependencies to launch the pipeline and download to reduce startup time +RUN python -m venv /venv \ + && /venv/bin/pip install --no-cache-dir --upgrade pip setuptools \ + && /venv/bin/pip install --no-cache-dir -U -r $REQUIREMENTS_FILE \ + && /venv/bin/pip download --no-cache-dir --dest /tmp/dataflow-requirements-cache -r $REQUIREMENTS_FILE \ + && rm -rf /usr/local/lib/python$PY_VERSION/site-packages \ + && cp -r /venv/lib/python$PY_VERSION/site-packages /usr/local/lib/python$PY_VERSION/ + # change to mv once Beam 2.57.0 is released + + +#============================================================# +# Create Distroless xlang image compatible with YamlTemplate # +#============================================================# +FROM ${baseJavaContainerImage} + +# Build args +ARG CHIPSET_ARCH=x86_64-linux-gnu +ARG PY_VERSION=${pythonVersion} + +# Set environment variables +ENV DATAFLOW_JAVA_COMMAND_SPEC=${commandSpec} +ENV PIP_NO_DEPS=True + +# Copy template, python wheels and python launcher script from python-base +COPY --from=python-base /template /template +COPY --from=python-base /tmp/dataflow-requirements-cache /tmp/dataflow-requirements-cache +COPY --from=python-base /opt/google/dataflow/python_template_launcher /opt/google/dataflow/python_template_launcher + +# Copy python and installed packages from python-base +COPY --from=python-base /usr/local/bin/python$PY_VERSION /usr/local/bin/python +COPY --from=python-base /usr/local/lib/python$PY_VERSION /usr/local/lib/python$PY_VERSION + +# Workaround until Beam 2.57.0 is released +COPY --from=python-base /venv /root/.apache_beam/cache/venvs/py-$PY_VERSION-beam-${beamVersion}-da39a3ee5e6b4b0d3255bfef95601890afd80709 + +# Copy required shared libraries from python-base +COPY --from=python-base /lib/$CHIPSET_ARCH/ld-*so* /lib64/ +COPY --from=python-base /lib/$CHIPSET_ARCH/lib*so* /lib/$CHIPSET_ARCH/ +COPY 
--from=python-base /usr/lib/$CHIPSET_ARCH/libffi* /usr/lib/$CHIPSET_ARCH/ +COPY --from=python-base /usr/local/lib/libpython$PY_VERSION* /usr/local/lib/ + +# Copy minimal commands from python-base needed to execute template +COPY --from=python-base /bin/dash /bin/sh +COPY --from=python-base /usr/bin/which.debianutils /usr/bin/which + +# Copy licenses +COPY --from=python-base /usr/licenses/ /usr/licenses/ + +WORKDIR /template + +ENTRYPOINT ["/opt/google/dataflow/java_template_launcher"] \ No newline at end of file diff --git a/plugins/core-plugin/src/main/resources/Dockerfile-xlang-template b/plugins/core-plugin/src/main/resources/Dockerfile-xlang-template deleted file mode 100644 index d5ceefb049..0000000000 --- a/plugins/core-plugin/src/main/resources/Dockerfile-xlang-template +++ /dev/null @@ -1,51 +0,0 @@ -#========================================================================# -# Create build environment from base Python 3.11 template launcher image # -#========================================================================# -FROM gcr.io/dataflow-templates-base/java11-template-launcher-base-distroless as java-base - - -#============================================================# -# Create Distroless xlang image compatible with YamlTemplate # -#============================================================# -FROM gcr.io/dataflow-templates-base/python311-template-launcher-base:latest - - -ARG REQUIREMENTS_FILE=requirements.txt - -COPY --from=busybox:1.35.0-uclibc /bin/sh /bin/sh -# Copy template files to /template -ARG WORKDIR=/template -RUN mkdir -p $WORKDIR -COPY $REQUIREMENTS_FILE /template -COPY ${containerName}/ /template/${containerName}/ -COPY ${containerName}-generated-metadata.json /template - -WORKDIR $WORKDIR - -# Set environment variables -ENV DATAFLOW_JAVA_COMMAND_SPEC=${commandSpec} -ENV FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE=requirements.txt - -# SHA hash here equates to null and is independent of beam version. This hash is generated based on the values of the -# .withExtraPackages() call made in the PythonExternalTextTransform class. We do not utilize this function meaning that -# this hash will remain constant. 
-ENV SITE_PACKAGES=/root/.apache_beam/cache/venvs/py-3.11-beam-${beamVersion}-da39a3ee5e6b4b0d3255bfef95601890afd80709 -RUN python -m venv $SITE_PACKAGES - -# pip install dependencies and cache wheels -RUN source $SITE_PACKAGES/bin/activate \ - && pip install --no-cache-dir --upgrade pip \ - && pip install --no-cache-dir --upgrade setuptools \ - && pip install --no-cache-dir pyparsing==2.4.2 \ - && pip install --no-cache-dir -U -r $REQUIREMENTS_FILE \ - && pip download --no-cache-dir --dest /tmp/dataflow-requirements-cache -r $REQUIREMENTS_FILE - -COPY --from=java-base /usr/local/jdk-11.0.20+8 /usr/local/jdk-11.0.20+8 -RUN ln -s /usr/local/jdk-11.0.20+8/bin/java /usr/local/bin/java -COPY --from=java-base /opt/google/dataflow/java_template_launcher /opt/google/dataflow/java_template_launcher -COPY --from=java-base /usr/licenses/LICENSE-java11 /usr/licenses/LICENSE-java11 - -WORKDIR /template - -ENV PIP_NO_DEPS=True -ENTRYPOINT ["/opt/google/dataflow/java_template_launcher"] \ No newline at end of file diff --git a/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/XlangDockerfileGeneratorTest.java b/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/XlangDockerfileGeneratorTest.java new file mode 100644 index 0000000000..f5598f7e91 --- /dev/null +++ b/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/XlangDockerfileGeneratorTest.java @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.plugin; + +import static com.google.common.truth.Truth.assertThat; +import static org.junit.Assert.assertTrue; + +import com.google.common.base.Charsets; +import com.google.common.io.Files; +import freemarker.template.TemplateException; +import java.io.File; +import java.io.IOException; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests for class {@link XlangDockerfileGenerator}. 
*/ +@RunWith(JUnit4.class) +public class XlangDockerfileGeneratorTest { + private final File outputFolder = Files.createTempDir().getAbsoluteFile(); + + @Test + public void testGenerateDockerfile() throws IOException, TemplateException { + new File(outputFolder.getAbsolutePath() + "/containerName").mkdirs(); + new File(outputFolder.getAbsolutePath() + "/extra_libs/example").mkdirs(); + File artifactPath = new File(outputFolder.getAbsolutePath() + "/artifactPath"); + artifactPath.mkdirs(); + XlangDockerfileGenerator.generateDockerfile( + "a java container image", + "beam_version", + "py_version", + "containerName", + outputFolder, + artifactPath, + "command_spec"); + File outputFile = + new File(outputFolder.getAbsolutePath() + "/classes/containerName/Dockerfile"); + + assertTrue(outputFile.exists()); + String fileContents = Files.toString(outputFile, Charsets.UTF_8); + assertThat(fileContents).contains("FROM a java container image"); + assertThat(fileContents).contains("=beam_version"); + assertThat(fileContents).contains("=py_version"); + } +} diff --git a/plugins/templates-maven-plugin/src/main/java/com/google/cloud/teleport/plugin/maven/TemplatesStageMojo.java b/plugins/templates-maven-plugin/src/main/java/com/google/cloud/teleport/plugin/maven/TemplatesStageMojo.java index 2fc1244ead..53d1397e41 100644 --- a/plugins/templates-maven-plugin/src/main/java/com/google/cloud/teleport/plugin/maven/TemplatesStageMojo.java +++ b/plugins/templates-maven-plugin/src/main/java/com/google/cloud/teleport/plugin/maven/TemplatesStageMojo.java @@ -30,6 +30,7 @@ import com.google.cloud.teleport.plugin.TemplateDefinitionsParser; import com.google.cloud.teleport.plugin.TemplatePluginUtils; import com.google.cloud.teleport.plugin.TemplateSpecsGenerator; +import com.google.cloud.teleport.plugin.XlangDockerfileGenerator; import com.google.cloud.teleport.plugin.YamlDockerfileGenerator; import com.google.cloud.teleport.plugin.model.ImageSpec; import com.google.cloud.teleport.plugin.model.TemplateDefinitions; @@ -483,15 +484,17 @@ private void stageFlexJavaTemplate( String dockerfileContainer = outputClassesDirectory.getPath() + "/" + containerName; String dockerfilePath = dockerfileContainer + "/Dockerfile"; String xlangCommandSpec = "/template/" + containerName + "/resources/" + commandSpecFileName; + String beamVersion = project.getProperties().getProperty("beam.version"); File dockerfile = new File(dockerfilePath); if (!dockerfile.exists()) { - PythonDockerfileGenerator.generateXlangDockerfile( - basePythonContainerImage, + XlangDockerfileGenerator.generateDockerfile( + baseContainerImage, + beamVersion, + pythonVersion, containerName, targetDirectory, project.getArtifact().getFile(), - xlangCommandSpec, - project.getProperties().getProperty("beam.version")); + xlangCommandSpec); } LOG.info("Staging XLANG image using Dockerfile"); stageXlangUsingDockerfile(imagePath, containerName + "/Dockerfile"); diff --git a/pom.xml b/pom.xml index 0492523cad..aa01852b89 100644 --- a/pom.xml +++ b/pom.xml @@ -358,6 +358,7 @@ **/*AutoValue_* + **/*Exception.* diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/common/JavascriptTextTransformer.java b/v1/src/main/java/com/google/cloud/teleport/templates/common/JavascriptTextTransformer.java index b75dfa47e2..eee28497ae 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/common/JavascriptTextTransformer.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/common/JavascriptTextTransformer.java @@ -105,7 +105,7 @@ void 
setJavascriptTextTransformFunctionName( optional = true, description = "JavaScript UDF auto-reload interval (minutes)", helpText = - "Specifies how frequently to reload the UDF, in minutes. If the value is greater than 0, Dataflow periodically checks the UDF file in Cloud Storage, and reloads the UDF if the file is modified. This parameter allows you to update the UDF while the pipeline is running, without needing to restart the job. If the value is 0, UDF reloading is disabled. The default value is 0.") + "Define the interval that workers may check for JavaScript UDF changes to reload the files.") @Default.Integer(0) ValueProvider getJavascriptTextTransformReloadIntervalMinutes(); diff --git a/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToSpanner.java b/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToSpanner.java index 21a00f7eb8..a29d67b18b 100644 --- a/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToSpanner.java +++ b/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToSpanner.java @@ -424,12 +424,12 @@ private static void validateSourceType(Options options) { options.setDatastreamSourceType(sourceType); } - private static String getSourceType(Options options) { + static String getSourceType(Options options) { if (options.getDatastreamSourceType() != null) { return options.getDatastreamSourceType(); } if (options.getStreamName() == null) { - throw new IllegalArgumentException("Stream name cannot be empty. "); + throw new IllegalArgumentException("Stream name cannot be empty."); } GcpOptions gcpOptions = options.as(GcpOptions.class); DataStreamClient datastreamClient; diff --git a/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/datastream/ChangeEventContextFactory.java b/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/datastream/ChangeEventContextFactory.java index bf29ae000e..4e0e05e529 100644 --- a/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/datastream/ChangeEventContextFactory.java +++ b/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/datastream/ChangeEventContextFactory.java @@ -38,12 +38,17 @@ private static String getSourceType(JsonNode changeEvent) throws InvalidChangeEv public static ChangeEventContext createChangeEventContext( JsonNode changeEvent, Ddl ddl, String shadowTablePrefix, String sourceType) throws ChangeEventConvertorException, InvalidChangeEventException { - - if (!sourceType.equals(getSourceType(changeEvent))) { + String sourceTypeFromChangeEvent; + try { + sourceTypeFromChangeEvent = getSourceType(changeEvent); + } catch (Exception e) { + throw new InvalidChangeEventException(e); + } + if (!sourceType.equals(sourceTypeFromChangeEvent)) { throw new InvalidChangeEventException( "Change event with invalid source. 
" + "Actual(" - + getSourceType(changeEvent) + + sourceTypeFromChangeEvent + "), Expected(" + sourceType + ")"); diff --git a/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/spanner/ProcessInformationSchema.java b/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/spanner/ProcessInformationSchema.java index 8ceac94623..e06796b080 100644 --- a/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/spanner/ProcessInformationSchema.java +++ b/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/spanner/ProcessInformationSchema.java @@ -199,5 +199,19 @@ List getDataTablesWithNoShadowTables(Ddl ddl) { .filter(f -> !existingShadowTables.contains(shadowTablePrefix + f)) .collect(Collectors.toList()); } + + /* + Added for the purpose of unit testing + */ + public void setDialect(Dialect dialect) { + this.dialect = dialect; + } + + /* + Added for the purpose of unit testing + */ + public void setSpannerAccessor(SpannerAccessor spannerAccessor) { + this.spannerAccessor = spannerAccessor; + } } } diff --git a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerDatatypeIT.java b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerDatatypeIT.java index 6c886aadb3..d1997ba376 100644 --- a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerDatatypeIT.java +++ b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerDatatypeIT.java @@ -87,7 +87,7 @@ public void setUp() throws IOException { pubsubResourceManager, new HashMap<>() { { - put("inputFileFormat", "json"); + put("inputFileFormat", "avro"); } }); } @@ -108,7 +108,7 @@ public static void cleanUp() throws IOException { } @Test - public void migrationTestWithAllDatatypeDefaultMapping() { + public void migrationTestWithAllDatatypeConversionMapping() { // Construct a ChainedConditionCheck with 4 stages. // 1. Send initial wave of events // 2. Wait on Spanner to have events @@ -118,8 +118,8 @@ public void migrationTestWithAllDatatypeDefaultMapping() { uploadDataStreamFile( jobInfo, TABLE1, - "backfill.jsonl", - "DataStreamToSpannerDatatypeIT/mysql-backfill-AllDatatypeColumns.jsonl"), + "backfill.avro", + "DataStreamToSpannerDatatypeIT/mysql-backfill-AllDatatypeColumns.avro"), SpannerRowsCheck.builder(spannerResourceManager, TABLE1) .setMinRows(2) .setMaxRows(2) @@ -142,8 +142,13 @@ public void migrationTestWithAllDatatypeDefaultMapping() { uploadDataStreamFile( jobInfo, TABLE1, - "cdc1.jsonl", - "DataStreamToSpannerDatatypeIT/mysql-cdc-AllDatatypeColumns.jsonl"), + "cdc1.avro", + "DataStreamToSpannerDatatypeIT/mysql-cdc1-AllDatatypeColumns.avro"), + uploadDataStreamFile( + jobInfo, + TABLE1, + "cdc2.avro", + "DataStreamToSpannerDatatypeIT/mysql-cdc2-AllDatatypeColumns.avro"), SpannerRowsCheck.builder(spannerResourceManager, TABLE1) .setMinRows(1) .setMaxRows(1) @@ -161,7 +166,7 @@ public void migrationTestWithAllDatatypeDefaultMapping() { } @Test - public void migrationTestWithAllDatatypeConversionMapping() { + public void migrationTestWithAllDatatypeDefaultMapping() { // Construct a ChainedConditionCheck with 4 stages. // 1. Send initial wave of events // 2. 
Wait on Spanner to have events @@ -171,8 +176,8 @@ public void migrationTestWithAllDatatypeConversionMapping() { uploadDataStreamFile( jobInfo, TABLE2, - "backfill.jsonl", - "DataStreamToSpannerDatatypeIT/mysql-backfill-AllDatatypeColumns2.jsonl"), + "backfill.avro", + "DataStreamToSpannerDatatypeIT/mysql-backfill-AllDatatypeColumns2.avro"), SpannerRowsCheck.builder(spannerResourceManager, TABLE2) .setMinRows(2) .setMaxRows(2) @@ -195,8 +200,8 @@ public void migrationTestWithAllDatatypeConversionMapping() { uploadDataStreamFile( jobInfo, TABLE2, - "cdc1.jsonl", - "DataStreamToSpannerDatatypeIT/mysql-cdc-AllDatatypeColumns2.jsonl"), + "cdc1.avro", + "DataStreamToSpannerDatatypeIT/mysql-cdc-AllDatatypeColumns2.avro"), SpannerRowsCheck.builder(spannerResourceManager, TABLE2) .setMinRows(1) .setMaxRows(1) @@ -219,72 +224,83 @@ private void assertAllDatatypeColumnsTableBackfillContents() { Map row = new HashMap<>(); row.put("varchar_column", "value1"); row.put("tinyint_column", "10"); - row.put("text_column", "dGV4dF9kYXRhXzEK"); - row.put("date_column", "2024-02-08T00:00:00.000Z"); + row.put("date_column", "2024-02-08T00:00:00Z"); row.put("smallint_column", "50"); row.put("mediumint_column", "1000"); row.put("int_column", "50000"); row.put("bigint_column", "987654321"); row.put("float_column", "45.67"); row.put("double_column", "123.789"); - row.put("datetime_column", "2024-02-08T08:15:30.000Z"); - row.put("timestamp_column", "2024-02-08T08:15:30.000Z"); + row.put("decimal_column", "456.12"); + row.put("datetime_column", "2024-02-08T08:15:30Z"); + row.put("timestamp_column", "2024-02-08T08:15:30Z"); row.put("time_column", "29730000000"); row.put("year_column", "2022"); - row.put("char_column", "Y2hhcjEK"); + // text, char, tinytext, mediumtext, longtext are BYTE columns + row.put("text_column", "/u/9n58P"); + row.put("char_column", "v58P"); + row.put("tinytext_column", "7+/+7/2fnw8="); + row.put("mediumtext_column", "/+3v79/v2vrx"); + row.put("longtext_column", "/+/v3+/a+vE="); row.put("tinyblob_column", "74696e79626c6f625f646174615f31"); - row.put("tinytext_column", "dGlueXRleHRfZGF0YV8xCg=="); row.put("blob_column", "626c6f625f646174615f31"); row.put("mediumblob_column", "6d656469756d626c6f625f646174615f31"); - row.put("mediumtext_column", "bWVkaXVtdGV4dF9kYXRhXzE="); row.put("longblob_column", "6c6f6e67626c6f625f646174615f31"); - row.put("longtext_column", "bG9uZ3RleHRfZGF0YV8x"); row.put("enum_column", "2"); row.put("bool_column", 0); row.put("other_bool_column", "1"); - row.put("binary_column", "62696e6172795f31"); + // The result which is shown in the matcher does not contain the full 40 characters + // of the binary and the ending characters seem to be getting truncated. + // Have manually verified that the values in spanner and source are identical for all the + // 40 characters. + // TODO: This is likely an issue with the matcher, figure out why this is happening. 
+ row.put("binary_column", "62696e6172795f3100000000000000000..."); row.put("varbinary_column", "76617262696e6172795f646174615f31"); row.put("bit_column", "102"); events.add(row); + row.clear(); row.put("varchar_column", "value2"); row.put("tinyint_column", "5"); - row.put("text_column", "dGV4dF9kYXRhXzIK"); - row.put("date_column", "2024-02-09T00:00:00.000Z"); + row.put("date_column", "2024-02-09T00:00:00Z"); row.put("smallint_column", "25"); row.put("mediumint_column", "500"); row.put("int_column", "25000"); row.put("bigint_column", "987654"); row.put("float_column", "12.34"); row.put("double_column", "56.789"); - row.put("datetime_column", "2024-02-09T15:30:45.000Z"); - row.put("timestamp_column", "2024-02-09T15:30:45.000Z"); + row.put("decimal_column", 123.45); + row.put("datetime_column", "2024-02-09T15:30:45Z"); + row.put("timestamp_column", "2024-02-09T15:30:45Z"); row.put("time_column", "55845000000"); row.put("year_column", "2023"); - row.put("char_column", "Y2hhcjIK"); + // text, char, tinytext, mediumtext, longtext are BYTE columns + row.put("text_column", "/u/9n58f"); + row.put("char_column", "v58f"); + row.put("tinytext_column", "7+/+7/2fnx8="); + row.put("mediumtext_column", "/+3v79/v2vry"); + row.put("longtext_column", "/+/v3+/a+vI="); row.put("tinyblob_column", "74696e79626c6f625f646174615f32"); - row.put("tinytext_column", "dGlueXRleHRfZGF0YV8yCg=="); row.put("blob_column", "626c6f625f646174615f32"); row.put("mediumblob_column", "6d656469756d626c6f625f646174615f32"); - row.put("mediumtext_column", "bWVkaXVtdGV4dF9kYXRhXzI="); row.put("longblob_column", "6c6f6e67626c6f625f646174615f32"); - row.put("longtext_column", "bG9uZ3RleHRfZGF0YV8y"); row.put("enum_column", "3"); row.put("bool_column", 1); row.put("other_bool_column", "0"); - row.put("binary_column", "62696e6172795f32"); + row.put("binary_column", "62696e6172795f3200000000000000000..."); row.put("varbinary_column", "76617262696e6172795f646174615f32"); row.put("bit_column", "25"); events.add(row); SpannerAsserts.assertThatStructs( spannerResourceManager.runQuery( - "select varchar_column, tinyint_column, text_column, date_column" + "select varchar_column, tinyint_column, date_column" + ", smallint_column, mediumint_column, int_column, bigint_column, float_column" - + ", double_column, datetime_column, timestamp_column, time_column, year_column, char_column" - + ", tinyblob_column, tinytext_column, blob_column, mediumblob_column, mediumtext_column, " - + " longblob_column, longtext_column, enum_column, bool_column, other_bool_column, binary_column" - + ", varbinary_column, bit_column from AllDatatypeColumns")) + + ", double_column, datetime_column, timestamp_column, time_column, year_column" + + ", tinyblob_column, blob_column, mediumblob_column" + + ", longblob_column, enum_column, bool_column, other_bool_column" + + ", varbinary_column, bit_column, decimal_column, text_column, binary_column" + + ", char_column, tinytext_column, mediumtext_column, longtext_column from AllDatatypeColumns")) .hasRecordsUnorderedCaseInsensitiveColumns(events); } @@ -294,42 +310,45 @@ private void assertAllDatatypeColumnsTableCdcContents() { Map row = new HashMap<>(); row.put("varchar_column", "value1"); row.put("tinyint_column", "15"); - row.put("text_column", "dGV4dF9kYXRhXzEK"); - row.put("date_column", "2024-02-08T00:00:00.000Z"); + row.put("date_column", "2024-02-08T00:00:00Z"); row.put("smallint_column", "50"); row.put("mediumint_column", "1000"); row.put("int_column", "50000"); row.put("bigint_column", "987654321"); 
row.put("float_column", "45.67"); row.put("double_column", "123.789"); - row.put("datetime_column", "2024-02-08T08:15:30.000Z"); - row.put("timestamp_column", "2024-02-08T08:15:30.000Z"); + row.put("decimal_column", "456.12"); + row.put("datetime_column", "2024-02-08T08:15:30Z"); + row.put("timestamp_column", "2024-02-08T08:15:30Z"); row.put("time_column", "29730000000"); row.put("year_column", "2022"); - row.put("char_column", "Y2hhcjEK"); + // text, char, tinytext, mediumtext, longtext are BYTE columns + row.put("text_column", "/u/9n58P"); + row.put("char_column", "v58P"); + row.put("tinytext_column", "7+/+7/2fnw8="); + row.put("mediumtext_column", "/+3v79/v2vrx"); + row.put("longtext_column", "/+/v3+/a+vE="); row.put("tinyblob_column", "74696e79626c6f625f646174615f31"); - row.put("tinytext_column", "dGlueXRleHRfZGF0YV8xCg=="); row.put("blob_column", "626c6f625f646174615f31"); row.put("mediumblob_column", "6d656469756d626c6f625f646174615f31"); - row.put("mediumtext_column", "bWVkaXVtdGV4dF9kYXRhXzE="); row.put("longblob_column", "6c6f6e67626c6f625f646174615f31"); - row.put("longtext_column", "bG9uZ3RleHRfZGF0YV8x"); row.put("enum_column", "2"); row.put("bool_column", 0); row.put("other_bool_column", "1"); - row.put("binary_column", "62696e6172795f31"); + row.put("binary_column", "62696e6172795f3100000000000000000..."); row.put("varbinary_column", "76617262696e6172795f646174615f31"); row.put("bit_column", "102"); events.add(row); SpannerAsserts.assertThatStructs( spannerResourceManager.runQuery( - "select varchar_column, tinyint_column, text_column, date_column" + "select varchar_column, tinyint_column, date_column" + ", smallint_column, mediumint_column, int_column, bigint_column, float_column" - + ", double_column, datetime_column, timestamp_column, time_column, year_column, char_column" - + ", tinyblob_column, tinytext_column, blob_column, mediumblob_column, mediumtext_column, " - + " longblob_column, longtext_column, enum_column, bool_column, other_bool_column, binary_column" - + ", varbinary_column, bit_column from AllDatatypeColumns")) + + ", double_column, datetime_column, timestamp_column, time_column, year_column" + + ", tinyblob_column, blob_column, mediumblob_column" + + ", longblob_column, enum_column, bool_column, other_bool_column" + + ", varbinary_column, bit_column, decimal_column, text_column, binary_column" + + ", char_column, tinytext_column, mediumtext_column, longtext_column from AllDatatypeColumns")) .hasRecordsUnorderedCaseInsensitiveColumns(events); } @@ -347,6 +366,7 @@ private void assertAllDatatypeColumns2TableBackfillContents() { row.put("bigint_column", 987654321); row.put("float_column", 45.67); row.put("double_column", 123.789); + row.put("decimal_column", 456.12); row.put("datetime_column", "2024-02-08T08:15:30Z"); row.put("timestamp_column", "2024-02-08T08:15:30Z"); row.put("time_column", "29730000000"); @@ -368,6 +388,7 @@ private void assertAllDatatypeColumns2TableBackfillContents() { row.put("bit_column", "AQI="); events.add(row); + row.clear(); row.put("varchar_column", "value2"); row.put("tinyint_column", 5); row.put("text_column", "text2"); @@ -378,6 +399,7 @@ private void assertAllDatatypeColumns2TableBackfillContents() { row.put("bigint_column", 987654); row.put("float_column", 12.34); row.put("double_column", 56.789); + row.put("decimal_column", 123.45); row.put("datetime_column", "2024-02-09T15:30:45Z"); row.put("timestamp_column", "2024-02-09T15:30:45Z"); row.put("time_column", "55845000000"); @@ -402,9 +424,9 @@ private void 
assertAllDatatypeColumns2TableBackfillContents() { "select varchar_column, tinyint_column, text_column, date_column" + ", smallint_column, mediumint_column, int_column, bigint_column, float_column" + ", double_column, datetime_column, timestamp_column, time_column, year_column, char_column" - + ", tinyblob_column, tinytext_column, blob_column, mediumblob_column, mediumtext_column, " - + " longblob_column, longtext_column, enum_column, bool_column, binary_column" - + ", varbinary_column, bit_column from AllDatatypeColumns2")) + + ", tinyblob_column, tinytext_column, blob_column, mediumblob_column, mediumtext_column" + + ", longblob_column, longtext_column, enum_column, bool_column, binary_column" + + ", varbinary_column, bit_column, decimal_column from AllDatatypeColumns2")) .hasRecordsUnorderedCaseInsensitiveColumns(events); } @@ -421,6 +443,7 @@ private void assertAllDatatypeColumns2TableCdcContents() { row.put("bigint_column", 987654321); row.put("float_column", 45.67); row.put("double_column", 123.789); + row.put("decimal_column", 456.12); row.put("datetime_column", "2024-02-08T08:15:30Z"); row.put("timestamp_column", "2024-02-08T08:15:30Z"); row.put("time_column", "29730000000"); @@ -447,9 +470,9 @@ private void assertAllDatatypeColumns2TableCdcContents() { "select varchar_column, tinyint_column, text_column, date_column" + ", smallint_column, mediumint_column, int_column, bigint_column, float_column" + ", double_column, datetime_column, timestamp_column, time_column, year_column, char_column" - + ", tinyblob_column, tinytext_column, blob_column, mediumblob_column, mediumtext_column, " - + " longblob_column, longtext_column, enum_column, bool_column, binary_column" - + ", varbinary_column, bit_column from AllDatatypeColumns2")) + + ", tinyblob_column, tinytext_column, blob_column, mediumblob_column, mediumtext_column" + + ", longblob_column, longtext_column, enum_column, bool_column, binary_column" + + ", varbinary_column, bit_column, decimal_column from AllDatatypeColumns2")) .hasRecordsUnorderedCaseInsensitiveColumns(events); } } diff --git a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerShardedMigrationIT.java b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT.java similarity index 85% rename from v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerShardedMigrationIT.java rename to v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT.java index 5ef80ea572..fcbdfce2d1 100644 --- a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerShardedMigrationIT.java +++ b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT.java @@ -44,30 +44,36 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -/** Sharded data migration Integration test for {@link DataStreamToSpanner} Flex template. */ +/** + * Sharded data migration Integration test with addition of migration_shard_id column in the schema + * for each table in the {@link DataStreamToSpanner} Flex template. 
+ */ @Category({TemplateIntegrationTest.class, SkipDirectRunnerTest.class}) @TemplateIntegrationTest(DataStreamToSpanner.class) @RunWith(JUnit4.class) -public class DataStreamToSpannerShardedMigrationIT extends DataStreamToSpannerITBase { +public class DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT + extends DataStreamToSpannerITBase { private static final Logger LOG = - LoggerFactory.getLogger(DataStreamToSpannerShardedMigrationIT.class); + LoggerFactory.getLogger( + DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT.class); private static final String TABLE = "Users"; private static final String MOVIE_TABLE = "Movie"; private static final String SESSION_FILE_RESOURCE = - "DataStreamToSpannerShardedMigrationIT/mysql-session.json"; + "DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/mysql-session.json"; private static final String TRANSFORMATION_CONTEXT_RESOURCE_SHARD1 = - "DataStreamToSpannerShardedMigrationIT/transformation-context-shard1.json"; + "DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/transformation-context-shard1.json"; private static final String TRANSFORMATION_CONTEXT_RESOURCE_SHARD2 = - "DataStreamToSpannerShardedMigrationIT/transformation-context-shard2.json"; + "DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/transformation-context-shard2.json"; private static final String SPANNER_DDL_RESOURCE = - "DataStreamToSpannerShardedMigrationIT/spanner-schema.sql"; + "DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/spanner-schema.sql"; - private static HashSet testInstances = new HashSet<>(); + private static HashSet + testInstances = new HashSet<>(); private static PipelineLauncher.LaunchInfo jobInfo1; private static PipelineLauncher.LaunchInfo jobInfo2; @@ -83,7 +89,7 @@ public class DataStreamToSpannerShardedMigrationIT extends DataStreamToSpannerIT public void setUp() throws IOException { // Prevent cleaning up of dataflow job after a test method is executed. 
skipBaseCleanup = true; - synchronized (DataStreamToSpannerShardedMigrationIT.class) { + synchronized (DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT.class) { testInstances.add(this); if (spannerResourceManager == null) { spannerResourceManager = setUpSpannerResourceManager(); @@ -132,7 +138,7 @@ public void setUp() throws IOException { */ @AfterClass public static void cleanUp() throws IOException { - for (DataStreamToSpannerShardedMigrationIT instance : testInstances) { + for (DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT instance : testInstances) { instance.tearDownBase(); } ResourceManagerUtils.cleanResources(spannerResourceManager, pubsubResourceManager); @@ -151,17 +157,17 @@ public void multiShardMigration() { jobInfo1, TABLE, "Users-backfill-logical-shard1.jsonl", - "DataStreamToSpannerShardedMigrationIT/Users-backfill-logical-shard1.jsonl"), + "DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-backfill-logical-shard1.jsonl"), uploadDataStreamFile( jobInfo1, TABLE, "Users-backfill-logical-shard2.jsonl", - "DataStreamToSpannerShardedMigrationIT/Users-backfill-logical-shard2.jsonl"), + "DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-backfill-logical-shard2.jsonl"), uploadDataStreamFile( jobInfo1, TABLE, "Users-cdc-shard1.jsonl", - "DataStreamToSpannerShardedMigrationIT/Users-cdc-shard1.jsonl"))) + "DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-cdc-shard1.jsonl"))) .build(); // Wait for conditions @@ -179,17 +185,17 @@ public void multiShardMigration() { jobInfo2, TABLE, "Users-backfill-logical-shard3.jsonl", - "DataStreamToSpannerShardedMigrationIT/Users-backfill-logical-shard3.jsonl"), + "DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-backfill-logical-shard3.jsonl"), uploadDataStreamFile( jobInfo2, TABLE, "Users-backfill-logical-shard4.jsonl", - "DataStreamToSpannerShardedMigrationIT/Users-backfill-logical-shard4.jsonl"), + "DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-backfill-logical-shard4.jsonl"), uploadDataStreamFile( jobInfo2, TABLE, "Users-cdc-shard2.jsonl", - "DataStreamToSpannerShardedMigrationIT/Users-cdc-shard2.jsonl"))) + "DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-cdc-shard2.jsonl"))) .build(); result = @@ -224,12 +230,12 @@ public void pkReorderedMultiShardMigration() { jobInfo1, MOVIE_TABLE, "Movie-shard1.jsonl", - "DataStreamToSpannerShardedMigrationIT/Movie-shard1.jsonl"), + "DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Movie-shard1.jsonl"), uploadDataStreamFile( jobInfo1, MOVIE_TABLE, "Movie-shard2.jsonl", - "DataStreamToSpannerShardedMigrationIT/Movie-shard2.jsonl"))) + "DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Movie-shard2.jsonl"))) .build(); // Wait for conditions diff --git a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT.java b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT.java new file mode 100644 index 0000000000..4d05fc4cdf --- /dev/null +++ b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT.java @@ -0,0 +1,286 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file 
except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.v2.templates; + +import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatResult; + +import com.google.cloud.teleport.metadata.SkipDirectRunnerTest; +import com.google.cloud.teleport.metadata.TemplateIntegrationTest; +import java.io.IOException; +import java.time.Duration; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import org.apache.beam.it.common.PipelineLauncher; +import org.apache.beam.it.common.PipelineOperator; +import org.apache.beam.it.common.utils.ResourceManagerUtils; +import org.apache.beam.it.conditions.ChainedConditionCheck; +import org.apache.beam.it.conditions.ConditionCheck; +import org.apache.beam.it.gcp.pubsub.PubsubResourceManager; +import org.apache.beam.it.gcp.spanner.SpannerResourceManager; +import org.apache.beam.it.gcp.spanner.conditions.SpannerRowsCheck; +import org.apache.beam.it.gcp.spanner.matchers.SpannerAsserts; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Sharded data migration Integration test without any migration_shard_id column transformation for + * the {@link DataStreamToSpanner} Flex template. + */ +@Category({TemplateIntegrationTest.class, SkipDirectRunnerTest.class}) +@TemplateIntegrationTest(DataStreamToSpanner.class) +@RunWith(JUnit4.class) +public class DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT + extends DataStreamToSpannerITBase { + private static final Logger LOG = + LoggerFactory.getLogger( + DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT.class); + + private static final String TABLE = "Users"; + private static final String SPANNER_DDL_RESOURCE = + "DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/spanner-schema.sql"; + + private static HashSet<DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT> + testInstances = new HashSet<>(); + private static PipelineLauncher.LaunchInfo jobInfo1; + private static PipelineLauncher.LaunchInfo jobInfo2; + + public static PubsubResourceManager pubsubResourceManager; + public static SpannerResourceManager spannerResourceManager; + + /** + * Set up resource managers and launch the Dataflow jobs once during the execution of this test class. + * + * @throws IOException + */ + @Before + public void setUp() throws IOException { + // Prevent cleaning up of dataflow job after a test method is executed.
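+ // The Dataflow jobs and the Spanner/Pub/Sub resource managers are static and shared across test methods; they are torn down once in cleanUp().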
+ skipBaseCleanup = true; + synchronized (DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT.class) { + testInstances.add(this); + if (spannerResourceManager == null) { + spannerResourceManager = setUpSpannerResourceManager(); + createSpannerDDL(spannerResourceManager, SPANNER_DDL_RESOURCE); + } + if (pubsubResourceManager == null) { + pubsubResourceManager = setUpPubSubResourceManager(); + } + if (jobInfo1 == null) { + jobInfo1 = + launchDataflowJob( + getClass().getSimpleName() + "shard1", + null, + null, + "shard1", + spannerResourceManager, + pubsubResourceManager, + new HashMap<>() { + { + put("inputFileFormat", "json"); + } + }); + } + if (jobInfo2 == null) { + jobInfo2 = + launchDataflowJob( + getClass().getSimpleName() + "shard2", + null, + null, + "shard2", + spannerResourceManager, + pubsubResourceManager, + new HashMap<>() { + { + put("inputFileFormat", "json"); + } + }); + } + } + } + + /** + * Clean up the Dataflow jobs and all the resources and resource managers. + * + * @throws IOException + */ + @AfterClass + public static void cleanUp() throws IOException { + for (DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT instance : + testInstances) { + instance.tearDownBase(); + } + ResourceManagerUtils.cleanResources(spannerResourceManager, pubsubResourceManager); + } + + @Test + public void multiShardMigration() { + // Two Dataflow jobs are running, corresponding to two physical shards that contain two logical + // shards each. Migrates the Users table from 4 logical shards and asserts that data from all the + // shards reaches Spanner. No transformation context is supplied, so no migration shard id column + // is populated. + ChainedConditionCheck conditionCheck = + ChainedConditionCheck.builder( + List.of( + uploadDataStreamFile( + jobInfo1, + TABLE, + "Users-backfill-logical-shard1.jsonl", + "DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard1.jsonl"), + uploadDataStreamFile( + jobInfo1, + TABLE, + "Users-backfill-logical-shard2.jsonl", + "DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard2.jsonl"), + uploadDataStreamFile( + jobInfo1, + TABLE, + "Users-cdc-shard1.jsonl", + "DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-cdc-shard1.jsonl"))) + .build(); + + // Wait for conditions + PipelineOperator.Result result = + pipelineOperator() + .waitForCondition(createConfig(jobInfo1, Duration.ofMinutes(8)), conditionCheck); + + // Assert Conditions + assertThatResult(result).meetsConditions(); + + conditionCheck = + ChainedConditionCheck.builder( + List.of( + uploadDataStreamFile( + jobInfo2, + TABLE, + "Users-backfill-logical-shard3.jsonl", + "DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard3.jsonl"), + uploadDataStreamFile( + jobInfo2, + TABLE, + "Users-backfill-logical-shard4.jsonl", + "DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard4.jsonl"), + uploadDataStreamFile( + jobInfo2, + TABLE, + "Users-cdc-shard2.jsonl", + "DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-cdc-shard2.jsonl"))) + .build(); + + result = + pipelineOperator() + .waitForCondition(createConfig(jobInfo2, Duration.ofMinutes(8)), conditionCheck); + assertThatResult(result).meetsConditions(); + + ConditionCheck rowsConditionCheck = + SpannerRowsCheck.builder(spannerResourceManager, TABLE) + .setMinRows(12) + .setMaxRows(12) + .build(); + result =
pipelineOperator() + .waitForCondition(createConfig(jobInfo1, Duration.ofMinutes(10)), rowsConditionCheck); + assertThatResult(result).meetsConditions(); + + // Assert specific rows + assertUsersTableContents(); + } + + private void assertUsersTableContents() { + List<Map<String, Object>> events = new ArrayList<>(); + + Map<String, Object> row = new HashMap<>(); + row.put("id", 1); + row.put("name", "Tester1"); + row.put("age", 21); + events.add(row); + + row = new HashMap<>(); + row.put("id", 5); + row.put("name", "Tester5"); + row.put("age", 23); + events.add(row); + + row = new HashMap<>(); + row.put("id", 6); + row.put("name", "Tester6"); + row.put("age", 22); + events.add(row); + + row = new HashMap<>(); + row.put("id", 7); + row.put("name", "Tester7"); + row.put("age", 7); + events.add(row); + + row = new HashMap<>(); + row.put("id", 8); + row.put("name", "Tester8"); + row.put("age", 8); + events.add(row); + + row = new HashMap<>(); + row.put("id", 9); + row.put("name", "Tester9"); + row.put("age", 9); + events.add(row); + + row = new HashMap<>(); + row.put("id", 10); + row.put("name", "Tester10"); + row.put("age", 10); + events.add(row); + + row = new HashMap<>(); + row.put("id", 11); + row.put("name", "Tester11"); + row.put("age", 11); + events.add(row); + + row = new HashMap<>(); + row.put("id", 12); + row.put("name", "Tester12"); + row.put("age", 12); + events.add(row); + + row = new HashMap<>(); + row.put("id", 13); + row.put("name", "Tester13"); + row.put("age", 13); + events.add(row); + + row = new HashMap<>(); + row.put("id", 14); + row.put("name", "Tester14"); + row.put("age", 14); + events.add(row); + + row = new HashMap<>(); + row.put("id", 15); + row.put("name", "Tester15"); + row.put("age", 15); + events.add(row); + + SpannerAsserts.assertThatStructs(spannerResourceManager.runQuery("select * from Users")) + .hasRecordsUnorderedCaseInsensitiveColumns(events); + } +} diff --git a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerTest.java b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerTest.java new file mode 100644 index 0000000000..f471d90afa --- /dev/null +++ b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerTest.java @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License.
+ */ +package com.google.cloud.teleport.v2.templates; + +import static org.junit.Assert.assertEquals; + +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +public class DataStreamToSpannerTest { + + @Rule public ExpectedException expectedEx = ExpectedException.none(); + + @Test + public void testGetSourceTypeWithDatastreamSourceType() { + String[] args = new String[] {"--datastreamSourceType=mysql"}; + DataStreamToSpanner.Options options = + PipelineOptionsFactory.fromArgs(args) + .withValidation() + .as(DataStreamToSpanner.Options.class); + String result = DataStreamToSpanner.getSourceType(options); + + assertEquals("mysql", result); + } + + @Test + public void testGetSourceTypeWithEmptyStreamName() { + expectedEx.expect(IllegalArgumentException.class); + expectedEx.expectMessage("Stream name cannot be empty."); + String[] args = new String[] {""}; + DataStreamToSpanner.Options options = + PipelineOptionsFactory.fromArgs(args) + .withValidation() + .as(DataStreamToSpanner.Options.class); + String result = DataStreamToSpanner.getSourceType(options); + } + + @Test + public void testGetSourceTypeWithGcpCredentialsMissing() { + expectedEx.expect(IllegalArgumentException.class); + expectedEx.expectMessage("Unable to initialize DatastreamClient:"); + String[] args = + new String[] { + "--streamName=projects/sample-project/locations/sample-location/streams/sample-stream" + }; + DataStreamToSpanner.Options options = + PipelineOptionsFactory.fromArgs(args) + .withValidation() + .as(DataStreamToSpanner.Options.class); + String result = DataStreamToSpanner.getSourceType(options); + } +} diff --git a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/datastream/ChangeEventContextFactoryTest.java b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/datastream/ChangeEventContextFactoryTest.java new file mode 100644 index 0000000000..d49453bfd2 --- /dev/null +++ b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/datastream/ChangeEventContextFactoryTest.java @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.teleport.v2.templates.datastream; + +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.cloud.teleport.v2.spanner.ddl.Ddl; +import java.io.IOException; +import org.json.JSONObject; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +public class ChangeEventContextFactoryTest { + + @Rule public ExpectedException expectedEx = ExpectedException.none(); + + private JsonNode getJsonNode(String json) throws IOException { + ObjectMapper mapper = new ObjectMapper(); + mapper.enable(DeserializationFeature.USE_BIG_DECIMAL_FOR_FLOATS); + return mapper.readTree(json); + } + + @Test + public void testCreateChangeEventContextWithEmptySourceType() throws Exception { + expectedEx.expect(InvalidChangeEventException.class); + JSONObject changeEvent = ChangeEventConvertorTest.getTestChangeEvent("Users2"); + // Test Ddl + Ddl ddl = ChangeEventConvertorTest.getTestDdl(); + ChangeEventContext changeEventContext = + ChangeEventContextFactory.createChangeEventContext( + getJsonNode(changeEvent.toString()), + ddl, + "shadow_", + DatastreamConstants.MYSQL_SOURCE_TYPE); + } + + @Test + public void testCreateChangeEventContextWithNonMatchingSourceType() throws Exception { + expectedEx.expect(InvalidChangeEventException.class); + expectedEx.expectMessage("Change event with invalid source"); + JSONObject changeEvent = ChangeEventConvertorTest.getTestChangeEvent("Users2"); + changeEvent.put( + DatastreamConstants.EVENT_SOURCE_TYPE_KEY, DatastreamConstants.ORACLE_SOURCE_TYPE); + // Test Ddl + Ddl ddl = ChangeEventConvertorTest.getTestDdl(); + ChangeEventContext changeEventContext = + ChangeEventContextFactory.createChangeEventContext( + getJsonNode(changeEvent.toString()), + ddl, + "shadow_", + DatastreamConstants.MYSQL_SOURCE_TYPE); + } + + @Test + public void testCreateChangeEventContextWithNotSupportedSource() throws Exception { + expectedEx.expect(InvalidChangeEventException.class); + expectedEx.expectMessage("Unsupported source database"); + JSONObject changeEvent = ChangeEventConvertorTest.getTestChangeEvent("Users2"); + changeEvent.put(DatastreamConstants.EVENT_SOURCE_TYPE_KEY, "xyz"); + // Test Ddl + Ddl ddl = ChangeEventConvertorTest.getTestDdl(); + ChangeEventContext changeEventContext = + ChangeEventContextFactory.createChangeEventContext( + getJsonNode(changeEvent.toString()), ddl, "shadow_", "xyz"); + } +} diff --git a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/spanner/ProcessInformationSchemaTest.java b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/spanner/ProcessInformationSchemaTest.java index 4ca5b67526..03f0c0d26d 100644 --- a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/spanner/ProcessInformationSchemaTest.java +++ b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/spanner/ProcessInformationSchemaTest.java @@ -17,15 +17,28 @@ import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.MatcherAssert.assertThat; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static 
org.mockito.Mockito.when; +import com.google.api.gax.longrunning.OperationFuture; +import com.google.cloud.spanner.DatabaseAdminClient; import com.google.cloud.spanner.Dialect; import com.google.cloud.teleport.v2.spanner.ddl.Ddl; +import com.google.spanner.admin.database.v1.UpdateDatabaseDdlMetadata; import java.util.Arrays; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; +import org.apache.beam.sdk.io.gcp.spanner.SpannerAccessor; import org.apache.beam.sdk.io.gcp.spanner.SpannerConfig; +import org.apache.beam.sdk.options.ValueProvider; import org.junit.Test; /** Unit tests for ProcessInformationSchema class. */ @@ -374,4 +387,54 @@ public void canListDataTablesWithNoShadowTablesInDdl() throws Exception { processInformationSchema.getDataTablesWithNoShadowTables(getTestDdlWithGSqlDialect()); assertThat(dataTablesWithNoShadowTables, is(Arrays.asList("Users_interleaved"))); } + + @Test + public void canCreateShadowTablesInSpanner() throws Exception { + SpannerConfig spannerConfig = mock(SpannerConfig.class); + SpannerAccessor spannerAccessor = mock(SpannerAccessor.class); + DatabaseAdminClient databaseAdminClient = mock(DatabaseAdminClient.class); + OperationFuture operationFuture = mock(OperationFuture.class); + + ProcessInformationSchema.ProcessInformationSchemaFn processInformationSchema = + new ProcessInformationSchema.ProcessInformationSchemaFn( + spannerConfig, /* shouldCreateShadowTables= */ true, "shadow_", "mysql"); + processInformationSchema.setDialect(Dialect.GOOGLE_STANDARD_SQL); + processInformationSchema.setSpannerAccessor(spannerAccessor); + + // Mock method calls + when(databaseAdminClient.updateDatabaseDdl(anyString(), anyString(), any(), any())) + .thenReturn(operationFuture); + when(spannerAccessor.getDatabaseAdminClient()).thenReturn(databaseAdminClient); + when(operationFuture.get(anyLong(), any())).thenReturn(null); + ValueProvider sampleValueProvider = + ValueProvider.StaticValueProvider.of("sample-value"); + when(spannerConfig.getInstanceId()).thenReturn(sampleValueProvider); + when(spannerConfig.getDatabaseId()).thenReturn(sampleValueProvider); + + processInformationSchema.createShadowTablesInSpanner(getTestDdlWithGSqlDialect()); + + List createShadowTableStatements = + Collections.singletonList( + "CREATE TABLE `shadow_Users_interleaved` (\n" + + "\t`first_name` STRING(MAX),\n" + + "\t`last_name` STRING(5),\n" + + "\t`age` INT64,\n" + + "\t`bool_field` BOOL,\n" + + "\t`int64_field` INT64,\n" + + "\t`float64_field` FLOAT64,\n" + + "\t`string_field` STRING(MAX),\n" + + "\t`bytes_field` BYTES(MAX),\n" + + "\t`timestamp_field` TIMESTAMP,\n" + + "\t`date_field` DATE,\n" + + "\t`id` INT64,\n" + + "\t`timestamp` INT64,\n" + + "\t`log_file` STRING(MAX),\n" + + "\t`log_position` INT64,\n" + + ") PRIMARY KEY (`first_name` ASC, `last_name` DESC, `age` ASC, `bool_field` ASC, `int64_field` ASC, `float64_field` ASC, `string_field` ASC, `bytes_field` ASC, `timestamp_field` ASC, `date_field` ASC, `id` ASC)"); + // Verify method calls + verify(databaseAdminClient, times(1)) + .updateDatabaseDdl( + eq("sample-value"), eq("sample-value"), eq(createShadowTableStatements), eq(null)); + verify(operationFuture, times(1)).get(anyLong(), any()); + } } diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/AllDatatypeColumns-statements.sql b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/AllDatatypeColumns-statements.sql new file mode 100644 index 0000000000..f0e2752a42 --- 
/dev/null +++ b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/AllDatatypeColumns-statements.sql @@ -0,0 +1,73 @@ +INSERT INTO AllDatatypeColumns ( + varchar_column, tinyint_column, text_column, date_column, smallint_column, + mediumint_column, int_column, bigint_column, float_column, double_column, + decimal_column, datetime_column, timestamp_column, time_column, year_column, + char_column, tinyblob_column, tinytext_column, blob_column, mediumblob_column, + mediumtext_column, longblob_column, longtext_column, enum_column, bool_column, + other_bool_column, binary_column, varbinary_column, bit_column +) VALUES ( + 'value1', 10, UNHEX('746578745f646174615f310a'), '2024-02-08', 50, + 1000, 50000, 987654321, 45.67, 123.789, 456.12, + '2024-02-08 08:15:30', '2024-02-08 08:15:30', SEC_TO_TIME(29730), 2022, + UNHEX('63686172310a'), UNHEX('74696e79626c6f625f646174615f31'), + UNHEX('74696e79746578745f646174615f310a'), UNHEX('626c6f625f646174615f31'), + UNHEX('6d656469756d626c6f625f646174615f31'), + UNHEX('6d656469756d746578745f646174615f31'), + UNHEX('6c6f6e67626c6f625f646174615f31'), + UNHEX('6c6f6e67746578745f646174615f31'), '2', FALSE, TRUE, + UNHEX('62696e6172795f31'), UNHEX('76617262696e6172795f646174615f31'), b'1100110' + ); + +INSERT INTO AllDatatypeColumns ( + varchar_column, tinyint_column, text_column, date_column, smallint_column, + mediumint_column, int_column, bigint_column, float_column, double_column, + decimal_column, datetime_column, timestamp_column, time_column, year_column, + char_column, tinyblob_column, tinytext_column, blob_column, mediumblob_column, + mediumtext_column, longblob_column, longtext_column, enum_column, bool_column, + other_bool_column, binary_column, varbinary_column, bit_column +) VALUES ( + 'value2', 5, UNHEX('746578745f646174615f320a'), '2024-02-09', 25, + 500, 25000, 987654, 12.34, 56.789, 123.45, + '2024-02-09 15:30:45', '2024-02-09 15:30:45', SEC_TO_TIME(55845), 2023, + UNHEX('63686172320a'), UNHEX('74696e79626c6f625f646174615f32'), + UNHEX('74696e79746578745f646174615f320a'), UNHEX('626c6f625f646174615f32'), + UNHEX('6d656469756d626c6f625f646174615f32'), + UNHEX('6d656469756d746578745f646174615f32'), + UNHEX('6c6f6e67626c6f625f646174615f32'), + UNHEX('6c6f6e67746578745f646174615f32'), '3', TRUE, FALSE, + UNHEX('62696e6172795f32'),UNHEX('76617262696e6172795f646174615f32'), b'11001' + ); + +DELETE FROM AllDatatypeColumns where varchar_column = "value2"; + +UPDATE AllDatatypeColumns +SET + tinyint_column = 15, + text_column = UNHEX('746578745f646174615f310a'), + date_column = '2024-02-08', + smallint_column = 50, + mediumint_column = 1000, + int_column = 50000, + bigint_column = 987654321, + float_column = 45.67, + double_column = 123.789, + decimal_column = 456.12, + datetime_column = '2024-02-08 08:15:30', + timestamp_column = '2024-02-08 08:15:30', + time_column = SEC_TO_TIME(29730), + year_column = 2022, + char_column = UNHEX('63686172310a'), + tinyblob_column = UNHEX('74696e79626c6f625f646174615f31'), + tinytext_column = UNHEX('74696e79746578745f646174615f310a'), + blob_column = UNHEX('626c6f625f646174615f31'), + mediumblob_column = UNHEX('6d656469756d626c6f625f646174615f31'), + mediumtext_column = UNHEX('6d656469756d746578745f646174615f31'), + longblob_column = UNHEX('6c6f6e67626c6f625f646174615f31'), + longtext_column = UNHEX('6c6f6e67746578745f646174615f31'), + enum_column = '2', + bool_column = FALSE, + other_bool_column = TRUE, + binary_column = UNHEX('62696e6172795f31'), + varbinary_column = 
UNHEX('76617262696e6172795f646174615f31'), + bit_column = b'1100110' +WHERE varchar_column = 'value1'; \ No newline at end of file diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/AllDatatypeColumns2-statements.sql b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/AllDatatypeColumns2-statements.sql new file mode 100644 index 0000000000..5e24936526 --- /dev/null +++ b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/AllDatatypeColumns2-statements.sql @@ -0,0 +1,70 @@ +INSERT INTO AllDatatypeColumns2 ( + varchar_column, tinyint_column, text_column, date_column, smallint_column, + mediumint_column, int_column, bigint_column, float_column, double_column, + decimal_column, datetime_column, timestamp_column, time_column, year_column, + char_column, tinyblob_column, tinytext_column, blob_column, mediumblob_column, + mediumtext_column, longblob_column, longtext_column, enum_column, bool_column, + binary_column, varbinary_column, bit_column +) VALUES ( + 'value1', 10, 'text1', '2024-02-08', 50, + 1000, 50000, 987654321, 45.67, 123.789, 456.12, + '2024-02-08 08:15:30', '2024-02-08 08:15:30', SEC_TO_TIME(29730), 2022, + 'char_1', UNHEX('74696e79626c6f625f646174615f31'), 'tinytext_data_1', + UNHEX('626c6f625f646174615f31'), UNHEX('6d656469756d626c6f625f646174615f31'), + 'mediumtext_data_1', UNHEX('6c6f6e67626c6f625f646174615f31'), 'longtext_data_1', + '2', FALSE, + UNHEX('62696e6172795f646174615f3100000000000000'), UNHEX('76617262696e6172795f646174615f31'), + b'1100110' + ); + +INSERT INTO AllDatatypeColumns2 ( + varchar_column, tinyint_column, text_column, date_column, smallint_column, + mediumint_column, int_column, bigint_column, float_column, double_column, + decimal_column, datetime_column, timestamp_column, time_column, year_column, + char_column, tinyblob_column, tinytext_column, blob_column, mediumblob_column, + mediumtext_column, longblob_column, longtext_column, enum_column, bool_column, + binary_column, varbinary_column, bit_column +) VALUES ( + 'value2', 5, 'text2', '2024-02-09', 25, + 500, 25000, 987654, 12.34, 56.789, 123.45, + '2024-02-09 15:30:45', '2024-02-09 15:30:45', SEC_TO_TIME(55845), 2023, + 'char_2', UNHEX('74696e79626c6f625f646174615f32'), 'tinytext_data_2', + UNHEX('626c6f625f646174615f32'), UNHEX('6d656469756d626c6f625f646174615f32'), + 'mediumtext_data_2', UNHEX('6c6f6e67626c6f625f646174615f32'), 'longtext_data_2', + '3', TRUE, + UNHEX('62696e6172795f646174615f3200000000000000'), UNHEX('76617262696e6172795f646174615f32'), + b'11001' + ); + +UPDATE AllDatatypeColumns2 +SET + tinyint_column = 15, + text_column = 'text1', + date_column = '2024-02-08', + smallint_column = 50, + mediumint_column = 1000, + int_column = 50000, + bigint_column = 987654321, + float_column = 45.67, + double_column = 123.789, + decimal_column = 456.12, + datetime_column = '2024-02-08 08:15:30', + timestamp_column = '2024-02-08 08:15:30', + time_column = SEC_TO_TIME(29730), + year_column = 2022, + char_column = 'char_1', + tinyblob_column = UNHEX('74696e79626c6f625f646174615f31'), + tinytext_column = 'tinytext_data_1', + blob_column = UNHEX('626c6f625f646174615f31'), + mediumblob_column = UNHEX('6d656469756d626c6f625f646174615f31'), + mediumtext_column = 'mediumtext_data_1', + longblob_column = UNHEX('6c6f6e67626c6f625f646174615f31'), + longtext_column = 'longtext_data_1', + enum_column = '2', + bool_column = FALSE, + binary_column = UNHEX('62696e6172795f646174615f3100000000000000'), + varbinary_column = 
UNHEX('76617262696e6172795f646174615f31'), + bit_column = b'1100110' +WHERE varchar_column = 'value1'; + +DELETE FROM AllDatatypeColumns2 WHERE varchar_column = 'value2'; \ No newline at end of file diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-backfill-AllDatatypeColumns.avro b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-backfill-AllDatatypeColumns.avro new file mode 100644 index 0000000000000000000000000000000000000000..739dec2fcff919c7771e355d906f3590f19fbaa7 GIT binary patch literal 4398 zcmd^CPiP!f7~i~x6iTI+ARda!;7NA7J1@I4J0VJ{4Jb%0+FOaso0&JsOW({}XWqmv zC9)tC1-+;T4<1VJ;z18Z3LZp{MO&#C6^nvB=}9mk{z373JG--+-JRKz30_PfncesO z`MvM=d*A!!S$k^^-A}edaVl~)sZYMEUe;u%O|@k$q>cz(t*iyaSD(4wI?eeRB8f71 zQt;RhB3SkqnB-j&o6|JZ zApHnUDwR#5T$^^td=hr1=L;jCv-q$SI_0jLA# zoM%NyNwzzZs$RGMGH?@b_`VItf<@}|F6%Wo;&5TWWrg!tSK?B)$;{lv1(se~|52`k zr>TILXt+%{T(2|FO^4?676_C_{2}xun+rQ@r)iVVmcD9jvRHpAh#=`3hZ3aVMwG-0 z;!q(9qM$+S7a&yKq?ztBMb*ep=<%E7N%xU37uuvvC9pMDj<(6PQ#`uy=f z&pzgzSb6i6>Nnq>yziy?w-=u>Nz>LHs#ls#y;i9=EvG`TX;y4P+*<$V>%ZQ;^Rv_a zP5pgz>YKqSzZ1R4!O6J@(-^OKFkiT_-+pJV9SSvBN7Y8XQP-PRqiNL|RW2N&rm(0Q zM-_(#G9+BDCbM%?-HJq@UQ^TrsirR`C&y&UD70mt1nr0 z)3Y0nYuA^Y1(bj-l~Ael$mEO6JlE05{bLhY!te2?&pyBQ+6)lvml~MQF`@6TU zgXze_51)NMhi*QEj;qcH;a1GLVHwo43~V@tXW)is)D0cJ)f?WDjvh|?*8o5{!}5&b z8V&HZ%mzG7kESKPXk4{GOAcUbh2%Q6a01i;GX(^}!v5fymtEQo> z_S^5<x8SG)cHJ>^h3#ko#-eb~RfM8F5-M_gEAYFUXKtXdwY#)r4H* zl!yyyJ{i8|aUpo5Z4Y>iWD-(#xIU%5!DyUir{(O!WAVBy+%6b0;n= zi(*DHj1?xzKv~&H7!qa6W9BJ7dKs%qLP1x@6bzCl4p6$^*(m+p3J)0U=RGF2V4164 z_yL+!D4Rrum>}dH3EPjh$1KJ`5RgsF~BRw{Csq z(skmmfzFg9dsJrVG46TPa+qK->kA}<+foWfeBfn>nSiu~Bz=5F?Hh zE!sJz`KeYlhq)vXv9_pcG*FnB`)%&WxgKCmTP-kW7xQgv2je1M&X=` zJP#Skb~|RO*9~6=PHGL`2XHKSq)zXmJ`YD6E)2M>a1rZDTg{cvx43Nj*5ssguY}8Y3J=UYx4QhSFJ5B*PlvekoJv3aZ++4O5!E+ zP%cVl!TiK4L8!V(bIm8n(dNFIHV%GK!c|+^ht2a_@%Tm#Bg5);W#z+P9{=wAvoBwJ z&i{Gyqchj1|9s*FS|=tWO{ZqlI<3{~hF!BQ)2i9H;m&ve-nxI~-f#51-_`%>_`!bG zYe#R0noZ~?3{#@!!g$~$Uhu(G5K1)|M{`EqsOwG3Xj=2eoRE~LAu5_nq8ep>91@|= zrK4{&w-HHS{mf7Upc=;*28u?L{=I{xA-=a*l*pq`++B}_*r&@rV~9qsIV_2d0d-=0AZ zy89Tq`tA(+X?O3Fqv*tS^Lg}*_Wj>){_*+}nyK8VqMaKb@7}oy(1C{^zPvYuZakY@9n%0)t6{*?cxPJL zLpleq>+V!1Dl;27CMUC#+BHBJs&Wp{kTv9q8`f~fJ-MXsXsKZ;R6ZWvo_ZAd>HlZ> zVRS5&9|rkd{$lSl&=f|$9z}OAA3^Wjx`?g?%cs%q^3A<}oR^l+^t^?3?mW2l{^v@2 z{fEDkMu9?sDoMw7LN(LU`#{6g(P{9)~3gccAg&>sGm+QSHsHp7RJ Cn5eA) literal 0 HcmV?d00001 diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-backfill-AllDatatypeColumns2.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-backfill-AllDatatypeColumns2.jsonl deleted file mode 100644 index ee35c320b7..0000000000 --- a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-backfill-AllDatatypeColumns2.jsonl +++ /dev/null @@ -1,2 +0,0 @@ 
-{"uuid":"2a646066-40f2-44b7-bec0-61d100000000","read_timestamp":"2024-02-08T09:32:05.695Z","source_timestamp":"2024-02-08T09:30:59.000Z","object":"it_AllDatatypeColumns2","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/int-test-shreya","schema_key":"20f68e0ee69f7b864b730a1273b6f1b698ab4c28","sort_keys":[1707384659000,"mysql-bin.000029",9615213],"source_metadata":{"table":"AllDatatypeColumns2","database":"it","primary_keys":["varchar_column"],"log_file":"mysql-bin.000029","log_position":9615213,"change_type":"INSERT","is_deleted":false},"payload":{"varchar_column":"value1","tinyint_column":10,"text_column":"text1","date_column":"2024-02-08T00:00:00.000Z","smallint_column":50,"mediumint_column":1000,"int_column":50000,"bigint_column":987654321,"float_column":45.67,"double_column":123.789,"decimal_column":456.12,"datetime_column":"2024-02-08T08:15:30.000Z","timestamp_column":"2024-02-08T08:15:30.000Z","time_column":29730000000,"year_column":2022,"char_column":"char_1","tinyblob_column":"74696e79626c6f625f646174615f31","tinytext_column":"tinytext_data_1","blob_column":"626c6f625f646174615f31","mediumblob_column":"6d656469756d626c6f625f646174615f31","mediumtext_column":"mediumtext_data_1","longblob_column":"6c6f6e67626c6f625f646174615f31","longtext_column":"longtext_data_1","enum_column":"2","bool_column":0,"binary_column":"62696e6172795f646174615f3100000000000000","varbinary_column":"76617262696e6172795f646174615f31","bit_column":102}} -{"uuid":"2a646066-40f2-44b7-bec0-61d100000001","read_timestamp":"2024-02-08T09:32:05.695Z","source_timestamp":"2024-02-08T09:30:59.000Z","object":"it_AllDatatypeColumns2","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/int-test-shreya","schema_key":"20f68e0ee69f7b864b730a1273b6f1b698ab4c28","sort_keys":[1707384659000,"mysql-bin.000029",9615856],"source_metadata":{"table":"AllDatatypeColumns2","database":"it","primary_keys":["varchar_column"],"log_file":"mysql-bin.000029","log_position":9615856,"change_type":"INSERT","is_deleted":false},"payload":{"varchar_column":"value2","tinyint_column":5,"text_column":"text2","date_column":"2024-02-09T00:00:00.000Z","smallint_column":25,"mediumint_column":500,"int_column":25000,"bigint_column":987654,"float_column":12.34,"double_column":56.789,"decimal_column":123.45,"datetime_column":"2024-02-09T15:30:45.000Z","timestamp_column":"2024-02-09T15:30:45.000Z","time_column":55845000000,"year_column":2023,"char_column":"char_2","tinyblob_column":"74696e79626c6f625f646174615f32","tinytext_column":"tinytext_data_2","blob_column":"626c6f625f646174615f32","mediumblob_column":"6d656469756d626c6f625f646174615f32","mediumtext_column":"mediumtext_data_2","longblob_column":"6c6f6e67626c6f625f646174615f32","longtext_column":"longtext_data_2","enum_column":"3","bool_column":1,"binary_column":"62696e6172795f646174615f3200000000000000","varbinary_column":"76617262696e6172795f646174615f32","bit_column":25}} \ No newline at end of file diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-cdc-AllDatatypeColumns.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-cdc-AllDatatypeColumns.jsonl deleted file mode 100644 index a6c835c373..0000000000 --- a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-cdc-AllDatatypeColumns.jsonl +++ /dev/null @@ -1,2 +0,0 @@ 
-{"uuid":"45c148ba-a57f-4882-879e-831700000001","read_timestamp":"2024-02-09T09:59:32.777Z","source_timestamp":"2024-02-09T09:59:19.000Z","object":"it_AllDatatypeColumns","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/int-test-shreya","schema_key":"0df5741bfd9144e7b58ce8cd018728d345ed6acd","sort_keys":[1707472759000,"mysql-bin.000029",9618920],"source_metadata":{"table":"AllDatatypeColumns","database":"it","primary_keys":["varchar_column"],"log_file":"mysql-bin.000029","log_position":9618920,"change_type":"DELETE","is_deleted":true},"payload":{"varchar_column":"value2","tinyint_column":5,"text_column":"746578745f646174615f320a","date_column":"2024-02-09T00:00:00.000Z","smallint_column":25,"mediumint_column":500,"int_column":25000,"bigint_column":987654,"float_column":12.34,"double_column":56.789,"decimal_column":123.45,"datetime_column":"2024-02-09T15:30:45.000Z","timestamp_column":"2024-02-09T15:30:45.000Z","time_column":55845000000,"year_column":2023,"char_column":"63686172320a","tinyblob_column":"74696e79626c6f625f646174615f32","tinytext_column":"74696e79746578745f646174615f320a","blob_column":"626c6f625f646174615f32","mediumblob_column":"6d656469756d626c6f625f646174615f32","mediumtext_column":"6d656469756d746578745f646174615f32","longblob_column":"6c6f6e67626c6f625f646174615f32","longtext_column":"6c6f6e67746578745f646174615f32","enum_column":"3","bool_column":1,"other_bool_column":0,"binary_column":"62696e6172795f32","varbinary_column":"76617262696e6172795f646174615f32","bit_column":25}} -{"uuid":"45c148ba-a57f-4882-879e-831700000000","read_timestamp":"2024-02-09T09:59:32.777Z","source_timestamp":"2024-02-09T09:58:59.000Z","object":"it_AllDatatypeColumns","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/int-test-shreya","schema_key":"0df5741bfd9144e7b58ce8cd018728d345ed6acd","sort_keys":[1707472739000,"mysql-bin.000029",9618285],"source_metadata":{"table":"AllDatatypeColumns","database":"it","primary_keys":["varchar_column"],"log_file":"mysql-bin.000029","log_position":9618285,"change_type":"UPDATE-INSERT","is_deleted":false},"payload":{"varchar_column":"value1","tinyint_column":15,"text_column":"746578745f646174615f310a","date_column":"2024-02-08T00:00:00.000Z","smallint_column":50,"mediumint_column":1000,"int_column":50000,"bigint_column":987654321,"float_column":45.67,"double_column":123.789,"decimal_column":456.12,"datetime_column":"2024-02-08T08:15:30.000Z","timestamp_column":"2024-02-08T08:15:30.000Z","time_column":29730000000,"year_column":2022,"char_column":"63686172310a","tinyblob_column":"74696e79626c6f625f646174615f31","tinytext_column":"74696e79746578745f646174615f310a","blob_column":"626c6f625f646174615f31","mediumblob_column":"6d656469756d626c6f625f646174615f31","mediumtext_column":"6d656469756d746578745f646174615f31","longblob_column":"6c6f6e67626c6f625f646174615f31","longtext_column":"6c6f6e67746578745f646174615f31","enum_column":"2","bool_column":0,"other_bool_column":1,"binary_column":"62696e6172795f31","varbinary_column":"76617262696e6172795f646174615f31","bit_column":102}} \ No newline at end of file diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-cdc-AllDatatypeColumns2.avro b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-cdc-AllDatatypeColumns2.avro new file mode 100644 index 0000000000000000000000000000000000000000..3a21fccabea252fa6b19b5bec0f0f3e29bb8a29d GIT binary patch literal 
4382 zcmdT{PiP}m98P{)y0{j+EFMHwnS-aK?Yv|rnX-pg8&FuZXgw`8yqS5^PJJ^I=TF(N zRC-zz^yER0wFv$}7b&_RD2nc3#a*p<5oAU8q<9c4h#nMwFOy7@G&5N{MX-lB$qnke=au{IA5e86#)-< zJaXKC;Cstv+zh0-^A~%eQDlq6ZlNIbZFg1LEG6%KM$KkuJ1hvP(@G+f*4)GRlEl4L z+n{_o#*@UC9h-AIkT&}&243VF?09Q>`Z_bh1f7(!4u^$nUgmN!kx{4FnPbLOVg{N| zvvyX`!1F^96C^>&*r@MQk!y#{5q$Izi%H@@S4IpB!BZ0{JzQ*<{_ceP5)ShoQk}D; zRIhy>O`?@fgUpzgZ#&d)-`*ON9(_$vHciX!19Dz+BhC?5m%){x;FcHIq3yW|@+6@< zw*%!GYt&suoe4?ys7#_ql(&PX$v6wyfk2XQ-Sar3F7h(KOpCU;M-So?dWRV~2t3Th z2K5oD-!wYgG#zS5GMPmq9!hg!Mty?`ZMSW^VQK|@C#__pvkU3Dy+Ng?qfl5{>trBf zZNl(9w22IqfkV+sj=oBVnRevlD3Bvnw^y^WW9g<(*g}z|Gt_yT#joi_xYyGtlH%8y z1(<>RoTpVtNY>jS6J9rX7?^Q3e%``uVFzOO&dIa5#qq$v(~4xVF2p5Hld-+?3oJRb z{-a(;Z&OAwF@J+{JYL7{n>Nev7A%xS{4w~F&OJSCrHRR>OJ7)9n9rYbB8YwCSc>Fa zh>&Yc@3c3XE${C@i3s-o0ntxn3bDk0@+jml-slFf3ZVpV3lKUZIV_vX*W-dAtn z@6zat{ryfmc!rk^(nU zc_zN|1~cn{=Zfza;^rrAO?DWyg#Rv4%Bqyr8DmP#kWHgjH!4I{RYO(l2C?c?(G63x zv`L6z7P^2C%(PgAT!qW8P8J>*@ttIX%W&`D4X6Mk)8U>cmlhY68|6nHeWLOBG8AA6 z9WY`toPkq9(=u#reevV3?>#pOChXk>+b>SSt)2aMPQvNS>VxpL^xdD&{`S;7Ocr)a zu(kW<&W$S=+V}ANCpU|5{XUrL*q#h$YgJWKSxr+3F4a%eAC#h+JhH=x+nCV^A2HVJC; z(#QKBpx_AnbQ1P1oq!jvU4YkGjRn|gT-pE2JU0&$vl?vO`1{&R9|>U|JpA$e_6hj1 tCrmO%m}FR(Bt@76#tW0cAz@5=TPSfv5}@6e&)I(==9Or66GVF&$U=lp<4FMrXcA zut^)vGOTr(qKW2Xj#y_B61ul0VE`TzVDzl9RrY5YUQjs8yTT!-#?;*X0!`Xgh-IbB zC}RoE#{Vrw<72@Il+7?^3qW?QH0K;}%^2KU5grN26cedIZWLyDJFVR40H^y zlnEs)Pih#bp&GLND(#|l%QGB9k(DzvJ+0!Gh#c;E8AVocTxoztaL;*}1w(Q;R#f-8 zzXb`UPf&*8k+|;%-_{Of);f9LDRp z_hw8hdJ7n;BK{KkQpiPE7O83S<)yD%o2u8JY9c85#-$jk8Hg_NnmDwHnkZp({f|lY4tl@(xP_kb!M+;FFyPI&uH?ket&Q6lfm_5EI;9` zh(s-j4%+vVjZr4_N-f)->$!G6@cKc=YjY7{y?)7dE?W^zRff6UE|zB5K9nNW&nJ3y z)N2=0`A9eLyFu*qLMP})VQ4!std+!d3GQQ`*xexP$DtRI(CtRoP=T^B0|a1h=pbwt zeSh*w^vA2{*N0coqv!Y0SEIpg^yT2$$xCwQ7P{IA(B$WToLP?1=q|_uaDeLc0u6OuMysFbR?pW`%koP>I;$o7FC!5E Ax&QzG literal 0 HcmV?d00001 diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-cdc2-AllDatatypeColumns.avro b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerDatatypeIT/mysql-cdc2-AllDatatypeColumns.avro new file mode 100644 index 0000000000000000000000000000000000000000..2fc7656943b335499bc87fdb77622f8d745681e3 GIT binary patch literal 3791 zcmcgvO=u)V6rL0^!m{8+R7BR$c#`Rz^vrb6blAhLWBY)xGaNh8sqPqdzKmwsI_XPG!SWMHLL{3`Zss_*EnCp3TuHU1<&F{!g0hX z52dwwV{amvWlZah?MRifWXsxbnDdn4u&-E5rNZ&RNE**vfPBN^Vv7ZsZ{%={yd5*n znY8w&EJ~530X<)vg6z;h8FW@b95GefqBLAkq&V)6(pZv}oPhpIbXew7id1PCo%uY$ zCT%!PvDRgZ#+r`_Vx37y=;nfi0eBn&qbH5cvp>%8l)_2g6(*b-Qxo@7G-+1^%Tk#^ z%3_=j|6h#8$CMK&n|{QmfZVW>jB~&>W^i*(cpxNGOe6-mQ5fg#xN?JSoNPgz>5`n} z-$*Yo&tyNOoGN-;AV!=B!6{CFmlOp9TFKd? 
zjA_WSxPpNSs)TKoY3HR|oZ$$HteBzEX&Jwv$l$IQQDo)Er3PpK_na45FeH!MAKc&VZ5Gt zZ-%s_w}7EC;?JQk5iUqkq^8Lim%eUos#<@lh#>D9=VGK{AiBgW;?O24qM%6@S0GeR z(n9r9%2B9on)a($AS}`Q%GpOP4C~4^2rF*pi$?)09h>{LKi*m1`=a^rgWrAj-r+~L zZoQ4Zz3^1qSs`xZx9xV+>)9Q*M{GhJ*KT)RvNC@Ce)Rf}zXfmp`zpLYzVOLlIUdTF zxgCUo4begSersuv3cXUxrrU8lPS1CH{)*e=BEWk6lFdxo0Zmkjxzo&-X4%}4BGJz$ zdUe!m7gO0lH}G{oYIRA=?*)W7tq9i2u+s@~4|}20_DL@yZV-}AJE)@^Wo--yz+Bcr zI30B7pk90I9Qs5C8t)UUdJ5-V^8>>xX|{`Rm22s9w9>KnJ(q zKm6)b5S{w+^XES;p`%C9McqBY+?8&}^F7-2y_Oevk=Jq~uj4uJuj58-2Q8Z(Hv!Nn zkX-XZ&jnq->%zCi)~vi=O$CImjNDSMTcC2ubcGxMLM_M+LhB$8zyWHj>uac!f|~zC SSG%5;nw6gk@@Q$kdjA8V2MCz} literal 0 HcmV?d00001 diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Movie-shard1.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Movie-shard1.jsonl similarity index 100% rename from v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Movie-shard1.jsonl rename to v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Movie-shard1.jsonl diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Movie-shard2.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Movie-shard2.jsonl similarity index 100% rename from v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Movie-shard2.jsonl rename to v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Movie-shard2.jsonl diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Users-backfill-logical-shard1.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-backfill-logical-shard1.jsonl similarity index 100% rename from v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Users-backfill-logical-shard1.jsonl rename to v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-backfill-logical-shard1.jsonl diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Users-backfill-logical-shard2.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-backfill-logical-shard2.jsonl similarity index 100% rename from v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Users-backfill-logical-shard2.jsonl rename to v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-backfill-logical-shard2.jsonl diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Users-backfill-logical-shard3.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-backfill-logical-shard3.jsonl similarity index 100% rename from v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Users-backfill-logical-shard3.jsonl rename to v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-backfill-logical-shard3.jsonl diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Users-backfill-logical-shard4.jsonl 
b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-backfill-logical-shard4.jsonl similarity index 100% rename from v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Users-backfill-logical-shard4.jsonl rename to v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-backfill-logical-shard4.jsonl diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Users-cdc-shard1.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-cdc-shard1.jsonl similarity index 100% rename from v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Users-cdc-shard1.jsonl rename to v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-cdc-shard1.jsonl diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Users-cdc-shard2.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-cdc-shard2.jsonl similarity index 100% rename from v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/Users-cdc-shard2.jsonl rename to v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/Users-cdc-shard2.jsonl diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/mysql-schema.sql b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/mysql-schema.sql similarity index 100% rename from v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/mysql-schema.sql rename to v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/mysql-schema.sql diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/mysql-session.json b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/mysql-session.json similarity index 100% rename from v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/mysql-session.json rename to v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/mysql-session.json diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/spanner-schema.sql b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/spanner-schema.sql similarity index 100% rename from v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/spanner-schema.sql rename to v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/spanner-schema.sql diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/transformation-context-shard1.json b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/transformation-context-shard1.json similarity index 100% rename from v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/transformation-context-shard1.json rename to 
v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/transformation-context-shard1.json diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/transformation-context-shard2.json b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/transformation-context-shard2.json similarity index 100% rename from v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationIT/transformation-context-shard2.json rename to v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithMigrationShardIdColumnIT/transformation-context-shard2.json diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard1.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard1.jsonl new file mode 100644 index 0000000000..52919efdfe --- /dev/null +++ b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard1.jsonl @@ -0,0 +1,3 @@ +{"uuid":"3a1cd9d6-33e5-4d24-bedf-6bbc00000000","read_timestamp":"2024-01-29T11:42:44.860Z","source_timestamp":"2024-01-29T11:42:44.000Z","object":"S1L1_Users","read_method":"mysql-backfill-fulldump","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"fb57c3235f48c2f937c1652c56d3d961cdb20c1a","sort_keys":[1706528564000,"",0],"source_metadata":{"table":"Users","database":"S1L1","primary_keys":["id"],"log_file":"","log_position":0,"change_type":"INSERT","is_deleted":false},"payload":{"id":1,"name":"Tester1","age":1}} +{"uuid":"3a1cd9d6-33e5-4d24-bedf-6bbc00000001","read_timestamp":"2024-01-29T11:42:44.861Z","source_timestamp":"2024-01-29T11:42:44.000Z","object":"S1L1_Users","read_method":"mysql-backfill-fulldump","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"fb57c3235f48c2f937c1652c56d3d961cdb20c1a","sort_keys":[1706528564000,"",0],"source_metadata":{"table":"Users","database":"S1L1","primary_keys":["id"],"log_file":"","log_position":0,"change_type":"INSERT","is_deleted":false},"payload":{"id":2,"name":"Tester2","age":2}} +{"uuid":"3a1cd9d6-33e5-4d24-bedf-6bbc00000010","read_timestamp":"2024-01-29T11:42:44.861Z","source_timestamp":"2024-01-29T11:42:44.000Z","object":"S1L1_Users","read_method":"mysql-backfill-fulldump","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"fb57c3235f48c2f937c1652c56d3d961cdb20c1a","sort_keys":[1706528564000,"",0],"source_metadata":{"table":"Users","database":"S1L1","primary_keys":["id"],"log_file":"","log_position":0,"change_type":"INSERT","is_deleted":false},"payload":{"id":3,"name":"Tester3","age":3}} \ No newline at end of file diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard2.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard2.jsonl new file mode 100644 index 0000000000..a2fabc868c --- /dev/null +++ b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard2.jsonl @@ -0,0 
+1,3 @@ +{"uuid":"dfcc3068-25f2-4b97-854f-e88600000000","read_timestamp":"2024-01-29T11:42:45.075Z","source_timestamp":"2024-01-29T11:42:44.000Z","object":"S1L2_Users","read_method":"mysql-backfill-fulldump","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"13975412a0becf3862a77ff073f06f6ea9ed484e","sort_keys":[1706528564000,"",0],"source_metadata":{"table":"Users","database":"S1L2","primary_keys":["id"],"log_file":"","log_position":0,"change_type":"INSERT","is_deleted":false},"payload":{"id":4,"name":"Tester4","age":4}} +{"uuid":"dfcc3068-25f2-4b97-854f-e88600000001","read_timestamp":"2024-01-29T11:42:45.075Z","source_timestamp":"2024-01-29T11:42:44.000Z","object":"S1L2_Users","read_method":"mysql-backfill-fulldump","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"13975412a0becf3862a77ff073f06f6ea9ed484e","sort_keys":[1706528564000,"",0],"source_metadata":{"table":"Users","database":"S1L2","primary_keys":["id"],"log_file":"","log_position":0,"change_type":"INSERT","is_deleted":false},"payload":{"id":5,"name":"Tester5","age":5}} +{"uuid":"dfcc3068-25f2-4b97-854f-e88600000010","read_timestamp":"2024-01-29T11:42:45.075Z","source_timestamp":"2024-01-29T11:42:44.000Z","object":"S1L2_Users","read_method":"mysql-backfill-fulldump","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"13975412a0becf3862a77ff073f06f6ea9ed484e","sort_keys":[1706528564000,"",0],"source_metadata":{"table":"Users","database":"S1L2","primary_keys":["id"],"log_file":"","log_position":0,"change_type":"INSERT","is_deleted":false},"payload":{"id":6,"name":"Tester6","age":6}} \ No newline at end of file diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard3.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard3.jsonl new file mode 100644 index 0000000000..0db2ef9105 --- /dev/null +++ b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard3.jsonl @@ -0,0 +1,3 @@ +{"uuid":"51d0ca59-733b-4603-8532-eaaf00000000","read_timestamp":"2024-01-29T11:44:45.293Z","source_timestamp":"2024-01-29T11:43:57.000Z","object":"S2L1_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"0bbf67ee21c003b35317d1759a865323a4a2d2cc","sort_keys":[1706528637000,"mysql-bin.000012",84440344],"source_metadata":{"table":"Users","database":"S2L1","primary_keys":["id"],"log_file":"mysql-bin.000012","log_position":84440344,"change_type":"INSERT","is_deleted":false},"payload":{"id":7,"name":"Tester7","age":7}} +{"uuid":"51d0ca59-733b-4603-8532-eaaf00000001","read_timestamp":"2024-01-29T11:44:45.293Z","source_timestamp":"2024-01-29T11:43:57.000Z","object":"S2L1_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"0bbf67ee21c003b35317d1759a865323a4a2d2cc","sort_keys":[1706528637000,"mysql-bin.000012",84440646],"source_metadata":{"table":"Users","database":"S2L1","primary_keys":["id"],"log_file":"mysql-bin.000012","log_position":84440646,"change_type":"INSERT","is_deleted":false},"payload":{"id":8,"name":"Tester8","age":8}} 
+{"uuid":"51d0ca59-733b-4603-8532-eaaf00000010","read_timestamp":"2024-01-29T11:44:45.293Z","source_timestamp":"2024-01-29T11:43:57.000Z","object":"S2L1_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"0bbf67ee21c003b35317d1759a865323a4a2d2cc","sort_keys":[1706528637000,"mysql-bin.000012",84440948],"source_metadata":{"table":"Users","database":"S2L1","primary_keys":["id"],"log_file":"mysql-bin.000012","log_position":84440948,"change_type":"INSERT","is_deleted":false},"payload":{"id":9,"name":"Tester9","age":9}} \ No newline at end of file diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard4.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard4.jsonl new file mode 100644 index 0000000000..719f782299 --- /dev/null +++ b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-backfill-logical-shard4.jsonl @@ -0,0 +1,3 @@ +{"uuid":"51d0ca59-733b-4603-8532-eaaf00000011","read_timestamp":"2024-01-29T11:44:45.293Z","source_timestamp":"2024-01-29T11:44:42.000Z","object":"S2L2_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"28072440e6e327bb20c726f7b475fdaf6f867e07","sort_keys":[1706528682000,"mysql-bin.000012",84441250],"source_metadata":{"table":"Users","database":"S2L2","primary_keys":["id"],"log_file":"mysql-bin.000012","log_position":84441250,"change_type":"INSERT","is_deleted":false},"payload":{"id":10,"name":"Tester10","age":10}} +{"uuid":"51d0ca59-733b-4603-8532-eaaf00000100","read_timestamp":"2024-01-29T11:44:45.293Z","source_timestamp":"2024-01-29T11:44:42.000Z","object":"S2L2_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"28072440e6e327bb20c726f7b475fdaf6f867e07","sort_keys":[1706528682000,"mysql-bin.000012",84441552],"source_metadata":{"table":"Users","database":"S2L2","primary_keys":["id"],"log_file":"mysql-bin.000012","log_position":84441552,"change_type":"INSERT","is_deleted":false},"payload":{"id":11,"name":"Tester11","age":11}} +{"uuid":"51d0ca59-733b-4603-8532-eaaf00000101","read_timestamp":"2024-01-29T11:44:45.293Z","source_timestamp":"2024-01-29T11:44:42.000Z","object":"S2L2_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"28072440e6e327bb20c726f7b475fdaf6f867e07","sort_keys":[1706528682000,"mysql-bin.000012",84441854],"source_metadata":{"table":"Users","database":"S2L2","primary_keys":["id"],"log_file":"mysql-bin.000012","log_position":84441854,"change_type":"INSERT","is_deleted":false},"payload":{"id":12,"name":"Tester12","age":12}} \ No newline at end of file diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-cdc-shard1.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-cdc-shard1.jsonl new file mode 100644 index 0000000000..cb50a3986d --- /dev/null +++ b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-cdc-shard1.jsonl @@ 
-0,0 +1,6 @@ +{"uuid":"1448336d-45a7-4d31-b4ab-9d0500000000","read_timestamp":"2024-02-06T08:17:59.481Z","source_timestamp":"2024-02-06T08:16:58.000Z","object":"S1L1_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"fb57c3235f48c2f937c1652c56d3d961cdb20c1a","sort_keys":[1707207418000,"mysql-bin.000013",6185380],"source_metadata":{"table":"Users","database":"S1L1","primary_keys":["id"],"log_file":"mysql-bin.000013","log_position":6185380,"change_type":"UPDATE-INSERT","is_deleted":false},"payload":{"id":1,"name":"Tester1","age":20}} +{"uuid":"1448336d-45a7-4d31-b4ab-9d0500000001","read_timestamp":"2024-02-06T08:17:59.481Z","source_timestamp":"2024-02-06T08:16:58.000Z","object":"S1L1_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"fb57c3235f48c2f937c1652c56d3d961cdb20c1a","sort_keys":[1707207418000,"mysql-bin.000013",6185683],"source_metadata":{"table":"Users","database":"S1L1","primary_keys":["id"],"log_file":"mysql-bin.000013","log_position":6185683,"change_type":"INSERT","is_deleted":false},"payload":{"id":13,"name":"Tester13","age":13}} +{"uuid":"1448336d-45a7-4d31-b4ab-9d0500000010","read_timestamp":"2024-02-06T08:17:59.481Z","source_timestamp":"2024-02-06T08:16:58.000Z","object":"S1L1_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"fb57c3235f48c2f937c1652c56d3d961cdb20c1a","sort_keys":[1707207418000,"mysql-bin.000013",6185985],"source_metadata":{"table":"Users","database":"S1L1","primary_keys":["id"],"log_file":"mysql-bin.000013","log_position":6185985,"change_type":"DELETE","is_deleted":true},"payload":{"id":3,"name":"Tester3","age":3}} +{"uuid":"caa80302-374f-40d8-b98b-c9f900000000","read_timestamp":"2024-02-06T08:19:06.299Z","source_timestamp":"2024-02-06T08:18:28.000Z","object":"S1L2_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"13975412a0becf3862a77ff073f06f6ea9ed484e","sort_keys":[1707207508000,"mysql-bin.000013",6186956],"source_metadata":{"table":"Users","database":"S1L2","primary_keys":["id"],"log_file":"mysql-bin.000013","log_position":6186956,"change_type":"UPDATE-INSERT","is_deleted":false},"payload":{"id":1,"name":"Tester1","age":21}} +{"uuid":"caa80302-374f-40d8-b98b-c9f900000010","read_timestamp":"2024-02-06T08:19:06.299Z","source_timestamp":"2024-02-06T08:18:28.000Z","object":"S1L2_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"13975412a0becf3862a77ff073f06f6ea9ed484e","sort_keys":[1707207508000,"mysql-bin.000013",6187561],"source_metadata":{"table":"Users","database":"S1L2","primary_keys":["id"],"log_file":"mysql-bin.000013","log_position":6187561,"change_type":"DELETE","is_deleted":true},"payload":{"id":2,"name":"Tester2","age":2}} 
+{"uuid":"caa80302-374f-40d8-b98b-c9f900000001","read_timestamp":"2024-02-06T08:19:06.299Z","source_timestamp":"2024-02-06T08:18:28.000Z","object":"S1L2_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"13975412a0becf3862a77ff073f06f6ea9ed484e","sort_keys":[1707207508000,"mysql-bin.000013",6187259],"source_metadata":{"table":"Users","database":"S1L2","primary_keys":["id"],"log_file":"mysql-bin.000013","log_position":6187259,"change_type":"INSERT","is_deleted":false},"payload":{"id":14,"name":"Tester14","age":14}} diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-cdc-shard2.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-cdc-shard2.jsonl new file mode 100644 index 0000000000..190ea30fa2 --- /dev/null +++ b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/Users-cdc-shard2.jsonl @@ -0,0 +1,6 @@ +{"uuid":"d65ee5e2-bc34-420f-b926-261700000000","read_timestamp":"2024-02-06T08:20:16.811Z","source_timestamp":"2024-02-06T08:19:09.000Z","object":"S2L1_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"0bbf67ee21c003b35317d1759a865323a4a2d2cc","sort_keys":[1707207549000,"mysql-bin.000013",6187895],"source_metadata":{"table":"Users","database":"S2L1","primary_keys":["id"],"log_file":"mysql-bin.000013","log_position":6187895,"change_type":"UPDATE-INSERT","is_deleted":false},"payload":{"id":6,"name":"Tester6","age":22}} +{"uuid":"d65ee5e2-bc34-420f-b926-261700000001","read_timestamp":"2024-02-06T08:20:16.811Z","source_timestamp":"2024-02-06T08:19:09.000Z","object":"S2L1_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"0bbf67ee21c003b35317d1759a865323a4a2d2cc","sort_keys":[1707207549000,"mysql-bin.000013",6188198],"source_metadata":{"table":"Users","database":"S2L1","primary_keys":["id"],"log_file":"mysql-bin.000013","log_position":6188198,"change_type":"INSERT","is_deleted":false},"payload":{"id":15,"name":"Tester15","age":15}} +{"uuid":"d65ee5e2-bc34-420f-b926-261700000010","read_timestamp":"2024-02-06T08:20:16.811Z","source_timestamp":"2024-02-06T08:19:09.000Z","object":"S2L1_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"0bbf67ee21c003b35317d1759a865323a4a2d2cc","sort_keys":[1707207549000,"mysql-bin.000013",6188500],"source_metadata":{"table":"Users","database":"S2L1","primary_keys":["id"],"log_file":"mysql-bin.000013","log_position":6188500,"change_type":"DELETE","is_deleted":true},"payload":{"id":4,"name":"Tester4","age":4}} 
+{"uuid":"6dd400e4-2772-42cc-b6b5-4bf700000000","read_timestamp":"2024-02-06T08:21:27.083Z","source_timestamp":"2024-02-06T08:20:38.000Z","object":"S2L2_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"28072440e6e327bb20c726f7b475fdaf6f867e07","sort_keys":[1707207638000,"mysql-bin.000013",6188834],"source_metadata":{"table":"Users","database":"S2L2","primary_keys":["id"],"log_file":"mysql-bin.000013","log_position":6188834,"change_type":"UPDATE-INSERT","is_deleted":false},"payload":{"id":5,"name":"Tester5","age":23}} +{"uuid":"6dd400e4-2772-42cc-b6b5-4bf700000001","read_timestamp":"2024-02-06T08:21:27.083Z","source_timestamp":"2024-02-06T08:20:38.000Z","object":"S2L2_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"28072440e6e327bb20c726f7b475fdaf6f867e07","sort_keys":[1707207638000,"mysql-bin.000013",6189137],"source_metadata":{"table":"Users","database":"S2L2","primary_keys":["id"],"log_file":"mysql-bin.000013","log_position":6189137,"change_type":"INSERT","is_deleted":false},"payload":{"id":16,"name":"Tester16","age":16}} +{"uuid":"6dd400e4-2772-42cc-b6b5-4bf700000010","read_timestamp":"2024-02-06T08:21:27.083Z","source_timestamp":"2024-02-06T08:20:38.000Z","object":"S2L2_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"28072440e6e327bb20c726f7b475fdaf6f867e07","sort_keys":[1707207638000,"mysql-bin.000013",6189439],"source_metadata":{"table":"Users","database":"S2L2","primary_keys":["id"],"log_file":"mysql-bin.000013","log_position":6189439,"change_type":"DELETE","is_deleted":true},"payload":{"id":16,"name":"Tester16","age":16}} diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/mysql-schema.sql b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/mysql-schema.sql new file mode 100644 index 0000000000..54d3d18d50 --- /dev/null +++ b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/mysql-schema.sql @@ -0,0 +1,6 @@ +CREATE TABLE `Users` ( + `id` int NOT NULL, -- To: id INT64 + `name` varchar(200), -- To: name STRING(200) + `age` bigint, -- To: age_spanner INT64 Column name renamed + PRIMARY KEY (`id`) +); diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/spanner-schema.sql b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/spanner-schema.sql new file mode 100644 index 0000000000..ceeeb2114e --- /dev/null +++ b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerShardedMigrationWithoutMigrationShardIdColumnIT/spanner-schema.sql @@ -0,0 +1,5 @@ +CREATE TABLE IF NOT EXISTS Users ( + id INT64 NOT NULL, + name STRING(200), + age INT64, +) PRIMARY KEY (id); diff --git a/v2/googlecloud-to-elasticsearch/src/main/resources/requirements.txt b/v2/googlecloud-to-elasticsearch/src/main/resources/requirements.txt deleted file mode 100644 index 3258e093fd..0000000000 --- a/v2/googlecloud-to-elasticsearch/src/main/resources/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -apache-beam[gcp,aws, azure, dataframe]==2.56.0 \ No newline at end of file diff --git 
a/v2/googlecloud-to-googlecloud/src/main/resources/requirements.txt b/v2/googlecloud-to-googlecloud/src/main/resources/requirements.txt deleted file mode 100644 index 3258e093fd..0000000000 --- a/v2/googlecloud-to-googlecloud/src/main/resources/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -apache-beam[gcp,aws, azure, dataframe]==2.56.0 \ No newline at end of file diff --git a/v2/googlecloud-to-neo4j/src/test/java/com/google/cloud/teleport/v2/neo4j/templates/ConstraintsIndicesIT.java b/v2/googlecloud-to-neo4j/src/test/java/com/google/cloud/teleport/v2/neo4j/templates/ConstraintsIndicesIT.java index 68f088913e..f2b8d82597 100644 --- a/v2/googlecloud-to-neo4j/src/test/java/com/google/cloud/teleport/v2/neo4j/templates/ConstraintsIndicesIT.java +++ b/v2/googlecloud-to-neo4j/src/test/java/com/google/cloud/teleport/v2/neo4j/templates/ConstraintsIndicesIT.java @@ -38,6 +38,7 @@ import org.apache.beam.it.neo4j.conditions.Neo4jQueryCheck; import org.junit.After; import org.junit.Before; +import org.junit.Ignore; import org.junit.Test; import org.junit.experimental.categories.Category; import org.junit.runner.RunWith; @@ -241,6 +242,7 @@ public void canResetDatabase() throws Exception { @Category(TemplateIntegrationTest.class) @TemplateIntegrationTest(GoogleCloudToNeo4j.class) @RunWith(JUnit4.class) + @Ignore("Has known issues to be fixed in Beam 2.57") public static class Neo4j5EnterpriseIT extends ConstraintsIndicesIT { @Override diff --git a/v2/googlecloud-to-splunk/src/main/java/com/google/cloud/teleport/v2/templates/GCSToSplunk.java b/v2/googlecloud-to-splunk/src/main/java/com/google/cloud/teleport/v2/templates/GCSToSplunk.java index febfc46e3c..81ef282f1d 100644 --- a/v2/googlecloud-to-splunk/src/main/java/com/google/cloud/teleport/v2/templates/GCSToSplunk.java +++ b/v2/googlecloud-to-splunk/src/main/java/com/google/cloud/teleport/v2/templates/GCSToSplunk.java @@ -17,6 +17,7 @@ import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; +import com.google.cloud.teleport.metadata.MultiTemplate; import com.google.cloud.teleport.metadata.Template; import com.google.cloud.teleport.metadata.TemplateCategory; import com.google.cloud.teleport.metadata.TemplateParameter; @@ -28,7 +29,7 @@ import com.google.cloud.teleport.v2.transforms.CsvConverters.LineToFailsafeJson; import com.google.cloud.teleport.v2.transforms.CsvConverters.ReadCsv; import com.google.cloud.teleport.v2.transforms.ErrorConverters.LogErrors; -import com.google.cloud.teleport.v2.transforms.JavascriptTextTransformer.JavascriptTextTransformerOptions; +import com.google.cloud.teleport.v2.transforms.PythonExternalTextTransformer.PythonExternalTextTransformerOptions; import com.google.cloud.teleport.v2.transforms.SplunkConverters; import com.google.cloud.teleport.v2.transforms.SplunkConverters.FailsafeStringToSplunkEvent; import com.google.cloud.teleport.v2.transforms.SplunkConverters.SplunkOptions; @@ -37,6 +38,7 @@ import com.google.cloud.teleport.v2.values.FailsafeElement; import com.google.cloud.teleport.v2.values.SplunkTokenSource; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Strings; import org.apache.beam.repackaged.core.org.apache.commons.lang3.EnumUtils; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.PipelineResult; @@ -70,21 +72,47 @@ * href="https://github.com/GoogleCloudPlatform/DataflowTemplates/blob/main/v2/googlecloud-to-splunk/README_GCS_To_Splunk.md">README * for instructions on how to use or modify this template. 
*/ -@Template( - name = "GCS_To_Splunk", - category = TemplateCategory.BATCH, - displayName = "Cloud Storage To Splunk", - description = { - "A pipeline that reads a set of Text (CSV) files in Cloud Storage and writes to Splunk's" - + " HTTP Event Collector (HEC).", - "The template creates the Splunk payload as a JSON element using either CSV headers (default), JSON schema or JavaScript UDF. " - + "If a Javascript UDF and JSON schema are both inputted as parameters, only the Javascript UDF will be executed." - }, - optionsClass = GCSToSplunkOptions.class, - skipOptions = {"javascriptTextTransformReloadIntervalMinutes"}, - flexContainerName = "gcs-to-splunk", - contactInformation = "https://cloud.google.com/support", - hidden = true) +@MultiTemplate({ + @Template( + name = "GCS_To_Splunk", + category = TemplateCategory.BATCH, + displayName = "Cloud Storage To Splunk", + description = { + "A pipeline that reads a set of Text (CSV) files in Cloud Storage and writes to Splunk's" + + " HTTP Event Collector (HEC).", + "The template creates the Splunk payload as a JSON element using either CSV headers (default), JSON schema or JavaScript UDF. " + + "If a JavaScript UDF and JSON schema are both provided as parameters, only the JavaScript UDF is executed." + }, + optionsClass = GCSToSplunkOptions.class, + skipOptions = { + "javascriptTextTransformReloadIntervalMinutes", + "pythonExternalTextTransformGcsPath", + "pythonExternalTextTransformFunctionName" + }, + flexContainerName = "gcs-to-splunk", + contactInformation = "https://cloud.google.com/support", + hidden = true), + @Template( + name = "GCS_To_Splunk_Xlang", + category = TemplateCategory.BATCH, + displayName = "Cloud Storage To Splunk with Python UDFs", + type = Template.TemplateType.XLANG, + description = { + "A pipeline that reads a set of Text (CSV) files in Cloud Storage and writes to Splunk's" + + " HTTP Event Collector (HEC).", + "The template creates the Splunk payload as a JSON element using either CSV headers (default), JSON schema or Python UDF. " + + "If a Python UDF and JSON schema are both provided as parameters, only the Python UDF is executed." + }, + optionsClass = GCSToSplunkOptions.class, + skipOptions = { + "javascriptTextTransformGcsPath", + "javascriptTextTransformFunctionName", + "javascriptTextTransformReloadIntervalMinutes" + }, + flexContainerName = "gcs-to-splunk-xlang", + contactInformation = "https://cloud.google.com/support", + hidden = true) +}) public final class GCSToSplunk { /** String/String Coder for FailsafeElement. */ @@ -127,7 +155,9 @@ public final class GCSToSplunk { * executor at the command-line.
*/ public interface GCSToSplunkOptions - extends CsvConverters.CsvPipelineOptions, SplunkOptions, JavascriptTextTransformerOptions { + extends CsvConverters.CsvPipelineOptions, + SplunkOptions, + PythonExternalTextTransformerOptions { @TemplateParameter.GcsWriteFolder( order = 1, @@ -208,16 +238,32 @@ static ReadCsv readFromCsv(GCSToSplunkOptions options) { } static LineToFailsafeJson convertToFailsafeAndMaybeApplyUdf(GCSToSplunkOptions options) { - return CsvConverters.LineToFailsafeJson.newBuilder() - .setDelimiter(options.getDelimiter()) - .setJavascriptUdfFileSystemPath(options.getJavascriptTextTransformGcsPath()) - .setJavascriptUdfFunctionName(options.getJavascriptTextTransformFunctionName()) - .setJsonSchemaPath(options.getJsonSchemaPath()) - .setHeaderTag(CSV_HEADERS) - .setLineTag(CSV_LINES) - .setUdfOutputTag(UDF_OUT) - .setUdfDeadletterTag(UDF_ERROR_OUT) - .build(); + + boolean usePythonUdf = !Strings.isNullOrEmpty(options.getPythonExternalTextTransformGcsPath()); + boolean useJavascriptUdf = !Strings.isNullOrEmpty(options.getJavascriptTextTransformGcsPath()); + + if (usePythonUdf && useJavascriptUdf) { + throw new IllegalArgumentException( + "Either javascript or Python gcs path must be provided, but not both."); + } + CsvConverters.LineToFailsafeJson.Builder lineToFailsafeJsonBuilder = + CsvConverters.LineToFailsafeJson.newBuilder() + .setDelimiter(options.getDelimiter()) + .setJsonSchemaPath(options.getJsonSchemaPath()) + .setHeaderTag(CSV_HEADERS) + .setLineTag(CSV_LINES) + .setUdfOutputTag(UDF_OUT) + .setUdfDeadletterTag(UDF_ERROR_OUT); + if (usePythonUdf) { + lineToFailsafeJsonBuilder + .setPythonUdfFileSystemPath(options.getPythonExternalTextTransformGcsPath()) + .setPythonUdfFunctionName(options.getPythonExternalTextTransformFunctionName()); + } else { + lineToFailsafeJsonBuilder + .setJavascriptUdfFileSystemPath(options.getJavascriptTextTransformGcsPath()) + .setJavascriptUdfFunctionName(options.getJavascriptTextTransformFunctionName()); + } + return lineToFailsafeJsonBuilder.build(); } static FailsafeStringToSplunkEvent convertToSplunkEvent() { diff --git a/v2/kafka-common/src/main/java/com/google/cloud/teleport/v2/kafka/transforms/BinaryAvroDeserializer.java b/v2/kafka-common/src/main/java/com/google/cloud/teleport/v2/kafka/transforms/BinaryAvroDeserializer.java index 5cb62962fb..251cdbd7f6 100644 --- a/v2/kafka-common/src/main/java/com/google/cloud/teleport/v2/kafka/transforms/BinaryAvroDeserializer.java +++ b/v2/kafka-common/src/main/java/com/google/cloud/teleport/v2/kafka/transforms/BinaryAvroDeserializer.java @@ -15,7 +15,6 @@ */ package com.google.cloud.teleport.v2.kafka.transforms; -import io.confluent.kafka.serializers.KafkaAvroDeserializer; import java.io.IOException; import org.apache.avro.Schema; import org.apache.avro.generic.GenericDatumReader; @@ -25,8 +24,9 @@ import org.apache.avro.io.DecoderFactory; import org.apache.kafka.common.errors.SerializationException; import org.apache.kafka.common.header.Headers; +import org.apache.kafka.common.serialization.Deserializer; -public class BinaryAvroDeserializer extends KafkaAvroDeserializer { +public class BinaryAvroDeserializer implements Deserializer { private Schema schema; public BinaryAvroDeserializer() {} @@ -35,7 +35,13 @@ public BinaryAvroDeserializer(Schema schema) { this.schema = schema; } + @Override public GenericRecord deserialize(String topic, Headers header, byte[] bytes) { + return deserialize(topic, bytes); + } + + @Override + public GenericRecord deserialize(String topic, byte[] bytes) { try { 
Decoder decoder = DecoderFactory.get().binaryDecoder(bytes, null); DatumReader reader = new GenericDatumReader(this.schema); diff --git a/v2/kafka-common/src/main/java/com/google/cloud/teleport/v2/kafka/transforms/BinaryAvroSerializer.java b/v2/kafka-common/src/main/java/com/google/cloud/teleport/v2/kafka/transforms/BinaryAvroSerializer.java index 307e5f1363..44924e5e49 100644 --- a/v2/kafka-common/src/main/java/com/google/cloud/teleport/v2/kafka/transforms/BinaryAvroSerializer.java +++ b/v2/kafka-common/src/main/java/com/google/cloud/teleport/v2/kafka/transforms/BinaryAvroSerializer.java @@ -15,7 +15,6 @@ */ package com.google.cloud.teleport.v2.kafka.transforms; -import io.confluent.kafka.serializers.KafkaAvroSerializer; import java.io.ByteArrayOutputStream; import java.io.IOException; import org.apache.avro.Schema; @@ -25,8 +24,10 @@ import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; import org.apache.kafka.common.errors.SerializationException; +import org.apache.kafka.common.header.Headers; +import org.apache.kafka.common.serialization.Serializer; -public class BinaryAvroSerializer extends KafkaAvroSerializer { +public class BinaryAvroSerializer implements Serializer { private Schema schema; public BinaryAvroSerializer() {} @@ -35,7 +36,13 @@ public BinaryAvroSerializer(Schema schema) { this.schema = schema; } - public byte[] serialize(String subject, GenericRecord record) { + @Override + public byte[] serialize(String subject, Headers headers, GenericRecord record) { + return serialize(subject, record); + } + + @Override + public byte[] serialize(String topic, GenericRecord record) { try { ByteArrayOutputStream out = new ByteArrayOutputStream(); Encoder encoder = EncoderFactory.get().binaryEncoder(out, null); diff --git a/v2/kafka-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/KafkaToBigQueryFlex.java b/v2/kafka-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/KafkaToBigQueryFlex.java index d6bbd251b5..bcc2fccda0 100644 --- a/v2/kafka-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/KafkaToBigQueryFlex.java +++ b/v2/kafka-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/KafkaToBigQueryFlex.java @@ -122,9 +122,6 @@ public class KafkaToBigQueryFlex { public static final TupleTag, String>> TRANSFORM_DEADLETTER_OUT = new TupleTag, String>>() {}; - /** The default suffix for error tables if dead letter table is not specified. */ - private static final String DEFAULT_DEADLETTER_TABLE_SUFFIX = "_error_records"; - /** String/String Coder for FailsafeElement. 
*/ private static final FailsafeElementCoder FAILSAFE_ELEMENT_CODER = FailsafeElementCoder.of( @@ -253,8 +250,7 @@ public static PipelineResult runAvroPipeline( if (options.getAvroFormat().equals("NON_WIRE_FORMAT") && options.getAvroSchemaPath() != null) { - throw new UnsupportedOperationException("Only Confluent Wire Format is supported"); - // writeResult = kafkaRecords.apply(AvroTransform.of(options)); + writeResult = kafkaRecords.apply(AvroTransform.of(options)); } else { @@ -382,10 +378,7 @@ public static PipelineResult runJsonPipeline( failedInserts.apply( "WriteInsertionFailedRecords", ErrorConverters.WriteStringMessageErrors.newBuilder() - .setErrorRecordsTable( - ObjectUtils.firstNonNull( - options.getOutputDeadletterTable(), - options.getOutputTableSpec() + DEFAULT_DEADLETTER_TABLE_SUFFIX)) + .setErrorRecordsTable(ObjectUtils.firstNonNull(options.getOutputDeadletterTable())) .setErrorRecordsTableSchema(SchemaUtils.DEADLETTER_SCHEMA) .build()); } else { diff --git a/v2/kafka-to-bigquery/src/main/java/com/google/cloud/teleport/v2/transforms/AvroTransform.java b/v2/kafka-to-bigquery/src/main/java/com/google/cloud/teleport/v2/transforms/AvroTransform.java index 43bd63273a..1f6752f353 100644 --- a/v2/kafka-to-bigquery/src/main/java/com/google/cloud/teleport/v2/transforms/AvroTransform.java +++ b/v2/kafka-to-bigquery/src/main/java/com/google/cloud/teleport/v2/transforms/AvroTransform.java @@ -119,7 +119,8 @@ private static class KafkaRecordToGenericRecordFailsafeElementFn KafkaRecord, FailsafeElement, GenericRecord>> implements Serializable { - private transient KafkaAvroDeserializer deserializer; + private transient KafkaAvroDeserializer kafkaDeserializer; + private transient BinaryAvroDeserializer binaryDeserializer; private transient SchemaRegistryClient schemaRegistryClient; private Schema schema = null; private String topicName; @@ -135,12 +136,12 @@ private static class KafkaRecordToGenericRecordFailsafeElementFn @Setup public void setup() throws IOException, RestClientException { if (this.schema != null && this.useConfluentWireFormat.equals("NON_WIRE_FORMAT")) { - this.deserializer = new BinaryAvroDeserializer(this.schema); + this.binaryDeserializer = new BinaryAvroDeserializer(this.schema); } else if (this.schema != null && this.useConfluentWireFormat.equals("CONFLUENT_WIRE_FORMAT")) { this.schemaRegistryClient = new MockSchemaRegistryClient(); this.schemaRegistryClient.register(this.topicName, this.schema, 1, 1); - this.deserializer = new KafkaAvroDeserializer(schemaRegistryClient); + this.kafkaDeserializer = new KafkaAvroDeserializer(schemaRegistryClient); } else { throw new IllegalArgumentException( "An Avro schema is needed in order to deserialize values."); @@ -153,10 +154,16 @@ public void processElement(ProcessContext context) { GenericRecord result = null; try { // Serialize to Generic Record - result = - (GenericRecord) - this.deserializer.deserialize( - element.getTopic(), element.getHeaders(), element.getKV().getValue()); + if (this.useConfluentWireFormat.equals("NON_WIRE_FORMAT")) { + result = + this.binaryDeserializer.deserialize( + element.getTopic(), element.getHeaders(), element.getKV().getValue()); + } else { + result = + (GenericRecord) + this.kafkaDeserializer.deserialize( + element.getTopic(), element.getHeaders(), element.getKV().getValue()); + } } catch (Exception e) { LOG.error("Failed during deserialization: " + e.toString()); } diff --git a/v2/kafka-to-gcs/pom.xml b/v2/kafka-to-gcs/pom.xml index c460feb5fb..956cafb7aa 100644 --- 
a/v2/kafka-to-gcs/pom.xml +++ b/v2/kafka-to-gcs/pom.xml @@ -30,6 +30,17 @@ kafka-common ${project.version} + + com.google.cloud.teleport + it-google-cloud-platform + ${project.version} + test + + + org.apache.beam + beam-it-kafka + test + diff --git a/v2/kafka-to-gcs/src/main/java/com/google/cloud/teleport/v2/transforms/AvroWriteTransform.java b/v2/kafka-to-gcs/src/main/java/com/google/cloud/teleport/v2/transforms/AvroWriteTransform.java index 4130cabb26..30e4a33710 100644 --- a/v2/kafka-to-gcs/src/main/java/com/google/cloud/teleport/v2/transforms/AvroWriteTransform.java +++ b/v2/kafka-to-gcs/src/main/java/com/google/cloud/teleport/v2/transforms/AvroWriteTransform.java @@ -266,6 +266,8 @@ public String getFilename( String subDir = avroDestination.name; return subDir + "/" + + outputFilenamePrefix() + + "_" + defaultNaming.getFilename(window, pane, numShards, shardIndex, compression); } } diff --git a/v2/kafka-to-gcs/src/test/java/com/google/cloud/teleport/v2/templates/KafkaToGcsIT.java b/v2/kafka-to-gcs/src/test/java/com/google/cloud/teleport/v2/templates/KafkaToGcsIT.java new file mode 100644 index 0000000000..f507c87eca --- /dev/null +++ b/v2/kafka-to-gcs/src/test/java/com/google/cloud/teleport/v2/templates/KafkaToGcsIT.java @@ -0,0 +1,178 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.teleport.v2.templates; + +import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatPipeline; +import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatResult; + +import com.google.cloud.teleport.metadata.TemplateIntegrationTest; +import com.google.common.io.Resources; +import io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient; +import io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException; +import io.confluent.kafka.serializers.KafkaAvroSerializer; +import java.io.IOException; +import java.net.URL; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Function; +import java.util.regex.Pattern; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.GenericRecordBuilder; +import org.apache.beam.it.common.PipelineLauncher.LaunchConfig; +import org.apache.beam.it.common.PipelineLauncher.LaunchInfo; +import org.apache.beam.it.common.PipelineOperator.Result; +import org.apache.beam.it.common.TestProperties; +import org.apache.beam.it.common.utils.ResourceManagerUtils; +import org.apache.beam.it.gcp.TemplateTestBase; +import org.apache.beam.it.gcp.artifacts.Artifact; +import org.apache.beam.it.kafka.KafkaResourceManager; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.clients.producer.RecordMetadata; +import org.apache.kafka.common.serialization.StringSerializer; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** Integration test for {@link KafkaToGcs2} (Kafka_to_GCS_2). 
*/ +@Category(TemplateIntegrationTest.class) +@TemplateIntegrationTest(KafkaToGcs2.class) +@RunWith(JUnit4.class) +public class KafkaToGcsIT extends TemplateTestBase { + + private static final Logger LOG = LoggerFactory.getLogger(KafkaToGcsIT.class); + + private KafkaResourceManager kafkaResourceManager; + private Schema avroSchema; + + @Before + public void setup() throws IOException { + kafkaResourceManager = + KafkaResourceManager.builder(testName).setHost(TestProperties.hostIp()).build(); + + URL avroschemaResource = Resources.getResource("KafkaToGcsIT/avro_schema.avsc"); + gcsClient.uploadArtifact("avro_schema.avsc", avroschemaResource.getPath()); + avroSchema = new Schema.Parser().parse(avroschemaResource.openStream()); + } + + @After + public void tearDown() { + ResourceManagerUtils.cleanResources(kafkaResourceManager); + } + + @Test + public void testKafkaToGcsText() throws IOException, RestClientException { + baseKafkaToGcs(b -> b.addParameter("outputFileFormat", "TEXT")); + } + + @Test + public void testKafkaToGcsAvro() throws IOException, RestClientException { + baseKafkaToGcs(b -> b.addParameter("outputFileFormat", "AVRO")); + } + + private void baseKafkaToGcs(Function paramsAdder) + throws IOException, RestClientException { + + // Arrange + String topicName = kafkaResourceManager.createTopic(testName, 5); + + LaunchConfig.Builder options = + paramsAdder.apply( + LaunchConfig.builder(testName, specPath) + .addParameter( + "bootstrapServers", + kafkaResourceManager.getBootstrapServers().replace("PLAINTEXT://", "")) + .addParameter("inputTopics", topicName) + .addParameter("windowDuration", "10s") + .addParameter("schemaPath", getGcsPath("avro_schema.avsc")) + .addParameter("offset", "earliest") + .addParameter("outputDirectory", getGcsPath(testName)) + .addParameter("outputFilenamePrefix", testName + "-") + .addParameter("numShards", "2")); + + // Act + LaunchInfo info = launchTemplate(options); + assertThatPipeline(info).isRunning(); + + MockSchemaRegistryClient registryClient = new MockSchemaRegistryClient(); + registryClient.register(topicName + "-value", avroSchema, 1, 1); + + KafkaProducer kafkaProducer = + kafkaResourceManager.buildProducer( + new StringSerializer(), new KafkaAvroSerializer(registryClient)); + + for (int i = 1; i <= 10; i++) { + GenericRecord dataflow = createRecord(Integer.valueOf(i + "1"), "Dataflow", 0); + publish(kafkaProducer, topicName, i + "1", dataflow); + + GenericRecord pubsub = createRecord(Integer.valueOf(i + "2"), "Pub/Sub", 0); + publish(kafkaProducer, topicName, i + "2", pubsub); + + GenericRecord invalid = createRecord(Integer.valueOf(i + "3"), "InvalidNameTooLong", 0); + publish(kafkaProducer, topicName, i + "3", invalid); + + try { + TimeUnit.SECONDS.sleep(3); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + + AtomicReference> artifacts = new AtomicReference<>(); + Pattern expectedFilePattern = Pattern.compile(".*" + testName + "-.*"); + + Result result = + pipelineOperator() + .waitForConditionAndFinish( + createConfig(info), + () -> { + artifacts.set(gcsClient.listArtifacts(testName, expectedFilePattern)); + return !artifacts.get().isEmpty(); + }); + + // Assert + assertThatResult(result).meetsConditions(); + } + + private void publish( + KafkaProducer producer, String topicName, String key, GenericRecord value) { + try { + RecordMetadata recordMetadata = + producer.send(new ProducerRecord<>(topicName, key, value)).get(); + LOG.info( + "Published record {}, partition {} - offset: {}", + 
recordMetadata.topic(), + recordMetadata.partition(), + recordMetadata.offset()); + } catch (Exception e) { + throw new RuntimeException("Error publishing record to Kafka", e); + } + } + + private GenericRecord createRecord(int id, String productName, double value) { + return new GenericRecordBuilder(avroSchema) + .set("productId", id) + .set("productName", productName) + .build(); + } +} diff --git a/v2/kafka-to-gcs/src/test/resources/KafkaToGcsIT/avro_schema.avsc b/v2/kafka-to-gcs/src/test/resources/KafkaToGcsIT/avro_schema.avsc new file mode 100644 index 0000000000..d907a7f17a --- /dev/null +++ b/v2/kafka-to-gcs/src/test/resources/KafkaToGcsIT/avro_schema.avsc @@ -0,0 +1,15 @@ +{ + "type": "record", + "namespace": "org.example.avro", + "name": "AvroProductKafkaRecord", + "fields": [ + { + "name": "productId", + "type": "int" + }, + { + "name": "productName", + "type": "string" + } + ] +} \ No newline at end of file diff --git a/v2/pubsub-binary-to-bigquery/src/main/resources/requirements.txt b/v2/pubsub-binary-to-bigquery/src/main/resources/requirements.txt deleted file mode 100644 index 3258e093fd..0000000000 --- a/v2/pubsub-binary-to-bigquery/src/main/resources/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -apache-beam[gcp,aws, azure, dataframe]==2.56.0 \ No newline at end of file diff --git a/v2/pubsub-to-mongodb/src/main/resources/requirements.txt b/v2/pubsub-to-mongodb/src/main/resources/requirements.txt deleted file mode 100644 index d761175ee9..0000000000 --- a/v2/pubsub-to-mongodb/src/main/resources/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -apache-beam[gcp,dataframe,azure,aws]==2.55.1 \ No newline at end of file diff --git a/v2/sourcedb-to-spanner/pom.xml b/v2/sourcedb-to-spanner/pom.xml index c22f3029d5..2fa55834ea 100644 --- a/v2/sourcedb-to-spanner/pom.xml +++ b/v2/sourcedb-to-spanner/pom.xml @@ -48,6 +48,17 @@ com.google.cloud google-cloud-core + + org.syncope.identityconnectors + framework + 0.4.3 + + + org.syncope.identityconnectors + framework-internal + 0.4.3 + runtime + diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/options/OptionsToConfigBuilder.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/options/OptionsToConfigBuilder.java new file mode 100644 index 0000000000..5d9b457775 --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/options/OptionsToConfigBuilder.java @@ -0,0 +1,98 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.teleport.v2.options; + +import static com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.config.JdbcIOWrapperConfig.builderWithMySqlDefaults; + +import com.google.cloud.teleport.v2.source.reader.auth.dbauth.LocalCredentialsProvider; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.config.JdbcIOWrapperConfig; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.config.TableConfig; +import com.google.cloud.teleport.v2.source.reader.io.schema.SourceSchemaReference; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; + +public final class OptionsToConfigBuilder { + + public static final class MySql { + + public static JdbcIOWrapperConfig configWithMySqlDefaultsFromOptions( + SourceDbToSpannerOptions options) { + JdbcIOWrapperConfig.Builder builder = builderWithMySqlDefaults(); + builder = + builder + .setSourceHost(options.getSourceHost()) + .setSourcePort(options.getSourcePort()) + .setSourceSchemaReference( + SourceSchemaReference.builder().setDbName(options.getSourceDB()).build()) + .setDbAuth( + LocalCredentialsProvider.builder() + .setUserName(options.getUsername()) + .setPassword(options.getPassword()) + .build()) + .setJdbcDriverClassName(options.getJdbcDriverClassName()) + .setJdbcDriverJars(options.getJdbcDriverJars()) + .setShardID("Unsupported"); /*TODO: Support Sharded Migration */ + if (!options.getSourceConnectionProperties().isEmpty()) { + builder = builder.setConnectionProperties(options.getSourceConnectionProperties()); + } + if (options.getMaxConnections() != 0) { + builder.setMaxConnections((long) options.getMaxConnections()); + } + if (options.getReconnectsEnabled()) { + builder.setAutoReconnect(true); + if (options.getReconnectAttempts() != 0) { + builder.setReconnectAttempts((long) options.getReconnectAttempts()); + } + } + ImmutableMap<String, String> tablesWithPartitionColumns = + getTablesWithPartitionColumn(options); + ImmutableList<TableConfig> tableConfigs = + tablesWithPartitionColumns.entrySet().stream() + .map( + entry -> { + TableConfig.Builder configBuilder = + TableConfig.builder(entry.getKey()).withPartitionColum(entry.getValue()); + if (options.getNumPartitions() != 0) { + configBuilder = configBuilder.setMaxPartitions(options.getNumPartitions()); + } + if (options.getFetchSize() != 0) { + configBuilder = configBuilder.setMaxFetchSize(options.getFetchSize()); + } + return configBuilder.build(); + }) + .collect(ImmutableList.toImmutableList()); + builder = builder.setTableConfigs(tableConfigs); + return builder.build(); + } + } + + private static ImmutableMap<String, String> getTablesWithPartitionColumn( + SourceDbToSpannerOptions options) { + String[] tables = options.getTables().split(","); + String[] partitionColumns = options.getPartitionColumns().split(","); + if (tables.length != partitionColumns.length) { + throw new RuntimeException( + "Invalid configuration. 
Partition column count does not match " + "table count."); + } + ImmutableMap.Builder<String, String> tableWithPartitionColumnBuilder = ImmutableMap.builder(); + for (int i = 0; i < tables.length; i++) { + tableWithPartitionColumnBuilder.put(tables[i], partitionColumns[i]); + } + return tableWithPartitionColumnBuilder.build(); + } + + private OptionsToConfigBuilder() {} +} diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/options/SourceDbToSpannerOptions.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/options/SourceDbToSpannerOptions.java index b2b24e4506..f348eafc80 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/options/SourceDbToSpannerOptions.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/options/SourceDbToSpannerOptions.java @@ -29,6 +29,7 @@ public interface SourceDbToSpannerOptions extends CommonTemplateOptions { description = "Comma-separated Cloud Storage path(s) of the JDBC driver(s)", helpText = "The comma-separated list of driver JAR files.", example = "gs://your-bucket/driver_jar1.jar,gs://your-bucket/driver_jar2.jar") + @Default.String("") String getJdbcDriverJars(); void setJdbcDriverJars(String driverJar); @@ -47,23 +48,39 @@ public interface SourceDbToSpannerOptions extends CommonTemplateOptions { @TemplateParameter.Text( order = 3, - optional = true, - regexes = { - "(^jdbc:[a-zA-Z0-9/:@.?_+!*=&-;]+$)|(^([A-Za-z0-9+/]{4}){1,}([A-Za-z0-9+/]{0,3})={0,3})" - }, + regexes = {"(^jdbc:[a-zA-Z0-9/:@.]+$)"}, groupName = "Source", - description = "Connection URL to connect to the source database.", - helpText = - "The JDBC connection URL string. For example, `jdbc:mysql://some-host:3306/sampledb`. Can" - + " be passed in as a string that's Base64-encoded and then encrypted with a Cloud" - + " KMS key. Currently supported sources: MySQL", - example = "jdbc:mysql://some-host:3306/sampledb") - String getSourceConnectionURL(); + description = + "Connection URL to connect to the source database host. Port number and connection properties must be supplied separately.", + helpText = "The JDBC connection URL string. For example, `jdbc:mysql://some-host`.") + String getSourceHost(); - void setSourceConnectionURL(String connectionURL); + void setSourceHost(String host); @TemplateParameter.Text( order = 4, + optional = false, + regexes = {"(^[0-9]+$)"}, + groupName = "Source", + description = "Port number of the source database.", + helpText = "Port number of the source database. For example, `3306`.") + String getSourcePort(); + + void setSourcePort(String port); + + /* TODO: (support Sharding, PG namespaces) */ + @TemplateParameter.Text( + order = 5, + regexes = {"(^[a-zA-Z0-9]+$)"}, + groupName = "Source", + description = "Source database name.", + helpText = "Name of the source database. For example, `person9`.") + String getSourceDB(); + + void setSourceDB(String db); + + @TemplateParameter.Text( + order = 6, optional = true, regexes = {"^[a-zA-Z0-9_;!*&=@#-:\\/]+$"}, groupName = "Source", @@ -72,12 +89,13 @@ public interface SourceDbToSpannerOptions extends CommonTemplateOptions { "Properties string to use for the JDBC connection. 
Format of the string must be" + " [propertyName=property;]*.", example = "unicode=true;characterEncoding=UTF-8") + @Default.String("") String getSourceConnectionProperties(); void setSourceConnectionProperties(String connectionProperties); @TemplateParameter.Text( - order = 5, + order = 7, optional = true, regexes = {"^.+$"}, groupName = "Source", @@ -85,24 +103,26 @@ public interface SourceDbToSpannerOptions extends CommonTemplateOptions { helpText = "The username to be used for the JDBC connection. Can be passed in as a Base64-encoded" + " string encrypted with a Cloud KMS key.") + @Default.String("") String getUsername(); void setUsername(String username); @TemplateParameter.Password( - order = 6, + order = 8, optional = true, groupName = "Source", description = "JDBC connection password.", helpText = "The password to be used for the JDBC connection. Can be passed in as a Base64-encoded" + " string encrypted with a Cloud KMS key.") + @Default.String("") String getPassword(); void setPassword(String password); @TemplateParameter.Text( - order = 7, + order = 9, optional = true, groupName = "Source Parameters", description = "The name of a column of numeric type that will be used for partitioning.", @@ -116,7 +136,7 @@ public interface SourceDbToSpannerOptions extends CommonTemplateOptions { void setPartitionColumns(String partitionColumns); @TemplateParameter.Text( - order = 8, + order = 10, optional = true, groupName = "Source Parameters", description = "Comma-separated names of the tables in the source database.", @@ -125,8 +145,9 @@ public interface SourceDbToSpannerOptions extends CommonTemplateOptions { void setTables(String table); + /* TODO(pipelineController) allow per table NumPartitions. */ @TemplateParameter.Integer( - order = 9, + order = 11, optional = true, groupName = "Source", description = "The number of partitions.", @@ -134,12 +155,25 @@ public interface SourceDbToSpannerOptions extends CommonTemplateOptions { "The number of partitions. This, along with the lower and upper bound, form partitions" + " strides for generated WHERE clause expressions used to split the partition column" + " evenly. When the input is less than 1, the number is set to 1.") + @Default.Integer(0) /* Use Auto Inference */ Integer getNumPartitions(); - void setNumPartitions(Integer numPartitions); + void setNumPartitions(Integer value); + + /* TODO(pipelineController) allow per table FetchSize. 
*/ + @TemplateParameter.Integer( + order = 12, + optional = true, + groupName = "Source", + description = "Table read fetch size.", + helpText = "The fetch size of a single table read.") + @Default.Integer(0) /* Use Beam Default */ + Integer getFetchSize(); + + void setFetchSize(Integer fetchSize); @TemplateParameter.Text( - order = 10, + order = 13, description = "Cloud Spanner Instance Id.", helpText = "The destination Cloud Spanner instance.") String getInstanceId(); @@ -147,7 +181,7 @@ void setInstanceId(String value); @TemplateParameter.Text( - order = 11, + order = 14, description = "Cloud Spanner Database Id.", helpText = "The destination Cloud Spanner database.") String getDatabaseId(); @@ -155,7 +189,7 @@ void setDatabaseId(String value); @TemplateParameter.ProjectId( - order = 12, + order = 15, description = "Cloud Spanner Project Id.", helpText = "This is the name of the Cloud Spanner project.") String getProjectId(); @@ -163,7 +197,7 @@ void setProjectId(String projectId); @TemplateParameter.Text( - order = 13, + order = 16, optional = true, description = "Cloud Spanner Endpoint to call", helpText = "The Cloud Spanner endpoint to call in the template.", @@ -174,7 +208,7 @@ void setSpannerHost(String value); @TemplateParameter.Text( - order = 14, + order = 17, optional = true, description = "Source database columns to ignore", helpText = @@ -185,14 +219,48 @@ void setIgnoreColumns(String value); @TemplateParameter.Text( - order = 15, + order = 18, optional = true, description = "Maximum number of connections to Source database per worker", helpText = - "Configures the JDBC connection pool on each worker with maximum number of connections. Use a negative number for no limit. Default value is 100.", + "Configures the JDBC connection pool on each worker with maximum number of connections. Use a negative number for no limit.", example = "-1") - @Default.Integer(100) + @Default.Integer(0) // Take Dialect Specific default in the wrapper Integer getMaxConnections(); void setMaxConnections(Integer value); + + @TemplateParameter.Text( + order = 19, + optional = true, + description = "Enable connection reconnects", + helpText = "Enables the JDBC connection reconnects.", + example = "true") + @Default.Boolean(true) // Take Dialect Specific default in the wrapper. + Boolean getReconnectsEnabled(); + + void setReconnectsEnabled(Boolean value); + + @TemplateParameter.Text( + order = 20, + optional = true, + description = "Maximum number of connection reconnect attempts, if reconnects are enabled", + helpText = "Configures the JDBC connection reconnect attempts.", + example = "10") + @Default.Integer(0) // Take Dialect Specific default in the wrapper. 
+ Integer getReconnectAttempts(); + + void setReconnectAttempts(Integer value); + + @TemplateParameter.GcsReadFile( + order = 21, + optional = true, + description = + "Session File Path in Cloud Storage, to provide mapping information in the form of a session file", + helpText = + "Session file path in Cloud Storage that contains mapping information from" + + " Spanner Migration Tool") + String getSessionFilePath(); + + void setSessionFilePath(String value); } diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/DataSourceProvider.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/DataSourceProvider.java deleted file mode 100644 index 177da15ce5..0000000000 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/DataSourceProvider.java +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (C) 2023 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package com.google.cloud.teleport.v2.source; - -import static com.google.cloud.teleport.v2.utils.KMSUtils.maybeDecrypt; - -import com.google.cloud.teleport.v2.options.SourceDbToSpannerOptions; -import java.lang.invoke.MethodHandles; -import java.lang.invoke.VarHandle; -import javax.sql.DataSource; -import org.apache.beam.sdk.io.jdbc.JdbcIO; -import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class DataSourceProvider - implements org.apache.beam.sdk.transforms.SerializableFunction { - - private static final Logger LOG = LoggerFactory.getLogger(DataSourceProvider.class); - private static volatile DataSource dataSource = null; - // VarHandle provides a strongly typed reference to @dataSource - // and supports various atomic access modes like the acquire-release semantics. - private static final VarHandle DATA_SOURCE; - private final JdbcIO.DataSourceConfiguration config; - - // Bind DATA_SOURCE to dataSource - static { - try { - MethodHandles.Lookup lookup = MethodHandles.lookup(); - DATA_SOURCE = - lookup - .in(DataSourceProvider.class) - .findStaticVarHandle(DataSourceProvider.class, "dataSource", DataSource.class); - } catch (ReflectiveOperationException e) { - // Logger might not be initialized in static scope! - System.err.println("Error while binding VarHandle: " + e.toString()); - throw new ExceptionInInitializerError(e); - } - } - - /** - * Constructs an instance of DataSourceProvider. - * - * @param options Pipeline options. - */ - public DataSourceProvider(SourceDbToSpannerOptions options) { - config = getDataSourceConfiguration(options); - } - - /** - * Returns a Singleton {@link DataSource} after initializing it if necessary. - * - * @see Idomatic - * implementation of Double checked locking pattern. 
- */ - @Override - public DataSource apply(Void input) { - DataSource localRef = getDataSourceAcquire(); - if (localRef == null) { - synchronized (this) { - localRef = getDataSourceAcquire(); - if (localRef == null) { - localRef = JdbcIO.PoolableDataSourceProvider.of(config).apply(null); - setDataSourceRelease(localRef); - LOG.debug("initialize DataSource dataSource {}", localRef); - } - } - } - return localRef; - } - - /** - * Access {@link DataSourceProvider#dataSource} with memory_order_acquire memory - * ordering semantics. - * - *

<p>The load operation of {@link DataSourceProvider#dataSource} to the returned value - * guarantees: - * - * <ol> - *   <li>No reads or writes in the current thread can be reordered before this load. - *   <li>All writes in other threads that release the same variable are visible in the current - *       thread. - * </ol> - * - * @see VarHandle - * @see - * Release-Acquire_ordering - */ - private DataSource getDataSourceAcquire() { - return (DataSource) DATA_SOURCE.getAcquire(); - } - - /** - * Set {@link DataSourceProvider#dataSource} with memory_order_release memory - * ordering semantics. - * - * <p>The store operation to {@link DataSourceProvider#dataSource} with this memory ordering - * guarantees: - * - * <ol> - *   <li>No reads or writes in the current thread can be reordered after this store. - *   <li>All writes in the current thread are visible in other threads that acquire the same - *       variable. - * </ol>
- * - * @see VarHandle - * @see - * Release-Acquire_ordering - */ - private void setDataSourceRelease(DataSource value) { - DATA_SOURCE.setRelease(value); - } - - private static JdbcIO.DataSourceConfiguration getDataSourceConfiguration( - SourceDbToSpannerOptions options) { - var config = - JdbcIO.DataSourceConfiguration.create( - StaticValueProvider.of(options.getJdbcDriverClassName()), - maybeDecrypt(options.getSourceConnectionURL(), null)) - .withUsername(maybeDecrypt(options.getUsername(), null)) - .withPassword(maybeDecrypt(options.getPassword(), null)) - .withMaxConnections(options.getMaxConnections()); - - if (options.getSourceConnectionProperties() != null) { - config = config.withConnectionProperties(options.getSourceConnectionProperties()); - } - if (options.getJdbcDriverJars() != null) { - config = config.withDriverJars(options.getJdbcDriverJars()); - } - return config; - } -} diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/ReaderImpl.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/ReaderImpl.java new file mode 100644 index 0000000000..174d08c287 --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/ReaderImpl.java @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.teleport.v2.source.reader; + +import com.google.auto.value.AutoValue; +import com.google.cloud.teleport.v2.source.reader.io.IoWrapper; +import com.google.cloud.teleport.v2.source.reader.io.schema.SourceSchema; +import com.google.cloud.teleport.v2.source.reader.io.transform.ReaderTransform; +import java.io.Serializable; + +@AutoValue +public abstract class ReaderImpl implements Reader, Serializable { + + abstract SourceSchema sourceSchema(); + + abstract ReaderTransform readerTransform(); + + public static ReaderImpl of(IoWrapper ioWrapper) { + SourceSchema sourceSchema = ioWrapper.discoverTableSchema(); + ReaderTransform.Builder readerTransformBuilder = ReaderTransform.builder(); + ioWrapper + .getTableReaders() + .entrySet() + .forEach(entry -> readerTransformBuilder.withTableReader(entry.getKey(), entry.getValue())); + + return ReaderImpl.create(sourceSchema, readerTransformBuilder.build()); + } + + @Override + public SourceSchema getSourceSchema() { + return this.sourceSchema(); + } + + @Override + public ReaderTransform getReaderTransform() { + return this.readerTransform(); + } + + static ReaderImpl create(SourceSchema sourceSchema, ReaderTransform readerTransform) { + return new AutoValue_ReaderImpl(sourceSchema, readerTransform); + } +} diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/DbAuth.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/DbAuth.java new file mode 100644 index 0000000000..3c45ac0b3d --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/DbAuth.java @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.v2.source.reader.auth.dbauth; + +import java.io.Serializable; +import org.apache.beam.sdk.options.ValueProvider; + +/** + * Interface for Various ways of providing DB credentials to be provided to {@link + * org.apache.beam.sdk.io.jdbc.JdbcIO JdbcIO}. + */ +public interface DbAuth extends Serializable { + + /** + * Get Value provider for DB userName. + * + * @return the username to be provided to {@link org.apache.beam.sdk.io.jdbc.JdbcIO JdbcIO} + */ + ValueProvider getUserName(); + + /** + * Get Value provider for DB password. 
+   *
+   * @return the password to be provided to {@link org.apache.beam.sdk.io.jdbc.JdbcIO JdbcIO}
+   */
+  ValueProvider<String> getPassword();
+}
diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/GuardedStringValueProvider.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/GuardedStringValueProvider.java
new file mode 100644
index 0000000000..3071ec7e36
--- /dev/null
+++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/GuardedStringValueProvider.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.cloud.teleport.v2.source.reader.auth.dbauth;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.concurrent.atomic.AtomicReference;
+import org.apache.beam.sdk.options.ValueProvider;
+import org.checkerframework.checker.initialization.qual.Initialized;
+import org.checkerframework.checker.nullness.qual.NonNull;
+import org.checkerframework.checker.nullness.qual.UnknownKeyFor;
+import org.identityconnectors.common.security.GuardedString;
+
+/**
+ * Utility class that wraps the password in a {@link GuardedString}. Wrapping the password in a
+ * {@link GuardedString} helps prevent accidental logging of the password by the reader code.
+ * {@link GuardedString} also zeroes the string before it is freed.
+ */
+public final class GuardedStringValueProvider implements ValueProvider<String>, Serializable {
+  private GuardedString guardedString;
+
+  /**
+   * Creates a new instance of {@link GuardedStringValueProvider}.
+   *
+   * @param value value to guard
+   * @return created instance.
+   */
+  public static GuardedStringValueProvider create(String value) {
+    return new GuardedStringValueProvider(new GuardedString(value.toCharArray()));
+  }
+
+  /**
+   * Implementation of {@link ValueProvider#get()}.
+   *
+   * @return the wrapped string.
+   */
+  @Override
+  public String get() {
+    AtomicReference<String> ret = new AtomicReference<>("");
+    this.guardedString().access((clearChars) -> ret.set(new String(clearChars)));
+    return ret.get();
+  }
+
+  private GuardedString guardedString() {
+    return this.guardedString;
+  }
+
+  private GuardedStringValueProvider(GuardedString guardedString) {
+    this.guardedString = guardedString;
+  }
+
+  @Override
+  public @UnknownKeyFor @NonNull @Initialized boolean isAccessible() {
+    return true;
+  }
+
+  private void writeObject(java.io.ObjectOutputStream out) throws IOException {
+    // TODO: work on an encrypted version of this.
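+    // Custom serialization: GuardedString itself is not Serializable, so the cleartext value is
+    // written out here and re-wrapped into a new GuardedString by readObject() below. Note that
+    // the serialized form therefore still carries the password in cleartext.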
+    out.writeObject(this.get());
+  }
+
+  private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
+    this.guardedString = new GuardedString(((String) in.readObject()).toCharArray());
+  }
+
+  private void readObjectNoData() {}
+}
diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/LocalCredentialsProvider.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/LocalCredentialsProvider.java
new file mode 100644
index 0000000000..e1aeff804a
--- /dev/null
+++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/LocalCredentialsProvider.java
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.cloud.teleport.v2.source.reader.auth.dbauth;
+
+import com.google.auto.value.AutoValue;
+import org.apache.beam.sdk.options.ValueProvider;
+import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
+
+/**
+ * Implements the {@link DbAuth} interface for credentials that are passed to the Dataflow job
+ * locally, for example via pipeline input parameters, as opposed to through a Secret Manager URL.
+ */
+@AutoValue
+public abstract class LocalCredentialsProvider implements DbAuth {
+  abstract String userName();
+
+  abstract GuardedStringValueProvider password();
+
+  @Override
+  public ValueProvider<String> getUserName() {
+    return StaticValueProvider.of(this.userName());
+  }
+
+  @Override
+  public ValueProvider<String> getPassword() {
+    return password();
+  }
+
+  public static Builder builder() {
+    return new AutoValue_LocalCredentialsProvider.Builder();
+  }
+
+  @AutoValue.Builder
+  public abstract static class Builder {
+
+    public abstract Builder setUserName(String value);
+
+    abstract Builder setPassword(GuardedStringValueProvider value);
+
+    public Builder setPassword(String password) {
+      return this.setPassword(GuardedStringValueProvider.create(password));
+    }
+
+    public abstract LocalCredentialsProvider build();
+  }
+}
diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/package-info.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/package-info.java
new file mode 100644
index 0000000000..60e0709a3e
--- /dev/null
+++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/package-info.java
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2024 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the + * License for the specific language governing permissions and limitations under + * the License. + */ +/** DB Authentication for Reader. */ +package com.google.cloud.teleport.v2.source.reader.auth.dbauth; diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/package-info.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/package-info.java new file mode 100644 index 0000000000..d71c1afb94 --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/auth/package-info.java @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2024 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** DB and Instance Authentication for Reader. */ +package com.google.cloud.teleport.v2.source.reader.auth; diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/IoWrapper.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/IoWrapper.java new file mode 100644 index 0000000000..a4d90671d3 --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/IoWrapper.java @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.teleport.v2.source.reader.io; + +import com.google.cloud.teleport.v2.source.reader.io.row.SourceRow; +import com.google.cloud.teleport.v2.source.reader.io.schema.SourceSchema; +import com.google.cloud.teleport.v2.source.reader.io.schema.SourceTableReference; +import com.google.common.collect.ImmutableMap; +import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.values.PBegin; +import org.apache.beam.sdk.values.PCollection; + +public interface IoWrapper { + ImmutableMap>> getTableReaders(); + + SourceSchema discoverTableSchema(); +} diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/dialectadapter/mysql/MysqlDialectAdapter.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/dialectadapter/mysql/MysqlDialectAdapter.java index ff25a85d0b..8c714bf7ae 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/dialectadapter/mysql/MysqlDialectAdapter.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/dialectadapter/mysql/MysqlDialectAdapter.java @@ -108,7 +108,7 @@ protected static String getSchemaDiscoveryQuery(SourceSchemaReference sourceSche .append(String.join(",", InformationSchemaCols.colList())) .append( String.format( - " FROM INFORMATION_SCHEMA.Columns WHERE TABLE_SCHEMA = %s AND", + " FROM INFORMATION_SCHEMA.Columns WHERE TABLE_SCHEMA = '%s' AND", sourceSchemaReference.dbName())) .append(" TABLE_NAME = ?") .toString(); diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/JdbcIoWrapper.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/JdbcIoWrapper.java new file mode 100644 index 0000000000..df6587977b --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/JdbcIoWrapper.java @@ -0,0 +1,201 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper; + +import com.google.cloud.teleport.v2.source.reader.io.IoWrapper; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.config.JdbcIOWrapperConfig; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.config.TableConfig; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.rowmapper.JdbcSourceRowMapper; +import com.google.cloud.teleport.v2.source.reader.io.row.SourceRow; +import com.google.cloud.teleport.v2.source.reader.io.schema.SchemaDiscovery; +import com.google.cloud.teleport.v2.source.reader.io.schema.SchemaDiscoveryImpl; +import com.google.cloud.teleport.v2.source.reader.io.schema.SourceSchema; +import com.google.cloud.teleport.v2.source.reader.io.schema.SourceTableReference; +import com.google.cloud.teleport.v2.source.reader.io.schema.SourceTableSchema; +import com.google.cloud.teleport.v2.spanner.migrations.schema.SourceColumnType; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import javax.sql.DataSource; +import org.apache.beam.sdk.io.jdbc.JdbcIO; +import org.apache.beam.sdk.io.jdbc.JdbcIO.DataSourceConfiguration; +import org.apache.beam.sdk.io.jdbc.JdbcIO.ReadWithPartitions; +import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider; +import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.values.PBegin; +import org.apache.beam.sdk.values.PCollection; +import org.checkerframework.checker.initialization.qual.Initialized; +import org.checkerframework.checker.nullness.qual.NonNull; +import org.checkerframework.checker.nullness.qual.UnknownKeyFor; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public final class JdbcIoWrapper implements IoWrapper { + private final ImmutableMap>> + tableReaders; + private final SourceSchema sourceSchema; + + private static final Logger logger = LoggerFactory.getLogger(JdbcIoWrapper.class); + + public static JdbcIoWrapper of(JdbcIOWrapperConfig config) { + DataSourceConfiguration dataSourceConfiguration = getDataSourceConfiguration(config); + + SourceSchema sourceSchema = getSourceSchema(config, dataSourceConfiguration); + ImmutableMap>> tableReaders = + buildTableReaders(config, dataSourceConfiguration, sourceSchema); + return new JdbcIoWrapper(tableReaders, sourceSchema); + } + + @Override + public ImmutableMap>> + getTableReaders() { + return this.tableReaders; + } + + @Override + public SourceSchema discoverTableSchema() { + return this.sourceSchema; + } + + static ImmutableMap>> + buildTableReaders( + JdbcIOWrapperConfig config, + DataSourceConfiguration dataSourceConfiguration, + SourceSchema sourceSchema) { + return config.tableConfigs().stream() + .map( + tableConfig -> { + SourceTableSchema sourceTableSchema = + findSourceTableSchema(sourceSchema, tableConfig); + return Map.entry( + SourceTableReference.builder() + .setSourceSchemaReference(sourceSchema.schemaReference()) + .setSourceTableName(sourceTableSchema.tableName()) + .setSourceTableSchemaUUID(sourceTableSchema.tableSchemaUUID()) + .build(), + getJdbcIO(config, dataSourceConfiguration, tableConfig, sourceTableSchema)); + }) + .collect(ImmutableMap.toImmutableMap(Map.Entry::getKey, Map.Entry::getValue)); + } + + static SourceTableSchema findSourceTableSchema( + SourceSchema sourceSchema, TableConfig tableConfig) { + return sourceSchema.tableSchemas().stream() + .filter(schema -> schema.tableName().equals(tableConfig.tableName())) + .findFirst() + 
.orElseThrow(); + } + + static SourceSchema getSourceSchema( + JdbcIOWrapperConfig config, DataSourceConfiguration dataSourceConfiguration) { + SchemaDiscovery schemaDiscovery = + new SchemaDiscoveryImpl(config.dialectAdapter(), config.schemaDiscoveryBackOff()); + SourceSchema.Builder sourceSchemaBuilder = + SourceSchema.builder().setSchemaReference(config.sourceSchemaReference()); + DataSource dataSource = dataSourceConfiguration.buildDatasource(); + ImmutableList tables = + config.tableConfigs().stream() + .map(TableConfig::tableName) + .collect(ImmutableList.toImmutableList()); + ImmutableMap> tableSchemas = + schemaDiscovery.discoverTableSchema(dataSource, config.sourceSchemaReference(), tables); + tableSchemas.entrySet().stream() + .map( + tableEntry -> { + SourceTableSchema.Builder sourceTableSchemaBuilder = + SourceTableSchema.builder().setTableName(tableEntry.getKey()); + tableEntry + .getValue() + .entrySet() + .forEach( + colEntry -> + sourceTableSchemaBuilder.addSourceColumnNameToSourceColumnType( + colEntry.getKey(), colEntry.getValue())); + return sourceTableSchemaBuilder.build(); + }) + .forEach(sourceSchemaBuilder::addTableSchema); + return sourceSchemaBuilder.build(); + } + + private static PTransform> getJdbcIO( + JdbcIOWrapperConfig config, + DataSourceConfiguration dataSourceConfiguration, + TableConfig tableConfig, + SourceTableSchema sourceTableSchema) { + ReadWithPartitions jdbcIO = + JdbcIO.readWithPartitions() + .withTable(tableConfig.tableName()) + .withPartitionColumn(tableConfig.partitionColumns().get(0)) + .withDataSourceProviderFn(JdbcIO.PoolableDataSourceProvider.of(dataSourceConfiguration)) + .withRowMapper( + new JdbcSourceRowMapper(config.valueMappingsProvider(), sourceTableSchema)); + if (tableConfig.maxFetchSize() != null) { + jdbcIO = jdbcIO.withFetchSize(tableConfig.maxFetchSize()); + } + if (tableConfig.maxPartitions() != null) { + jdbcIO = jdbcIO.withNumPartitions(tableConfig.maxPartitions()); + } + return jdbcIO; + } + + private static DataSourceConfiguration getDataSourceConfiguration(JdbcIOWrapperConfig config) { + + DataSourceConfiguration dataSourceConfig = + JdbcIO.DataSourceConfiguration.create( + StaticValueProvider.of(config.jdbcDriverClassName()), + StaticValueProvider.of(getUrl(config))) + .withDriverJars(config.jdbcDriverJars()) + .withMaxConnections(Math.toIntExact(config.maxConnections())); + + if (!config.dbAuth().getUserName().get().isBlank()) { + dataSourceConfig = dataSourceConfig.withUsername(config.dbAuth().getUserName().get()); + } + if (!config.dbAuth().getPassword().get().isBlank()) { + dataSourceConfig = dataSourceConfig.withPassword(config.dbAuth().getPassword().get()); + } + return dataSourceConfig; + } + + private static String getUrl(JdbcIOWrapperConfig config) { + StringBuffer urlBuilder = + new StringBuffer() + .append(config.sourceHost()) + .append(":") + .append(config.sourcePort()) + .append("/") + .append(config.sourceSchemaReference().dbName()); + /* TODO: Handle PG Namespace */ + ImmutableList.Builder attributesBuilder = new ImmutableList.Builder<>(); + if (config.autoReconnect()) { + attributesBuilder + .add("autoReconnect=true") + .add("maxReconnects=" + config.reconnectAttempts()); + } + String attributes = String.join("&", attributesBuilder.build()); + if (!attributes.isBlank()) { + urlBuilder.append("?").append(attributes); + } + logger.debug("connection url is" + urlBuilder.toString()); + return urlBuilder.toString(); + } + + private JdbcIoWrapper( + ImmutableMap>> tableReaders, + SourceSchema sourceSchema) { 
+ this.tableReaders = tableReaders; + this.sourceSchema = sourceSchema; + } +} diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/JdbcIOWrapperConfig.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/JdbcIOWrapperConfig.java new file mode 100644 index 0000000000..e6291af82d --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/JdbcIOWrapperConfig.java @@ -0,0 +1,149 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.config; + +import com.google.auto.value.AutoValue; +import com.google.cloud.teleport.v2.source.reader.auth.dbauth.DbAuth; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.dialectadapter.DialectAdapter; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.config.defaults.MySqlConfigDefaults; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.rowmapper.JdbcValueMappingsProvider; +import com.google.cloud.teleport.v2.source.reader.io.schema.SourceSchemaReference; +import com.google.cloud.teleport.v2.source.reader.io.schema.typemapping.UnifiedTypeMapper.MapperType; +import com.google.common.collect.ImmutableList; +import org.apache.beam.sdk.util.FluentBackoff; + +/** + * Configuration for {@link + * com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.JdbcIoWrapper JdbcIoWrapper}. + */ +@AutoValue +public abstract class JdbcIOWrapperConfig { + + /** Source Endpoint. */ + public abstract String sourceHost(); + + /** Source Port. */ + public abstract String sourcePort(); + + /** {@link SourceSchemaReference}. */ + public abstract SourceSchemaReference sourceSchemaReference(); + + /** Table Configurations. */ + public abstract ImmutableList tableConfigs(); + + /** Shard ID. */ + public abstract String shardID(); + + /** DB credentials. */ + public abstract DbAuth dbAuth(); + + /* + * A comma-separated list of driver JAR files. (Example: + * "gs://bucket/driver_jar1.jar,gs://bucket/driver_jar2.jar") + */ + public abstract String jdbcDriverJars(); + + /* Name of the JDbc Driver Class. */ + public abstract String jdbcDriverClassName(); + + /** Schema Mapper Type, defaults to MySQl. */ + public abstract MapperType schemaMapperType(); + + /** Dialect Adapter. */ + public abstract DialectAdapter dialectAdapter(); + + /** Source Row Mapping Provider. */ + public abstract JdbcValueMappingsProvider valueMappingsProvider(); + + /* + * Properties string to use for the JDBC connection. + * Format of the string must be [propertyName=property;] + * Defaults to a vetted configuration based on benchmarking results. 
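+   * The complete default value lives in MySqlConfigDefaults.DEFAULT_MYSQL_CONNECTION_PROPERTIES.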
+ * Example: + * "maxTotal=160;maxpoolsize=160;maxIdle=160;minIdle=160" + * + ";wait_timeout=57600" + * + ";interactive_timeout=57600" + * + ";idletimeout=3600" + * + ";maxwaittime=600_000" + * + ";maxWaitMillis=600_000" + * + ";maxConnLifetimeMillis=600_000" + * + + * ";testOnCreate=true;testOnBorrow=true;testOnReturn=true;testWhileIdle=true" + */ + public abstract String connectionProperties(); + + /** Auto Reconnect for dropped connections. */ + public abstract Boolean autoReconnect(); + + /** Reconnect Attempts for Auto Reconnect default 10. */ + public abstract Long reconnectAttempts(); + + /** Max Number of connections. */ + public abstract Long maxConnections(); + + /** BackOff Strategy for Schema Discovery retries. Defaults to {@link FluentBackoff#DEFAULT}. */ + public abstract FluentBackoff schemaDiscoveryBackOff(); + + public static Builder builderWithMySqlDefaults() { + return new AutoValue_JdbcIOWrapperConfig.Builder() + .setSchemaMapperType(MySqlConfigDefaults.DEFAULT_MYSQL_SCHEMA_MAPPER_TYPE) + .setDialectAdapter(MySqlConfigDefaults.DEFAULT_MYSQL_DIALECT_ADAPTER) + .setValueMappingsProvider(MySqlConfigDefaults.DEFAULT_MYSQL_VALUE_MAPPING_PROVIDER) + .setAutoReconnect(MySqlConfigDefaults.DEFAULT_MYSQL_AUTO_RECONNECT) + .setReconnectAttempts(MySqlConfigDefaults.DEFAULT_MYSQL_RECONNECT_ATTEMPTS) + .setConnectionProperties(MySqlConfigDefaults.DEFAULT_MYSQL_CONNECTION_PROPERTIES) + .setMaxConnections(MySqlConfigDefaults.DEFAULT_MYSQL_MAX_CONNECTIONS) + .setSchemaDiscoveryBackOff(MySqlConfigDefaults.DEFAULT_MYSQL_SCHEMA_DISCOVERY_BACKOFF); + } + + @AutoValue.Builder + public abstract static class Builder { + + public abstract Builder setSourceHost(String value); + + public abstract Builder setSourcePort(String value); + + public abstract Builder setSourceSchemaReference(SourceSchemaReference value); + + public abstract Builder setTableConfigs(ImmutableList value); + + public abstract Builder setShardID(String value); + + public abstract Builder setDbAuth(DbAuth value); + + public abstract Builder setSchemaMapperType(MapperType value); + + public abstract Builder setDialectAdapter(DialectAdapter value); + + public abstract Builder setValueMappingsProvider(JdbcValueMappingsProvider value); + + public abstract Builder setJdbcDriverJars(String value); + + public abstract Builder setJdbcDriverClassName(String value); + + public abstract Builder setConnectionProperties(String value); + + public abstract Builder setReconnectAttempts(Long value); + + public abstract Builder setAutoReconnect(Boolean value); + + public abstract Builder setSchemaDiscoveryBackOff(FluentBackoff value); + + public abstract Builder setMaxConnections(Long value); + + public abstract JdbcIOWrapperConfig build(); + } +} diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/TableConfig.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/TableConfig.java new file mode 100644 index 0000000000..aa3f025fd1 --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/TableConfig.java @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.config; + +import com.google.auto.value.AutoValue; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import javax.annotation.Nullable; + +/** Table Configuration. */ +@AutoValue +public abstract class TableConfig { + + /** Name of the table. */ + public abstract String tableName(); + + /** + * Max number of read partitions. If not-null uses the user supplied maxPartitions, instead of + * auto-inference. defaults to null. + */ + @Nullable + public abstract Integer maxPartitions(); + + /** + * Configures the size of data read in db, per db read call. Defaults to beam's DEFAULT_FETCH_SIZE + * of 50_000. For manually fine-tuning this, take into account the read ahead buffer pool settings + * (innodb_read_ahead_threshold) and the worker memory. + */ + @Nullable + public abstract Integer maxFetchSize(); + + /** Partition Column. As of now only a single partition column is supported */ + public abstract ImmutableList partitionColumns(); + + public static Builder builder(String tableName) { + return new AutoValue_TableConfig.Builder() + .setTableName(tableName) + .setMaxPartitions(null) + .setMaxFetchSize(null); + } + + @AutoValue.Builder + public abstract static class Builder { + + abstract Builder setTableName(String value); + + public abstract Builder setMaxPartitions(Integer value); + + public abstract Builder setMaxFetchSize(Integer value); + + abstract ImmutableList.Builder partitionColumnsBuilder(); + + public Builder withPartitionColum(String column) { + this.partitionColumnsBuilder().add(column); + return this; + } + + abstract TableConfig autoBuild(); + + public TableConfig build() { + TableConfig tableConfig = this.autoBuild(); + Preconditions.checkState( + tableConfig.partitionColumns().size() == 1, + "A single partition column is required. Currently Partition Columns are not auto inferred and composite partition columns are not supported."); + return tableConfig; + } + } +} diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/defaults/MySqlConfigDefaults.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/defaults/MySqlConfigDefaults.java new file mode 100644 index 0000000000..d61cd88cf5 --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/defaults/MySqlConfigDefaults.java @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.config.defaults; + +import com.google.cloud.teleport.v2.source.reader.io.jdbc.dialectadapter.DialectAdapter; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.dialectadapter.mysql.MysqlDialectAdapter; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.dialectadapter.mysql.MysqlDialectAdapter.MySqlVersion; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.rowmapper.JdbcValueMappingsProvider; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.rowmapper.provider.MysqlJdbcValueMappings; +import com.google.cloud.teleport.v2.source.reader.io.schema.typemapping.UnifiedTypeMapper.MapperType; +import org.apache.beam.sdk.util.FluentBackoff; + +// TODO: Fine-tune the defaults based on benchmarking. + +/** + * MySql Default Configuration for {@link + * com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.JdbcIoWrapper JdbcIoWrapper}. + */ +public class MySqlConfigDefaults { + + public static final MapperType DEFAULT_MYSQL_SCHEMA_MAPPER_TYPE = MapperType.MYSQL; + public static final DialectAdapter DEFAULT_MYSQL_DIALECT_ADAPTER = + new MysqlDialectAdapter(MySqlVersion.DEFAULT); + public static final JdbcValueMappingsProvider DEFAULT_MYSQL_VALUE_MAPPING_PROVIDER = + new MysqlJdbcValueMappings(); + + public static final String DEFAULT_MYSQL_CONNECTION_PROPERTIES = + "maxTotal=160;maxpoolsize=160;maxIdle=160;minIdle=160" + + ";wait_timeout=57600" + + ";interactive_timeout=57600" + + ";idletimeout=3600" + + ";maxwaittime=600_000" + + ";maxWaitMillis=600_000" + + ";maxConnLifetimeMillis=600_000" + + ";testOnCreate=true;testOnBorrow=true;testOnReturn=true;testWhileIdle=true"; + + public static final Long DEFAULT_MYSQL_MAX_CONNECTIONS = 160L; + + public static final boolean DEFAULT_MYSQL_AUTO_RECONNECT = true; + + public static final long DEFAULT_MYSQL_RECONNECT_ATTEMPTS = 10L; + public static final FluentBackoff DEFAULT_MYSQL_SCHEMA_DISCOVERY_BACKOFF = FluentBackoff.DEFAULT; + + private MySqlConfigDefaults() {} +} diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/defaults/package-info.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/defaults/package-info.java new file mode 100644 index 0000000000..a4583e01c2 --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/defaults/package-info.java @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2024 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** Config Defaults for jdbc sources. 
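+ * These values seed JdbcIOWrapperConfig#builderWithMySqlDefaults() and, per the class-level TODO
+ * in MySqlConfigDefaults, are expected to be fine-tuned based on benchmarking.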
*/ +package com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.config.defaults; diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/package-info.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/package-info.java new file mode 100644 index 0000000000..b598a31c7b --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/package-info.java @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2024 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** Config for JdbcIoWrapper. */ +package com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.config; diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/package-info.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/package-info.java new file mode 100644 index 0000000000..473e81c656 --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/package-info.java @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2024 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** IoWrapper for jdbc sources. 
*/ +package com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper; diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/JdbcValueMapper.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/JdbcValueMapper.java index 766007fd26..f568f2d325 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/JdbcValueMapper.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/JdbcValueMapper.java @@ -15,6 +15,7 @@ */ package com.google.cloud.teleport.v2.source.reader.io.jdbc.rowmapper; +import java.io.Serializable; import java.sql.ResultSet; import java.sql.SQLException; import org.apache.avro.Schema; @@ -25,7 +26,7 @@ * * @param */ -public class JdbcValueMapper { +public class JdbcValueMapper implements Serializable { private ResultSetValueExtractor valueExtractor; private ResultSetValueMapper valueMapper; diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/JdbcValueMappingsProvider.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/JdbcValueMappingsProvider.java index eee89df914..8dbf46b0b5 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/JdbcValueMappingsProvider.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/JdbcValueMappingsProvider.java @@ -16,12 +16,13 @@ package com.google.cloud.teleport.v2.source.reader.io.jdbc.rowmapper; import com.google.common.collect.ImmutableMap; +import java.io.Serializable; /** * An interface to be implemented for various jdbc source types to get the {@link JdbcValueMapper} * for various source types. */ -public interface JdbcValueMappingsProvider { +public interface JdbcValueMappingsProvider extends Serializable { /** * Get Mapping of source types to {@link JdbcValueMapper}. diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/ResultSetValueExtractor.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/ResultSetValueExtractor.java index 82fc9d6e7e..99c7771887 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/ResultSetValueExtractor.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/ResultSetValueExtractor.java @@ -15,6 +15,7 @@ */ package com.google.cloud.teleport.v2.source.reader.io.jdbc.rowmapper; +import java.io.Serializable; import java.sql.ResultSet; import java.sql.SQLException; import javax.annotation.Nullable; @@ -24,7 +25,7 @@ * * @param type of the value extracted. */ -public interface ResultSetValueExtractor { +public interface ResultSetValueExtractor extends Serializable { /** * Extract the requested field from the result set. 
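Taken together, the reader pieces introduced above are assembled roughly as follows. This is an illustrative sketch rather than code from this change: the host, port, credentials, driver class, and table/partition-column values are made-up placeholders, and only builder methods that appear elsewhere in this patch are used. The Serializable additions just above matter because these extractors and mappers travel inside the JdbcSourceRowMapper that Beam serializes and ships to Dataflow workers.

// Illustrative assembly of the new reader API (hypothetical values).
TableConfig tableConfig = TableConfig.builder("people").withPartitionColum("id").build();
JdbcIOWrapperConfig config =
    JdbcIOWrapperConfig.builderWithMySqlDefaults()
        .setSourceHost("10.128.0.2") // placeholder host
        .setSourcePort("3306") // placeholder port
        .setSourceSchemaReference(SourceSchemaReference.builder().setDbName("testDB").build())
        .setTableConfigs(ImmutableList.of(tableConfig))
        .setShardID("shard-0") // placeholder shard id
        .setDbAuth(
            LocalCredentialsProvider.builder().setUserName("user").setPassword("secret").build())
        .setJdbcDriverJars("") // no extra driver jars in this sketch
        .setJdbcDriverClassName("com.mysql.jdbc.Driver") // placeholder driver class
        .build();
// The wrapper discovers table schemas and exposes per-table read transforms; ReaderImpl
// bundles them into a single ReaderTransform for the pipeline.
ReaderImpl reader = ReaderImpl.of(JdbcIoWrapper.of(config));
ReaderTransform readerTransform = reader.getReaderTransform();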
diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/ResultSetValueMapper.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/ResultSetValueMapper.java index fe93b58997..95b6c342db 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/ResultSetValueMapper.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/ResultSetValueMapper.java @@ -15,6 +15,7 @@ */ package com.google.cloud.teleport.v2.source.reader.io.jdbc.rowmapper; +import java.io.Serializable; import java.sql.ResultSet; import java.sql.SQLException; import org.apache.avro.Schema; @@ -27,7 +28,7 @@ * * @param Type of the filed extracted from {@link ResultSet}. */ -public interface ResultSetValueMapper { +public interface ResultSetValueMapper extends Serializable { /** * Map the extracted value to an object accepted by {@link diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/schema/SourceTableSchema.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/schema/SourceTableSchema.java index 4096af05dc..a977c0f97c 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/schema/SourceTableSchema.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/schema/SourceTableSchema.java @@ -66,7 +66,11 @@ public Schema getAvroPayload() { */ public static Builder builder() { - var builder = new AutoValue_SourceTableSchema.Builder(); + return builder(MapperType.MYSQL); + } + + public static Builder builder(MapperType mapperType) { + var builder = new AutoValue_SourceTableSchema.Builder().initialize(mapperType); builder.setTableSchemaUUID(UUID.randomUUID().toString()); return builder; } @@ -77,6 +81,8 @@ public abstract static class Builder { public abstract Builder setTableName(String value); + private UnifiedTypeMapper.MapperType mapperType; + abstract ImmutableMap.Builder sourceColumnNameToSourceColumnTypeBuilder(); @@ -88,7 +94,7 @@ public final Builder addSourceColumnNameToSourceColumnType( this.payloadFieldAssembler = this.payloadFieldAssembler .name(sourceColumnName) - .type(new UnifiedTypeMapper(MapperType.MYSQL).getSchema(sourceColumnType)) + .type(new UnifiedTypeMapper(this.mapperType).getSchema(sourceColumnType)) .noDefault(); return this; } @@ -119,6 +125,11 @@ public Builder() { abstract SourceTableSchema autoBuild(); + public Builder initialize(UnifiedTypeMapper.MapperType mapperType) { + this.mapperType = mapperType; + return this; + } + public SourceTableSchema build() { this.setAvroSchema(this.payloadFieldAssembler.endRecord().noDefault().endRecord()); SourceTableSchema sourceTableSchema = autoBuild(); diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/transform/AccumulatingTableReader.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/transform/AccumulatingTableReader.java index 05e64228af..c365b55879 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/transform/AccumulatingTableReader.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/transform/AccumulatingTableReader.java @@ -39,12 +39,12 @@ */ @AutoValue abstract class AccumulatingTableReader extends PTransform { - abstract 
ImmutableMap>> + public abstract ImmutableMap>> tableTransforms(); - abstract TupleTag sourceRowTag(); + public abstract TupleTag sourceRowTag(); - abstract TupleTag sourceTableReferenceTag(); + public abstract TupleTag sourceTableReferenceTag(); @Override public PCollectionTuple expand(PBegin input) { diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/transform/ReaderTransform.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/transform/ReaderTransform.java index b00174fe8a..a02b700f0f 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/transform/ReaderTransform.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/transform/ReaderTransform.java @@ -18,6 +18,7 @@ import com.google.auto.value.AutoValue; import com.google.cloud.teleport.v2.source.reader.io.row.SourceRow; import com.google.cloud.teleport.v2.source.reader.io.schema.SourceTableReference; +import java.io.Serializable; import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.values.PBegin; import org.apache.beam.sdk.values.PCollection; @@ -56,12 +57,12 @@ * table separately in the `PCollectionTuple`. */ @AutoValue -public abstract class ReaderTransform { - abstract TupleTag sourceRowTag(); +public abstract class ReaderTransform implements Serializable { + public abstract TupleTag sourceRowTag(); - abstract TupleTag sourceTableReferenceTag(); + public abstract TupleTag sourceTableReferenceTag(); - abstract PTransform readTransform(); + public abstract PTransform readTransform(); public static Builder builder() { TupleTag sourceRowTupleTag = new TupleTag<>(); diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/SourceDbToSpanner.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/SourceDbToSpanner.java index 6d171a493a..202f605af5 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/SourceDbToSpanner.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/SourceDbToSpanner.java @@ -15,27 +15,44 @@ */ package com.google.cloud.teleport.v2.templates; +import com.google.cloud.spanner.BatchClient; +import com.google.cloud.spanner.BatchReadOnlyTransaction; +import com.google.cloud.spanner.DatabaseAdminClient; +import com.google.cloud.spanner.Dialect; import com.google.cloud.spanner.Mutation; +import com.google.cloud.spanner.TimestampBound; import com.google.cloud.teleport.metadata.Template; import com.google.cloud.teleport.metadata.TemplateCategory; import com.google.cloud.teleport.v2.common.UncaughtExceptionLogger; +import com.google.cloud.teleport.v2.options.OptionsToConfigBuilder; import com.google.cloud.teleport.v2.options.SourceDbToSpannerOptions; -import com.google.cloud.teleport.v2.source.DataSourceProvider; -import com.google.cloud.teleport.v2.spanner.ResultSetToMutation; +import com.google.cloud.teleport.v2.source.reader.ReaderImpl; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.JdbcIoWrapper; +import com.google.cloud.teleport.v2.source.reader.io.row.SourceRow; +import com.google.cloud.teleport.v2.source.reader.io.schema.SourceSchema; +import com.google.cloud.teleport.v2.source.reader.io.schema.SourceTableReference; +import com.google.cloud.teleport.v2.source.reader.io.schema.SourceTableSchema; +import com.google.cloud.teleport.v2.source.reader.io.transform.ReaderTransform; +import 
com.google.cloud.teleport.v2.spanner.ddl.Ddl; +import com.google.cloud.teleport.v2.spanner.ddl.InformationSchemaScanner; +import com.google.cloud.teleport.v2.spanner.migrations.schema.ISchemaMapper; +import com.google.cloud.teleport.v2.spanner.migrations.schema.IdentityMapper; +import com.google.cloud.teleport.v2.spanner.migrations.schema.SessionBasedMapper; +import com.google.cloud.teleport.v2.transformer.SourceRowToMutationDoFn; import com.google.common.annotations.VisibleForTesting; -import java.util.Arrays; -import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; import java.util.Map; -import java.util.Set; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.PipelineResult; +import org.apache.beam.sdk.io.gcp.spanner.SpannerAccessor; +import org.apache.beam.sdk.io.gcp.spanner.SpannerConfig; import org.apache.beam.sdk.io.gcp.spanner.SpannerIO; import org.apache.beam.sdk.io.gcp.spanner.SpannerIO.Write; -import org.apache.beam.sdk.io.jdbc.JdbcIO; import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.options.ValueProvider; +import org.apache.beam.sdk.transforms.ParDo; import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.PCollectionTuple; /** * A template that copies data from a relational database using JDBC to an existing Spanner @@ -101,63 +118,70 @@ public static void main(String[] args) { @VisibleForTesting static PipelineResult run(SourceDbToSpannerOptions options) { Pipeline pipeline = Pipeline.create(options); - Map> columnsToIgnore = getColumnsToIgnore(options); - Map tableVsPartitionMap = getTablesVsPartitionColumn(options); - for (String table : getTablesVsPartitionColumn(options).keySet()) { - PCollection rows = - pipeline.apply( - "ReadPartitions_" + table, - getJdbcReader( - table, tableVsPartitionMap.get(table), columnsToIgnore.get(table), options)); - rows.apply("Write_" + table, getSpannerWrite(options)); - } + + ReaderImpl reader = + ReaderImpl.of( + JdbcIoWrapper.of( + OptionsToConfigBuilder.MySql.configWithMySqlDefaultsFromOptions(options))); + SourceSchema srcSchema = reader.getSourceSchema(); + ReaderTransform readerTransform = reader.getReaderTransform(); + + PCollectionTuple rowsAndTables = pipeline.apply("Read rows", readerTransform.readTransform()); + PCollection sourceRows = rowsAndTables.get(readerTransform.sourceRowTag()); + + SourceRowToMutationDoFn transformDoFn = + SourceRowToMutationDoFn.create(getSchemaMapper(options), getTableIDToRefMap(srcSchema)); + PCollection mutations = sourceRows.apply("Transform", ParDo.of(transformDoFn)); + mutations.apply("Write", getSpannerWrite(options)); + return pipeline.run(); } - private static Map getTablesVsPartitionColumn(SourceDbToSpannerOptions options) { - String[] tables = options.getTables().split(","); - String[] partitionColumns = options.getPartitionColumns().split(","); - if (tables.length != partitionColumns.length) { - throw new RuntimeException( - "invalid configuration. 
Partition column count does not match " + "tables count."); + private static ISchemaMapper getSchemaMapper(SourceDbToSpannerOptions options) { + SpannerConfig spannerConfig = + SpannerConfig.create() + .withProjectId(ValueProvider.StaticValueProvider.of(options.getProjectId())) + .withHost(ValueProvider.StaticValueProvider.of(options.getSpannerHost())) + .withInstanceId(ValueProvider.StaticValueProvider.of(options.getInstanceId())) + .withDatabaseId(ValueProvider.StaticValueProvider.of(options.getDatabaseId())); + Ddl ddl = getInformationSchemaAsDdl(spannerConfig); + ISchemaMapper schemaMapper = new IdentityMapper(ddl); + if (options.getSessionFilePath() != null && !options.getSessionFilePath().equals("")) { + schemaMapper = new SessionBasedMapper(options.getSessionFilePath(), ddl); } - Map tableVsPartitionColumn = new HashMap(); - for (int i = 0; i < tables.length; i++) { - tableVsPartitionColumn.put(tables[i], partitionColumns[i]); - } - return tableVsPartitionColumn; + return schemaMapper; } - private static Map> getColumnsToIgnore(SourceDbToSpannerOptions options) { - String ignoreStr = options.getIgnoreColumns(); - if (ignoreStr == null || ignoreStr.isEmpty()) { - return Collections.emptyMap(); - } - Map> ignore = new HashMap<>(); - for (String tableColumns : ignoreStr.split(",")) { - int tableNameIndex = tableColumns.indexOf(':'); - if (tableNameIndex == -1) { - continue; - } - String table = tableColumns.substring(0, tableNameIndex); - String columnStr = tableColumns.substring(tableNameIndex + 1); - Set columns = new HashSet<>(Arrays.asList(columnStr.split(";"))); - ignore.put(table, columns); - } - return ignore; + // TODO: SpannerInfoschema scanner code is duplicated across live, bulk and reverse replication + // templates. We should refactor everything to spanner-common. 
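+  // Reads the target database's dialect and scans its information schema over a
+  // strong-timestamp batch read-only transaction; the resulting Ddl backs the
+  // IdentityMapper/SessionBasedMapper chosen in getSchemaMapper() above.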
+ private static Ddl getInformationSchemaAsDdl(SpannerConfig spannerConfig) { + SpannerAccessor spannerAccessor = SpannerAccessor.getOrCreate(spannerConfig); + DatabaseAdminClient databaseAdminClient = spannerAccessor.getDatabaseAdminClient(); + Dialect dialect = + databaseAdminClient + .getDatabase(spannerConfig.getInstanceId().get(), spannerConfig.getDatabaseId().get()) + .getDialect(); + BatchClient batchClient = spannerAccessor.getBatchClient(); + BatchReadOnlyTransaction context = + batchClient.batchReadOnlyTransaction(TimestampBound.strong()); + InformationSchemaScanner scanner = new InformationSchemaScanner(context, dialect); + Ddl ddl = scanner.scan(); + spannerAccessor.close(); + return ddl; } - private static JdbcIO.ReadWithPartitions getJdbcReader( - String table, - String partitionColumn, - Set columnsToIgnore, - SourceDbToSpannerOptions options) { - return JdbcIO.readWithPartitions() - .withDataSourceProviderFn(new DataSourceProvider(options)) - .withTable(table) - .withPartitionColumn(partitionColumn) - .withRowMapper(ResultSetToMutation.create(table, columnsToIgnore)) - .withNumPartitions(options.getNumPartitions()); + private static Map getTableIDToRefMap(SourceSchema srcSchema) { + Map tableIdMapper = new HashMap<>(); + for (SourceTableSchema srcTableSchema : srcSchema.tableSchemas()) { + tableIdMapper.put( + srcTableSchema.tableSchemaUUID(), + SourceTableReference.builder() + .setSourceSchemaReference(srcSchema.schemaReference()) + .setSourceTableName(srcTableSchema.tableName()) + .setSourceTableSchemaUUID(srcTableSchema.tableSchemaUUID()) + .build()); + } + return tableIdMapper; } private static Write getSpannerWrite(SourceDbToSpannerOptions options) { diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/transformer/SourceRowToMutationDoFn.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/transformer/SourceRowToMutationDoFn.java index c45f396f63..1f9375c15c 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/transformer/SourceRowToMutationDoFn.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/transformer/SourceRowToMutationDoFn.java @@ -51,6 +51,8 @@ public static SourceRowToMutationDoFn create( @ProcessElement public void processElement(ProcessContext c) { SourceRow sourceRow = c.element(); + LOG.debug("Starting transformation for Source Row {}", sourceRow); + if (!tableIdMapper().containsKey(sourceRow.tableSchemaUUID())) { // TODO: Remove LOG statements from processElement once counters and DLQ is supported. LOG.error( @@ -71,10 +73,7 @@ public void processElement(ProcessContext c) { c.output(mutation); } catch (Exception e) { // TODO: Add DLQ integration once supported. - LOG.error( - "Unable to transform source row to spanner mutation: {} {}", - e.getMessage(), - e.fillInStackTrace()); + LOG.error("Unable to transform source row to spanner mutation: {}", e.getMessage()); } } diff --git a/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/options/OptionsToConfigBuilderTest.java b/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/options/OptionsToConfigBuilderTest.java new file mode 100644 index 0000000000..7aa4112705 --- /dev/null +++ b/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/options/OptionsToConfigBuilderTest.java @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.v2.options; + +import static com.google.common.truth.Truth.assertThat; + +import com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.config.JdbcIOWrapperConfig; +import com.google.common.collect.ImmutableList; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.junit.MockitoJUnitRunner; + +/** Test class for {@link OptionsToConfigBuilder}. */ +@RunWith(MockitoJUnitRunner.class) +public class OptionsToConfigBuilderTest { + + @Test + public void testConfigWithMySqlDefaultsFromOptions() { + final String testdriverClassName = "org.apache.derby.jdbc.EmbeddedDriver"; + final String testHost = "localHost"; + final String testPort = "3306"; + final String testuser = "user"; + final String testpassword = "password"; + SourceDbToSpannerOptions sourceDbToSpannerOptions = + PipelineOptionsFactory.as(SourceDbToSpannerOptions.class); + sourceDbToSpannerOptions.setSourceHost(testHost); + sourceDbToSpannerOptions.setSourcePort(testPort); + sourceDbToSpannerOptions.setJdbcDriverClassName(testdriverClassName); + sourceDbToSpannerOptions.setSourceConnectionProperties( + "maxTotal=160;maxpoolsize=160;maxIdle=160;minIdle=160" + ";wait_timeout=57600"); + sourceDbToSpannerOptions.setFetchSize(50000); + sourceDbToSpannerOptions.setMaxConnections(150); + sourceDbToSpannerOptions.setNumPartitions(4000); + sourceDbToSpannerOptions.setUsername(testuser); + sourceDbToSpannerOptions.setPassword(testpassword); + sourceDbToSpannerOptions.setReconnectsEnabled(true); + sourceDbToSpannerOptions.setReconnectAttempts(10); + sourceDbToSpannerOptions.setSourceDB("testDB"); + sourceDbToSpannerOptions.setTables("table1,table2"); + sourceDbToSpannerOptions.setPartitionColumns("col1,col2"); + JdbcIOWrapperConfig config = + OptionsToConfigBuilder.MySql.configWithMySqlDefaultsFromOptions(sourceDbToSpannerOptions); + assertThat(config.autoReconnect()).isTrue(); + assertThat(config.jdbcDriverClassName()).isEqualTo(testdriverClassName); + assertThat(config.sourceHost()).isEqualTo(testHost); + assertThat(config.sourcePort()).isEqualTo(testPort); + assertThat( + ImmutableList.of( + config.tableConfigs().get(0).tableName(), config.tableConfigs().get(1).tableName())) + .containsExactlyElementsIn(ImmutableList.of("table1", "table2")); + assertThat(config.dbAuth().getUserName().get()).isEqualTo(testuser); + assertThat(config.dbAuth().getPassword().get()).isEqualTo(testpassword); + } +} diff --git a/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/DataSourceProviderTest.java b/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/DataSourceProviderTest.java deleted file mode 100644 index 10bbad040c..0000000000 --- a/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/DataSourceProviderTest.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (C) 2024 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. 
You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package com.google.cloud.teleport.v2.source; - -import com.google.cloud.teleport.v2.options.SourceDbToSpannerOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.mockito.junit.MockitoJUnitRunner; - -/** Test class for {@link DataSourceProvider}. */ -@RunWith(MockitoJUnitRunner.class) -public class DataSourceProviderTest { - - @BeforeClass - public static void beforeClass() throws Exception { - // by default, derby uses a lock timeout of 60 seconds. In order to speed up the test - // and detect the lock faster, we decrease this timeout - System.setProperty("derby.locks.waitTimeout", "2"); - System.setProperty("derby.stream.error.file", "build/derby.log"); - } - - @Before - @Test - public void testDataSourceProvider() { - SourceDbToSpannerOptions sourceDbToSpannerOptions = - PipelineOptionsFactory.as(SourceDbToSpannerOptions.class); - sourceDbToSpannerOptions.setJdbcDriverClassName("org.apache.derby.jdbc.EmbeddedDriver"); - sourceDbToSpannerOptions.setSourceConnectionURL("jdbc:derby:memory:testDB;create=true"); - sourceDbToSpannerOptions.setSourceConnectionProperties(""); - var firstSource = new DataSourceProvider(sourceDbToSpannerOptions).apply(null); - var secondSource = new DataSourceProvider(sourceDbToSpannerOptions).apply(null); - // To verify singleton behavior, check that the references are equal. - assert (firstSource == secondSource); - } -} diff --git a/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/GuardedStringValueProviderTest.java b/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/GuardedStringValueProviderTest.java new file mode 100644 index 0000000000..314a6d828b --- /dev/null +++ b/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/GuardedStringValueProviderTest.java @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.v2.source.reader.auth.dbauth; + +import static com.google.common.truth.Truth.assertThat; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.junit.MockitoJUnitRunner; + +/** Test class for {@link GuardedStringValueProvider}. 
*/ +@RunWith(MockitoJUnitRunner.class) +public class GuardedStringValueProviderTest { + @Test + public void testGuardedStringValueProvider() { + final String testPassword = "testPassword"; + GuardedStringValueProvider provider = GuardedStringValueProvider.create(testPassword); + assertThat(provider.isAccessible()).isTrue(); + assertThat(provider.get()).isEqualTo(testPassword); + } +} diff --git a/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/LocalCredentialsProviderTest.java b/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/LocalCredentialsProviderTest.java new file mode 100644 index 0000000000..5f43addb94 --- /dev/null +++ b/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/auth/dbauth/LocalCredentialsProviderTest.java @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.v2.source.reader.auth.dbauth; + +import static com.google.common.truth.Truth.assertThat; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.junit.MockitoJUnitRunner; + +/** Test class for {@link LocalCredentialsProvider}. */ +@RunWith(MockitoJUnitRunner.class) +public class LocalCredentialsProviderTest { + @Test + public void testLocalCredentialsProvider() { + final String testUserName = "testUserName"; + final String testPassword = "testPassword"; + LocalCredentialsProvider provider = + LocalCredentialsProvider.builder() + .setUserName(testUserName) + .setPassword(testPassword) + .build(); + assertThat(provider.getUserName().get()).isEqualTo(testUserName); + assertThat(provider.getPassword().get()).isEqualTo(testPassword); + } +} diff --git a/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/dialectadapter/mysql/MysqlDialectAdapterTest.java b/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/dialectadapter/mysql/MysqlDialectAdapterTest.java index 8fcf0057b7..4d79dbb2e3 100644 --- a/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/dialectadapter/mysql/MysqlDialectAdapterTest.java +++ b/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/dialectadapter/mysql/MysqlDialectAdapterTest.java @@ -180,7 +180,7 @@ public void getSchemaDiscoveryQuery() { MysqlDialectAdapter.getSchemaDiscoveryQuery( SourceSchemaReference.builder().setDbName("testDB").build())) .isEqualTo( - "SELECT COLUMN_NAME,DATA_TYPE,CHARACTER_MAXIMUM_LENGTH,NUMERIC_PRECISION,NUMERIC_SCALE FROM INFORMATION_SCHEMA.Columns WHERE TABLE_SCHEMA = testDB AND TABLE_NAME = ?"); + "SELECT COLUMN_NAME,DATA_TYPE,CHARACTER_MAXIMUM_LENGTH,NUMERIC_PRECISION,NUMERIC_SCALE FROM INFORMATION_SCHEMA.Columns WHERE TABLE_SCHEMA = 'testDB' AND TABLE_NAME = ?"); } private static ResultSet getMockInfoSchemaRs() throws SQLException { diff --git 
a/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/TableConfigTest.java b/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/TableConfigTest.java new file mode 100644 index 0000000000..d50e9fd314 --- /dev/null +++ b/v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/config/TableConfigTest.java @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.v2.source.reader.io.jdbc.iowrapper.config; + +import static com.google.common.truth.Truth.assertThat; +import static org.junit.Assert.assertThrows; + +import com.google.common.collect.ImmutableList; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.junit.MockitoJUnitRunner; + +/** Test class for {@link TableConfig}. */ +@RunWith(MockitoJUnitRunner.class) +public class TableConfigTest { + @Test + public void testTableConfigBuildsWithDefaults() { + final String testTable = "testTable"; + final String partitionColumn = "col_1"; + + TableConfig tableConfig = + TableConfig.builder(testTable).withPartitionColum(partitionColumn).build(); + assertThat(tableConfig.tableName()).isEqualTo(testTable); + assertThat(tableConfig.maxFetchSize()).isNull(); + assertThat(tableConfig.maxPartitions()).isNull(); + assertThat(tableConfig.partitionColumns()).isEqualTo(ImmutableList.of(partitionColumn)); + } + + @Test + public void testTableConfigBuilds() { + final String testTable = "testTable"; + final String partitionColumn = "col_1"; + final int maxFetchSize = 100; + final int maxPartitions = 100; + + TableConfig tableConfig = + TableConfig.builder(testTable) + .withPartitionColum(partitionColumn) + .setMaxFetchSize(maxFetchSize) + .setMaxPartitions(maxPartitions) + .build(); + assertThat(tableConfig.tableName()).isEqualTo(testTable); + assertThat(tableConfig.maxFetchSize()).isEqualTo(maxFetchSize); + assertThat(tableConfig.maxPartitions()).isEqualTo(maxPartitions); + assertThat(tableConfig.partitionColumns()).isEqualTo(ImmutableList.of(partitionColumn)); + } + + @Test + public void testTableConfigPreconditions() { + final String testTable = "testTable"; + + assertThrows(IllegalStateException.class, () -> TableConfig.builder(testTable).build()); + assertThrows( + IllegalStateException.class, + () -> + TableConfig.builder(testTable) + .withPartitionColum("col_1") + .withPartitionColum("col_2") + .build()); + } +} diff --git a/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertor.java b/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertor.java index 3ec2b54e00..9822f23276 100644 --- a/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertor.java +++ 
b/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertor.java @@ -59,7 +59,7 @@ public GenericRecordTypeConvertor(ISchemaMapper schemaMapper, String namespace) /** * This method takes in a generic record and returns a map between the Spanner column name and the * corresponding Spanner column value. This handles the data conversion logic from a GenericRecord - * field to a spanner Value. + * field to a Map of Spanner column name to spanner Value. */ public Map transformChangeEvent(GenericRecord record, String srcTableName) { Map result = new HashMap<>(); @@ -70,31 +70,80 @@ public Map transformChangeEvent(GenericRecord record, String srcT * TODO: Handle columns that will not exist at source - synth id - shard id - multi-column * transformations - auto-gen keys - Default columns - generated columns */ - String srcColName = - schemaMapper.getSourceColumnName(namespace, spannerTableName, spannerColName); - Type spannerColumnType = - schemaMapper.getSpannerColumnType(namespace, spannerTableName, spannerColName); - Value value = - getSpannerValue( - record.get(srcColName), - record.getSchema().getField(srcColName).schema(), - srcColName, - spannerColumnType); - result.put(spannerColName, value); + try { + String srcColName = + schemaMapper.getSourceColumnName(namespace, spannerTableName, spannerColName); + Type spannerColumnType = + schemaMapper.getSpannerColumnType(namespace, spannerTableName, spannerColName); + LOG.debug( + "Transformer processing srcCol: {} spannerColumnType:{}", + srcColName, + spannerColumnType); + + Value value = + getSpannerValue( + record.get(srcColName), + record.getSchema().getField(srcColName).schema(), + srcColName, + spannerColumnType); + result.put(spannerColName, value); + } catch (NullPointerException e) { + LOG.info("Unable to transform change event: {}", e.getMessage()); + throw e; + } catch (IllegalArgumentException e) { + LOG.info("Unable to transform change event: {}", e.getMessage()); + throw e; + } catch (Exception e) { + LOG.info( + "Unable to convert spanner value for spanner col: {}. {}", + spannerColName, + e.getMessage()); + throw new RuntimeException( + String.format( + "Unable to convert spanner value for spanner col: %s. %s", + spannerColName, + e.getMessage())); + } } return result; } /** Extract the field value from Generic Record and try to convert it to @spannerType. */ - Value getSpannerValue( + public Value getSpannerValue( Object recordValue, Schema fieldSchema, String recordColName, Type spannerType) { // Logical and record types should be converted to string. + LOG.debug( + "gettingSpannerValue for recordValue: {}, fieldSchema: {}, recordColName: {}, spannerType: {}", + recordValue, + fieldSchema, + recordColName, + spannerType); + if (fieldSchema.getType().equals(Schema.Type.UNION)) { + List types = fieldSchema.getTypes(); + LOG.debug("found union type: {}", types); + // Schema types can only union with Type NULL. Any other UNION is unsupported. + if (types.size() == 2 && types.stream().anyMatch(s -> s.getType().equals(Schema.Type.NULL))) { + if (recordValue == null) { + return null; + } + fieldSchema = + types.stream().filter(s -> !s.getType().equals(Schema.Type.NULL)).findFirst().get(); + } else { + throw new IllegalArgumentException( + String.format( + "Unknown schema field type %s for field %s with value %s.", + fieldSchema, + recordColName, + recordValue)); + } + } if (fieldSchema.getLogicalType() != null) { recordValue = handleLogicalFieldType(recordColName, recordValue, fieldSchema); } else if (fieldSchema.getType().equals(Schema.Type.RECORD)) { // Get the avro field of type record from the whole record. recordValue = handleRecordFieldType(recordColName, (GenericRecord) recordValue, fieldSchema); } + LOG.debug("Updated record value is {} for recordColName {}", recordValue, recordColName); Dialect dialect = schemaMapper.getDialect(); if (dialect == null) { throw new NullPointerException("schemaMapper returned null spanner dialect."); @@ -121,6 +170,7 @@ static class CustomAvroTypes { /** Avro logical types are converted to an equivalent string type. */ static String handleLogicalFieldType(String fieldName, Object recordValue, Schema fieldSchema) { + LOG.debug("found logical type for col {} with schema {}", fieldName, fieldSchema); if (recordValue == null) { return null; } @@ -152,14 +202,17 @@ static String handleLogicalFieldType(String fieldName, Object recordValue, Schem } else if (fieldSchema.getLogicalType() instanceof LogicalTypes.TimestampMillis) { Instant timestamp = Instant.ofEpochMilli(Long.valueOf(recordValue.toString())); return timestamp.atOffset(ZoneOffset.UTC).format(DateTimeFormatter.ISO_OFFSET_DATE_TIME); - } // TODO: add support for custom logical types VARCHAR, JSON and NUMBER once format is - // finalised. - else { - LOG.error( - "Unknown field type {} for field {} in {}. Ignoring it.", - fieldSchema, - fieldName, - recordValue); + } else if (fieldSchema.getLogicalType() != null + && fieldSchema.getLogicalType().getName().equals(CustomAvroTypes.JSON)) { + return recordValue.toString(); + } else if (fieldSchema.getLogicalType() != null + && fieldSchema.getLogicalType().getName().equals(CustomAvroTypes.NUMBER)) { + return recordValue.toString(); + } else if (fieldSchema.getLogicalType() != null + && fieldSchema.getLogicalType().getName().equals(CustomAvroTypes.VARCHAR)) { + return recordValue.toString(); + } else { + LOG.error("Unknown field type {} for field {} in {}.", fieldSchema, fieldName, recordValue); throw new UnsupportedOperationException( String.format( "Unknown field type %s for field %s in %s.", fieldSchema, fieldName, recordValue)); @@ -168,6 +221,7 @@ static String handleLogicalFieldType(String fieldName, Object recordValue, Schem /** Record field types are converted to an equivalent string type. 
*/ static String handleRecordFieldType(String fieldName, GenericRecord element, Schema fieldSchema) { + LOG.debug("found record type for col {} with schema: {}", fieldName, fieldSchema); if (element == null) { return null; } diff --git a/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertorTest.java b/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertorTest.java index 8941c77598..2b713445dd 100644 --- a/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertorTest.java +++ b/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertorTest.java @@ -31,13 +31,12 @@ import com.google.cloud.teleport.v2.spanner.migrations.schema.IdentityMapper; import com.google.cloud.teleport.v2.spanner.type.Type; import com.google.cloud.teleport.v2.utils.SchemaUtils; -import java.io.IOException; import java.math.BigDecimal; import java.nio.ByteBuffer; -import java.nio.file.Files; -import java.nio.file.Paths; import java.util.List; import java.util.Map; +import org.apache.avro.LogicalType; +import org.apache.avro.LogicalTypes; import org.apache.avro.Schema; import org.apache.avro.SchemaBuilder; import org.apache.avro.generic.GenericData; @@ -47,11 +46,105 @@ public class GenericRecordTypeConvertorTest { + public Schema getLogicalTypesSchema() { + // Create schema types with LogicalTypes + Schema dateType = LogicalTypes.date().addToSchema(Schema.create(Schema.Type.INT)); + Schema decimalType = LogicalTypes.decimal(4, 2).addToSchema(Schema.create(Schema.Type.BYTES)); + Schema timeMicrosType = LogicalTypes.timeMicros().addToSchema(Schema.create(Schema.Type.LONG)); + Schema timeMillisType = LogicalTypes.timeMillis().addToSchema(Schema.create(Schema.Type.INT)); + Schema timestampMicrosType = + LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG)); + Schema timestampMillisType = + LogicalTypes.timestampMillis().addToSchema(Schema.create(Schema.Type.LONG)); + Schema jsonType = + new LogicalType(GenericRecordTypeConvertor.CustomAvroTypes.JSON) + .addToSchema(SchemaBuilder.builder().stringType()); + Schema numberType = + new LogicalType(GenericRecordTypeConvertor.CustomAvroTypes.NUMBER) + .addToSchema(SchemaBuilder.builder().stringType()); + Schema varcharType = + new LogicalType(GenericRecordTypeConvertor.CustomAvroTypes.VARCHAR) + .addToSchema(SchemaBuilder.builder().stringType()); + + // Build the schema using the created types + return SchemaBuilder.record("logicalTypes") + .namespace("com.test.schema") + .fields() + .name("date_col") + .type(dateType) + .noDefault() + .name("decimal_col") + .type(decimalType) + .noDefault() + .name("time_micros_col") + .type(timeMicrosType) + .noDefault() + .name("time_millis_col") + .type(timeMillisType) + .noDefault() + .name("timestamp_micros_col") + .type(timestampMicrosType) + .noDefault() + .name("timestamp_millis_col") + .type(timestampMillisType) + .noDefault() + .name("json_col") + .type(jsonType) + .noDefault() + .name("number_col") + .type(numberType) + .noDefault() + .name("varchar_col") + .type(varcharType) + .noDefault() + .endRecord(); + } + + public Schema unionNullType(Schema schema) { + return SchemaBuilder.builder().unionOf().nullType().and().type(schema).endUnion(); + } + + public Schema getAllSpannerTypesSchema() { + Schema decimalType = + unionNullType(LogicalTypes.decimal(5, 
2).addToSchema(Schema.create(Schema.Type.BYTES))); + Schema dateType = + unionNullType(LogicalTypes.date().addToSchema(Schema.create(Schema.Type.INT))); + Schema timestampType = + unionNullType(LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG))); + return SchemaBuilder.record("all_types") + .namespace("com.test.schema") + .fields() + .name("bool_col") + .type(unionNullType(Schema.create(Schema.Type.BOOLEAN))) + .noDefault() + .name("int_col") + .type(unionNullType(Schema.create(Schema.Type.LONG))) + .noDefault() + .name("float_col") + .type(unionNullType(Schema.create(Schema.Type.DOUBLE))) + .noDefault() + .name("string_col") + .type(unionNullType(Schema.create(Schema.Type.STRING))) + .noDefault() + .name("numeric_col") + .type(decimalType) + .noDefault() + .name("bytes_col") + .type(unionNullType(Schema.create(Schema.Type.BYTES))) + .noDefault() + .name("timestamp_col") + .type(timestampType) + .noDefault() + .name("date_col") + .type(dateType) + .noDefault() + .endRecord(); + } + @Test - public void testHandleLogicalFieldType() throws IOException { - Schema avroSchema = - SchemaUtils.parseAvroSchema( - Files.readString(Paths.get("src/test/resources/avro/logical-types-schema.avsc"))); + public void testHandleLogicalFieldType() { + Schema avroSchema = getLogicalTypesSchema(); + GenericRecord genericRecord = new GenericData.Record(avroSchema); genericRecord.put("date_col", 738991); genericRecord.put( @@ -60,6 +153,9 @@ public void testHandleLogicalFieldType() throws IOException { genericRecord.put("time_millis_col", 48035000); genericRecord.put("timestamp_micros_col", 1602599400056483L); genericRecord.put("timestamp_millis_col", 1602599400056L); + genericRecord.put("json_col", "{\"k1\":\"v1\"}"); + genericRecord.put("number_col", "289452"); + genericRecord.put("varchar_col", "Hellogcds"); String col = "date_col"; String result = @@ -96,6 +192,24 @@ public void testHandleLogicalFieldType() throws IOException { GenericRecordTypeConvertor.handleLogicalFieldType( col, genericRecord.get(col), genericRecord.getSchema().getField(col).schema()); assertEquals("Test timestamp_millis_col conversion: ", "2020-10-13T14:30:00.056Z", result); + + col = "json_col"; + result = + GenericRecordTypeConvertor.handleLogicalFieldType( + col, genericRecord.get(col), genericRecord.getSchema().getField(col).schema()); + assertEquals("Test json_col conversion: ", "{\"k1\":\"v1\"}", result); + + col = "number_col"; + result = + GenericRecordTypeConvertor.handleLogicalFieldType( + col, genericRecord.get(col), genericRecord.getSchema().getField(col).schema()); + assertEquals("Test number_col conversion: ", "289452", result); + + col = "varchar_col"; + result = + GenericRecordTypeConvertor.handleLogicalFieldType( + col, genericRecord.get(col), genericRecord.getSchema().getField(col).schema()); + assertEquals("Test varchar_col conversion: ", "Hellogcds", result); } @Test @@ -235,11 +349,8 @@ static Ddl getIdentityDdl() { } @Test - public void transformChangeEventTest_identityMapper() throws IOException { - GenericRecord genericRecord = - new GenericData.Record( - SchemaUtils.parseAvroSchema( - Files.readString(Paths.get("src/test/resources/avro/all-spanner-types.avsc")))); + public void transformChangeEventTest_identityMapper() { + GenericRecord genericRecord = new GenericData.Record(getAllSpannerTypesSchema()); genericRecord.put("bool_col", true); genericRecord.put("int_col", 10); genericRecord.put("float_col", 10.34); @@ -247,8 +358,7 @@ public void transformChangeEventTest_identityMapper() 
throws IOException { genericRecord.put( "numeric_col", ByteBuffer.wrap(new BigDecimal("12.34").unscaledValue().toByteArray())); genericRecord.put("bytes_col", new byte[] {10, 20, 30}); - genericRecord.put( - "timestamp_col", AvroTestingHelper.createTimestampTzRecord(1602599400056483L, 3600000)); + genericRecord.put("timestamp_col", 1602599400056483L); genericRecord.put("date_col", 738991); GenericRecordTypeConvertor genericRecordTypeConvertor = new GenericRecordTypeConvertor(new IdentityMapper(getIdentityDdl()), ""); @@ -268,8 +378,41 @@ public void transformChangeEventTest_identityMapper() throws IOException { assertEquals(expected, actual); } + @Test + public void transformChangeEventTest_illegalUnionType() { + GenericRecordTypeConvertor genericRecordTypeConvertor = + new GenericRecordTypeConvertor(new IdentityMapper(getIdentityDdl()), ""); + Schema schema = + SchemaBuilder.builder() + .unionOf() + .nullType() + .and() + .type(Schema.create(Schema.Type.BOOLEAN)) + .and() + .type(Schema.create(Schema.Type.STRING)) + .endUnion(); + assertThrows( + IllegalArgumentException.class, + () -> genericRecordTypeConvertor.getSpannerValue(null, schema, "union_col", Type.string())); + } + + @Test + public void transformChangeEventTest_nullType() { + GenericRecordTypeConvertor genericRecordTypeConvertor = + new GenericRecordTypeConvertor(new IdentityMapper(getIdentityDdl()), ""); + Schema schema = + SchemaBuilder.builder() + .unionOf() + .nullType() + .and() + .type(Schema.create(Schema.Type.BOOLEAN)) + .endUnion(); + assertNull( + genericRecordTypeConvertor.getSpannerValue(null, schema, "union_col", Type.string())); + } + @Test(expected = IllegalArgumentException.class) - public void transformChangeEventTest_incorrectSpannerType() throws IOException { + public void transformChangeEventTest_incorrectSpannerType() { ISchemaMapper mockSchemaMapper = mock(ISchemaMapper.class); when(mockSchemaMapper.getDialect()).thenReturn(Dialect.GOOGLE_STANDARD_SQL); @@ -281,10 +424,7 @@ public void transformChangeEventTest_incorrectSpannerType() throws IOException { when(mockSchemaMapper.getSpannerColumnType(anyString(), anyString(), anyString())) .thenReturn(Type.array(Type.bool())); - GenericRecord genericRecord = - new GenericData.Record( - SchemaUtils.parseAvroSchema( - Files.readString(Paths.get("src/test/resources/avro/all-spanner-types.avsc")))); + GenericRecord genericRecord = new GenericData.Record(getAllSpannerTypesSchema()); genericRecord.put("bool_col", true); GenericRecordTypeConvertor genericRecordTypeConvertor = new GenericRecordTypeConvertor(mockSchemaMapper, ""); @@ -293,7 +433,7 @@ public void transformChangeEventTest_incorrectSpannerType() throws IOException { } @Test - public void transformChangeEventTest_nullDialect() throws IOException { + public void transformChangeEventTest_nullDialect() { ISchemaMapper mockSchemaMapper = mock(ISchemaMapper.class); when(mockSchemaMapper.getDialect()).thenReturn(null); when(mockSchemaMapper.getSpannerTableName(anyString(), anyString())).thenReturn("test"); @@ -304,10 +444,7 @@ public void transformChangeEventTest_nullDialect() throws IOException { when(mockSchemaMapper.getSpannerColumnType(anyString(), anyString(), anyString())) .thenReturn(Type.array(Type.bool())); - GenericRecord genericRecord = - new GenericData.Record( - SchemaUtils.parseAvroSchema( - Files.readString(Paths.get("src/test/resources/avro/all-spanner-types.avsc")))); + GenericRecord genericRecord = new GenericData.Record(getAllSpannerTypesSchema()); genericRecord.put("bool_col", true); 
GenericRecordTypeConvertor genericRecordTypeConvertor = new GenericRecordTypeConvertor(mockSchemaMapper, ""); @@ -318,4 +455,22 @@ public void transformChangeEventTest_nullDialect() throws IOException { // Verify that the mock method was called. Mockito.verify(mockSchemaMapper).getDialect(); } + + @Test + public void transformChangeEventTest_catchAllException() { + ISchemaMapper mockSchemaMapper = mock(ISchemaMapper.class); + when(mockSchemaMapper.getSpannerTableName(anyString(), anyString())).thenReturn("test"); + when(mockSchemaMapper.getSpannerColumns(anyString(), anyString())) + .thenReturn(List.of("bool_col")); + when(mockSchemaMapper.getSourceColumnName(anyString(), anyString(), anyString())) + .thenThrow(new RuntimeException()); + + GenericRecordTypeConvertor genericRecordTypeConvertor = + new GenericRecordTypeConvertor(mockSchemaMapper, ""); + assertThrows( + RuntimeException.class, + () -> genericRecordTypeConvertor.transformChangeEvent(null, "all_types")); + // Verify that the mock method was called. + Mockito.verify(mockSchemaMapper).getSourceColumnName(anyString(), anyString(), anyString()); + } } diff --git a/v2/spanner-common/src/test/resources/avro/all-spanner-types.avsc b/v2/spanner-common/src/test/resources/avro/all-spanner-types.avsc deleted file mode 100644 index 707e2b20cf..0000000000 --- a/v2/spanner-common/src/test/resources/avro/all-spanner-types.avsc +++ /dev/null @@ -1,62 +0,0 @@ -{ - "type": "record", - "name": "all_types", - "namespace": "com.test.schema", - "fields": [ - { - "name": "bool_col", - "type": "boolean" - }, - { - "name": "int_col", - "type": "long" - }, - { - "name": "float_col", - "type": "double" - }, - { - "name": "string_col", - "type": "string" - }, - { - "name": "numeric_col", - "type": { - "type": "bytes", - "logicalType": "decimal", - "precision": 5, - "scale": 2 - } - }, - { - "name": "bytes_col", - "type": "bytes" - }, - { - "name": "timestamp_col", - "type": { - "type": "record", - "name": "timestampTz", - "fields": [ - { - "name": "timestamp", - "type": "long", - "logicalType": "timestamp-micros" - }, - { - "name": "offset", - "type": "int", - "logicalType": "time-millis" - } - ] - } - }, - { - "name": "date_col", - "type": { - "type": "int", - "logicalType": "date" - } - } - ] -} diff --git a/v2/spanner-common/src/test/resources/avro/logical-types-schema.avsc b/v2/spanner-common/src/test/resources/avro/logical-types-schema.avsc deleted file mode 100644 index b096de5063..0000000000 --- a/v2/spanner-common/src/test/resources/avro/logical-types-schema.avsc +++ /dev/null @@ -1,51 +0,0 @@ -{ - "type": "record", - "name": "logicalTypes", - "namespace": "com.test.schema", - "fields": [ - { - "name": "date_col", - "type": { - "type": "int", - "logicalType": "date" - } - }, - { - "name": "decimal_col", - "type": { - "type": "bytes", - "logicalType": "decimal", - "precision": 4, - "scale": 2 - } - }, - { - "name": "time_micros_col", - "type": { - "type": "long", - "logicalType": "time-micros" - } - }, - { - "name": "time_millis_col", - "type": { - "type": "int", - "logicalType": "time-millis" - } - }, - { - "name": "timestamp_micros_col", - "type": { - "type": "long", - "logicalType": "timestamp-micros" - } - }, - { - "name": "timestamp_millis_col", - "type": { - "type": "long", - "logicalType": "timestamp-millis" - } - } - ] -} From 5c92cc829a8b91fb112e3edfb2aed896cffd8611 Mon Sep 17 00:00:00 2001 From: Nick Anikin <52892974+an2x@users.noreply.github.com> Date: Fri, 10 May 2024 10:30:13 -0700 Subject: [PATCH 18/70] Fix typo. 
Co-authored-by: Anand Inguva <34158215+AnandInguva@users.noreply.github.com> --- .../java/com/google/cloud/teleport/plugin/sample/AtoBOk.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/sample/AtoBOk.java b/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/sample/AtoBOk.java index f0ea357288..48d3d56cae 100644 --- a/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/sample/AtoBOk.java +++ b/plugins/core-plugin/src/test/java/com/google/cloud/teleport/plugin/sample/AtoBOk.java @@ -110,7 +110,7 @@ public interface AtoBOptions { @TemplateParameter.KafkaTopic( order = 10, description = "Kafka input topic", - helpText = "Kafka topic to trad from", + helpText = "Kafka topic to read from", example = "projects/project-foo/locations/us-central1/clusters/cluster-bar/topics/topic-baz") String getInputKafkaTopic(); From 8b4eecb7f7fb33f2b1f0bfa6d3edb41b2e4b3314 Mon Sep 17 00:00:00 2001 From: Akshara Uke Date: Mon, 13 May 2024 04:57:15 +0000 Subject: [PATCH 19/70] optimized the processing for lean Spanner write intervals --- .../handler/GCSToSourceStreamingHandler.java | 38 ++++-- .../transforms/GcsToSourceStreamer.java | 18 +-- .../v2/templates/utils/GCSReader.java | 111 +++++++++--------- .../v2/templates/utils/SpannerDao.java | 4 +- 4 files changed, 95 insertions(+), 76 deletions(-) diff --git a/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/processing/handler/GCSToSourceStreamingHandler.java b/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/processing/handler/GCSToSourceStreamingHandler.java index c1e926489f..7a133de03f 100644 --- a/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/processing/handler/GCSToSourceStreamingHandler.java +++ b/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/processing/handler/GCSToSourceStreamingHandler.java @@ -36,9 +36,10 @@ public class GCSToSourceStreamingHandler { private static final Logger LOG = LoggerFactory.getLogger(GCSToSourceStreamingHandler.class); - public static void process(ProcessingContext taskContext, SpannerDao spannerDao) { + public static String process(ProcessingContext taskContext, SpannerDao spannerDao) { String shardId = taskContext.getShard().getLogicalShardId(); GCSReader inputFileReader = new GCSReader(taskContext, spannerDao); + String fileProcessedStartInterval = taskContext.getStartTimestamp(); try { Instant readStartTime = Instant.now(); @@ -52,9 +53,11 @@ public static void process(ProcessingContext taskContext, SpannerDao spannerDao) + " records from the buffer in " + ChronoUnit.MILLIS.between(readStartTime, readEndTime) + " milliseconds"); + // This may have changed in case the interval did not have data + fileProcessedStartInterval = inputFileReader.getCurrentIntervalStart(); if (records.isEmpty()) { - markShardSuccess(taskContext, spannerDao); - return; + markShardSuccess(taskContext, spannerDao, fileProcessedStartInterval); + return fileProcessedStartInterval; } String connectString = @@ -74,27 +77,36 @@ public static void process(ProcessingContext taskContext, SpannerDao spannerDao) InputRecordProcessor.processRecords( records, taskContext.getSchema(), dao, shardId, taskContext.getSourceDbTimezoneOffset()); - markShardSuccess(taskContext, spannerDao); + markShardSuccess(taskContext, spannerDao, fileProcessedStartInterval); dao.cleanup(); LOG.info( "Shard " + shardId + ": Successfully processed batch of " + 
records.size() + " records."); } catch (Exception e) { Metrics.counter(GCSToSourceStreamingHandler.class, "shard_failed_" + shardId).inc(); - markShardFailure(taskContext, spannerDao); + markShardFailure(taskContext, spannerDao, fileProcessedStartInterval); throw new RuntimeException("Failure when processing records", e); } + return fileProcessedStartInterval; } - private static void markShardSuccess(ProcessingContext taskContext, SpannerDao spannerDao) { - markShardProgress(taskContext, Constants.SHARD_PROGRESS_STATUS_SUCCESS, spannerDao); + private static void markShardSuccess( + ProcessingContext taskContext, SpannerDao spannerDao, String fileProcessedStartInterval) { + markShardProgress( + taskContext, + Constants.SHARD_PROGRESS_STATUS_SUCCESS, + spannerDao, + fileProcessedStartInterval); } private static void markShardProgress( - ProcessingContext taskContext, String status, SpannerDao spannerDao) { + ProcessingContext taskContext, + String status, + SpannerDao spannerDao, + String fileProcessedStartInterval) { ShardProgressTracker shardProgressTracker = new ShardProgressTracker(spannerDao, taskContext.getRunId()); - String fileStartTime = taskContext.getStartTimestamp(); - com.google.cloud.Timestamp startTs = com.google.cloud.Timestamp.parseTimestamp(fileStartTime); + com.google.cloud.Timestamp startTs = null; + startTs = com.google.cloud.Timestamp.parseTimestamp(fileProcessedStartInterval); ShardProgress shardProgress = new ShardProgress(taskContext.getShard().getLogicalShardId(), startTs, status); @@ -102,7 +114,9 @@ private static void markShardProgress( shardProgressTracker.writeShardProgress(shardProgress); } - private static void markShardFailure(ProcessingContext taskContext, SpannerDao spannerDao) { - markShardProgress(taskContext, Constants.SHARD_PROGRESS_STATUS_ERROR, spannerDao); + private static void markShardFailure( + ProcessingContext taskContext, SpannerDao spannerDao, String fileProcessedStartInterval) { + markShardProgress( + taskContext, Constants.SHARD_PROGRESS_STATUS_ERROR, spannerDao, fileProcessedStartInterval); } } diff --git a/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/transforms/GcsToSourceStreamer.java b/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/transforms/GcsToSourceStreamer.java index 958ac743eb..4104712699 100644 --- a/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/transforms/GcsToSourceStreamer.java +++ b/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/transforms/GcsToSourceStreamer.java @@ -132,18 +132,18 @@ public void processElement( } String shardId = keyString.read(); + + String storedStartTime = startString.read(); + if (storedStartTime == null) { + startString.write(element.getValue().getStartTimestamp()); + } + // Set timer if not already running. 
if (shardId == null) { - + keyString.write(element.getKey()); Instant outputTimestamp = Instant.now().plus(Duration.millis(incrementIntervalInMilliSeconds)); timer.set(outputTimestamp); - keyString.write(element.getKey()); - } - - String storedStartTime = startString.read(); - if (storedStartTime == null) { - startString.write(element.getValue().getStartTimestamp()); } num_shards.inc(); } @@ -169,10 +169,10 @@ public void onExpiry( try { taskContext.setStartTimestamp(startString.read()); - GCSToSourceStreamingHandler.process(taskContext, spannerDao); + String processedStartTs = GCSToSourceStreamingHandler.process(taskContext, spannerDao); Instant nextTimer = Instant.now().plus(Duration.millis(incrementIntervalInMilliSeconds)); com.google.cloud.Timestamp startTs = - com.google.cloud.Timestamp.parseTimestamp(startString.read()); + com.google.cloud.Timestamp.parseTimestamp(processedStartTs); Instant startInst = new Instant(startTs.toSqlTimestamp()); Instant endInst = startInst.plus(taskContext.getWindowDuration()); startString.write(endInst.toString()); diff --git a/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/utils/GCSReader.java b/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/utils/GCSReader.java index 8af883fdbe..2f7cce1f25 100644 --- a/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/utils/GCSReader.java +++ b/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/utils/GCSReader.java @@ -33,6 +33,7 @@ import java.util.List; import org.apache.beam.sdk.io.FileSystems; import org.apache.beam.sdk.metrics.Metrics; +import org.joda.time.Duration; import org.joda.time.Instant; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -44,9 +45,9 @@ public class GCSReader { private ShardFileCreationTracker shardFileCreationTracker; private Instant currentIntervalEnd; private String shardId; - private boolean shouldRetryWhenFileNotFound; - private boolean shouldFailWhenFileNotFound; - private boolean queriedDataSeenTable; + private Duration windowDuration; + private String gcsPath; + private Instant currentIntervalStart; private static final Logger LOG = LoggerFactory.getLogger(GCSReader.class); @@ -54,14 +55,14 @@ public GCSReader(ProcessingContext taskContext, SpannerDao spannerDao) { String fileStartTime = taskContext.getStartTimestamp(); com.google.cloud.Timestamp startTs = com.google.cloud.Timestamp.parseTimestamp(fileStartTime); - Instant startInst = new Instant(startTs.toSqlTimestamp()); - currentIntervalEnd = startInst.plus(taskContext.getWindowDuration()); + currentIntervalStart = new Instant(startTs.toSqlTimestamp()); + currentIntervalEnd = currentIntervalStart.plus(taskContext.getWindowDuration()); String gcsFileName = taskContext.getGCSPath() + "/" + taskContext.getShard().getLogicalShardId() + "/" - + startInst + + currentIntervalStart + "-" + currentIntervalEnd + "-pane-0-last-0-of-1.txt"; @@ -71,9 +72,8 @@ public GCSReader(ProcessingContext taskContext, SpannerDao spannerDao) { new ShardFileCreationTracker( spannerDao, taskContext.getShard().getLogicalShardId(), taskContext.getRunId()); this.shardId = taskContext.getShard().getLogicalShardId(); - shouldRetryWhenFileNotFound = true; - shouldFailWhenFileNotFound = false; - queriedDataSeenTable = false; + this.windowDuration = taskContext.getWindowDuration(); + this.gcsPath = taskContext.getGCSPath(); } public List getRecords() { @@ -111,33 +111,8 @@ public List getRecords() { } catch (com.fasterxml.jackson.core.JsonProcessingException ex) { 
throw new RuntimeException("Failed in processing the record ", ex); } catch (IOException e) { - LOG.warn("File not found : " + fileName); - if (shouldRetryWhenFileNotFound) { - if (!queriedDataSeenTable) { - return checkAndReturnIfFileExists(); - } else { - /* We do not need to call checkAndReturnIfFileExists again as it was called already - as this will lead to stack overflow when the time taken to write file to GCS is large. - GCS writing can take arbitrarty time in unforeseen scenario like Dataflow worker restart. - So we just try to read the file in the same function call until found.*/ - return waitTillFileCreatedAndReturn(); - } - } else { - if (shouldFailWhenFileNotFound) { - Metrics.counter(GCSReader.class, "file_not_found_errors_" + shardId).inc(); - throw new RuntimeException("File " + fileName + " expected but not found : " + e); - } - /* The logic for writing to skipped file table can generate load on the metadata database - when the first file from the reader template comes very late. - In this case, a lot of file intervals will be skipped since no file exists. - This causes DEADLINE_EXCEEDED and hence can negatively harm the progress of - both the pipelines as the metadata database is shared. - Hence the code to store the file intervals skipped is removed - and only warnings are logged. Since it was only for audit purpose anyway.*/ - LOG.warn("File not found : " + fileName + " skipping the file"); - } - + return checkAndReturnIfFileExists(); } catch (Exception e) { throw new RuntimeException("Failed in GcsReader ", e); } @@ -154,15 +129,13 @@ public List getRecords() { * we check the shard_file_create_progress table until the created_upto value is greater than or * equal to the current window. * - *
<p>
If the created_upto is equal to current window - then it's indication that file for current - * window is written and should exist in GCS. So we lookup the file again and fail if the file is - * not found. - * - *
<p>
If the created_upto is greater than current window, we need to know if there was any data in - * Spanner for the window we are checking. For this we query the date_seen table. If data was seen - * for the current window, then file should exist in GCS and we lookup the file indefinitely until - * is it found. If, however, there was no data for the current window in data_seen, then it means - * file for the current interval is not there in GCS. We just simply skip the file. + *
<p>
If the created_upto is greater than or equal to thecurrent window, we need to know if there + * was any data in Spanner for the window we are checking. For this we query the date_seen table. + * If data was seen for the current window, then file should exist in GCS and we lookup the file + * indefinitely until is it found. If, however, there was no data for the current window in + * data_seen, then it means file for the current interval is not there in GCS. We then keep + * incrementally looking in data_seen for the next window unitl we find data and then return the + * file contents */ private List checkAndReturnIfFileExists() { try { @@ -198,17 +171,45 @@ private List checkAndReturnIfFileExists() { // if the file is expected to be present - retry until found if (shardFileCreationTracker.doesDataExistForTimestamp(currentEndTimestamp)) { LOG.info("Data exists for shard {} and time end {} ", shardId, currentEndTimestamp); - shouldRetryWhenFileNotFound = - true; // can happen due to out of order writes or the write to GCS was very slow - shouldFailWhenFileNotFound = true; - } else { - shouldRetryWhenFileNotFound = false; - shouldFailWhenFileNotFound = false; - } - queriedDataSeenTable = true; - return getRecords(); + // Data does not exist for the current window. So we scan the data_seen table to see which + // is the next window for which data exists. + LOG.info("Data does not exist for shard {} and time end {} ", shardId, currentEndTimestamp); + Instant previousWindowEnd = currentIntervalEnd; + Instant nextWindowEnd = previousWindowEnd.plus(windowDuration); + Timestamp nextEndTimestamp = Timestamp.parseTimestamp(nextWindowEnd.toString()); + // Note that since the firstPipelineProgress has a time, eventually we will find the + // data_seen entry + while (firstPipelineProgress.compareTo(nextEndTimestamp) >= 0) { + if (!shardFileCreationTracker.doesDataExistForTimestamp(nextEndTimestamp)) { + LOG.info( + "Data does not exist for shard {} and time end {} ", shardId, nextEndTimestamp); + previousWindowEnd = nextWindowEnd; + nextWindowEnd = previousWindowEnd.plus(windowDuration); + nextEndTimestamp = Timestamp.parseTimestamp(nextWindowEnd.toString()); + } else { + // Now we have found the next interval which will have the file expected + // Construct the file name and return contents + LOG.info("Data exists for shard {} and time end {} ", shardId, nextEndTimestamp); + this.fileName = + this.gcsPath + + "/" + + this.shardId + + "/" + + previousWindowEnd + + "-" + + nextWindowEnd + + "-pane-0-last-0-of-1.txt"; + currentIntervalStart = + nextWindowEnd.minus( + windowDuration); // for the caller to know the current interval start + break; + } + } + } + // File should exist now, so wait until found the file and return records + return waitTillFileCreatedAndReturn(); } catch (Exception e) { throw new RuntimeException( " Cannot determine file creation progress for shard : " + shardId, e); @@ -259,4 +260,8 @@ private List waitTillFileCreatedAndReturn() { } return changeStreamList; } + + public String getCurrentIntervalStart() { + return currentIntervalStart.toString(); + } } diff --git a/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/utils/SpannerDao.java b/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/utils/SpannerDao.java index 59c1273486..990003c6bd 100644 --- a/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/utils/SpannerDao.java +++ 
b/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/utils/SpannerDao.java @@ -219,7 +219,7 @@ private void checkAndCreateDataSeenTable() { + " run_id character varying NOT NULL,shard character" + " varying NOT NULL,window_seen timestamp with time zone NOT NULL,update_ts" + " timestamp with time zone DEFAULT CURRENT_TIMESTAMP,PRIMARY KEY(id))" - + " TTL INTERVAL '2 days' ON update_ts"; + + " TTL INTERVAL '30 days' ON update_ts"; } else { createTable = @@ -228,7 +228,7 @@ private void checkAndCreateDataSeenTable() { + " (id STRING(MAX) NOT NULL, run_id" + " STRING(MAX) NOT NULL,shard STRING(MAX) NOT NULL, window_seen TIMESTAMP NOT" + " NULL , update_ts TIMESTAMP DEFAULT (CURRENT_TIMESTAMP)) PRIMARY" - + " KEY(id) , ROW DELETION POLICY (OLDER_THAN(update_ts, INTERVAL 2 DAY))"; + + " KEY(id) , ROW DELETION POLICY (OLDER_THAN(update_ts, INTERVAL 30 DAY))"; } OperationFuture op = databaseAdminClient.updateDatabaseDdl( From 178294bb983c772b92dbba77e872d194241fd7cd Mon Sep 17 00:00:00 2001 From: Shreya Khajanchi Date: Mon, 13 May 2024 12:34:04 +0530 Subject: [PATCH 20/70] added unit test to spanner-common --- .../ddl/InformationSchemaScannerTest.java | 215 ++++++++++++++++++ 1 file changed, 215 insertions(+) create mode 100644 v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/ddl/InformationSchemaScannerTest.java diff --git a/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/ddl/InformationSchemaScannerTest.java b/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/ddl/InformationSchemaScannerTest.java new file mode 100644 index 0000000000..7ff4e3171e --- /dev/null +++ b/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/ddl/InformationSchemaScannerTest.java @@ -0,0 +1,215 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.teleport.v2.spanner.ddl; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.google.cloud.spanner.Dialect; +import com.google.cloud.spanner.ReadContext; +import com.google.cloud.spanner.ResultSet; +import com.google.cloud.spanner.Statement; +import org.junit.Test; + +public class InformationSchemaScannerTest { + + void mockGSQLColumnOptions(ReadContext context) { + Statement listColumnOptions = + Statement.of( + "SELECT t.table_name, t.column_name, t.option_name, t.option_type," + + " t.option_value" + + " FROM information_schema.column_options AS t" + + " WHERE t.table_catalog = '' AND t.table_schema = ''" + + " ORDER BY t.table_name, t.column_name"); + ResultSet listColumnOptionsResultSet = mock(ResultSet.class); + when(context.executeQuery(listColumnOptions)).thenReturn(listColumnOptionsResultSet); + when(listColumnOptionsResultSet.next()).thenReturn(true, false); + when(listColumnOptionsResultSet.getString(0)).thenReturn("singer"); + when(listColumnOptionsResultSet.getString(1)).thenReturn("singerName"); + when(listColumnOptionsResultSet.getString(2)).thenReturn("option1"); + when(listColumnOptionsResultSet.getString(3)).thenReturn("STRING"); + when(listColumnOptionsResultSet.getString(4)).thenReturn("SomeName"); + } + + void mockGSQLIndex(ReadContext context) { + Statement listIndexes = + Statement.of( + "SELECT t.table_name, t.index_name, t.parent_table_name, t.is_unique," + + " t.is_null_filtered" + + " FROM information_schema.indexes AS t" + + " WHERE t.table_catalog = '' AND t.table_schema = '' AND" + + " t.index_type='INDEX' AND t.spanner_is_managed = FALSE" + + " ORDER BY t.table_name, t.index_name"); + ResultSet listIndexessResultSet = mock(ResultSet.class); + when(context.executeQuery(listIndexes)).thenReturn(listIndexessResultSet); + when(listIndexessResultSet.next()).thenReturn(true, false); + when(listIndexessResultSet.getString(0)).thenReturn("singer"); + when(listIndexessResultSet.getString(1)).thenReturn("index1"); + when(listIndexessResultSet.isNull(2)).thenReturn(true); + when(listIndexessResultSet.getBoolean(3)).thenReturn(false); + when(listIndexessResultSet.getBoolean(4)).thenReturn(false); + when(listIndexessResultSet.isNull(5)).thenReturn(true); + } + + void mockGSQLIndexColumns(ReadContext context) { + Statement listIndexColumns = + Statement.of( + "SELECT t.table_name, t.column_name, t.column_ordering, t.index_name " + + "FROM information_schema.index_columns AS t " + + "WHERE t.table_catalog = '' AND t.table_schema = '' " + + "ORDER BY t.table_name, t.index_name, t.ordinal_position"); + ResultSet listIndexColumnsResultSet = mock(ResultSet.class); + when(context.executeQuery(listIndexColumns)).thenReturn(listIndexColumnsResultSet); + when(listIndexColumnsResultSet.next()).thenReturn(true, false); + when(listIndexColumnsResultSet.getString(0)).thenReturn("singer"); + when(listIndexColumnsResultSet.getString(1)).thenReturn("singerName"); + when(listIndexColumnsResultSet.isNull(2)).thenReturn(true); + when(listIndexColumnsResultSet.getString(3)).thenReturn("index1"); + } + + void mockGSQLForeignKey(ReadContext context) { + Statement listForeignKeys = + Statement.of( + "SELECT rc.constraint_name," + + " kcu1.table_name," + + " kcu1.column_name," + + " kcu2.table_name," + + " kcu2.column_name" + + " FROM information_schema.referential_constraints as rc" + + " INNER JOIN information_schema.key_column_usage as kcu1" + + " ON kcu1.constraint_catalog 
= rc.constraint_catalog" + + " AND kcu1.constraint_schema = rc.constraint_schema" + + " AND kcu1.constraint_name = rc.constraint_name" + + " INNER JOIN information_schema.key_column_usage as kcu2" + + " ON kcu2.constraint_catalog = rc.unique_constraint_catalog" + + " AND kcu2.constraint_schema = rc.unique_constraint_schema" + + " AND kcu2.constraint_name = rc.unique_constraint_name" + + " AND kcu2.ordinal_position = kcu1.position_in_unique_constraint" + + " WHERE rc.constraint_catalog = ''" + + " AND rc.constraint_schema = ''" + + " AND kcu1.constraint_catalog = ''" + + " AND kcu1.constraint_schema = ''" + + " AND kcu2.constraint_catalog = ''" + + " AND kcu2.constraint_schema = ''" + + " ORDER BY rc.constraint_name, kcu1.ordinal_position;"); + ResultSet listForeignKeysResultSet = mock(ResultSet.class); + when(context.executeQuery(listForeignKeys)).thenReturn(listForeignKeysResultSet); + when(listForeignKeysResultSet.next()).thenReturn(true, false); + when(listForeignKeysResultSet.getString(0)).thenReturn("fk1"); + when(listForeignKeysResultSet.getString(1)).thenReturn("album"); + when(listForeignKeysResultSet.getString(2)).thenReturn("singerId"); + when(listForeignKeysResultSet.getString(3)).thenReturn("singer"); + when(listForeignKeysResultSet.getString(4)).thenReturn("singerId"); + } + + void mockGSQLCheckConstraint(ReadContext context) { + Statement listCheckConstraints = + Statement.of( + "SELECT ctu.TABLE_NAME," + + " cc.CONSTRAINT_NAME," + + " cc.CHECK_CLAUSE" + + " FROM INFORMATION_SCHEMA.CONSTRAINT_TABLE_USAGE as ctu" + + " INNER JOIN INFORMATION_SCHEMA.CHECK_CONSTRAINTS as cc" + + " ON ctu.constraint_catalog = cc.constraint_catalog" + + " AND ctu.constraint_schema = cc.constraint_schema" + + " AND ctu.CONSTRAINT_NAME = cc.CONSTRAINT_NAME" + + " WHERE NOT STARTS_WITH(cc.CONSTRAINT_NAME, 'CK_IS_NOT_NULL_')" + + " AND ctu.table_catalog = ''" + + " AND ctu.table_schema = ''" + + " AND ctu.constraint_catalog = ''" + + " AND ctu.constraint_schema = ''" + + " AND cc.SPANNER_STATE = 'COMMITTED';"); + ResultSet listCheckConstraintsResultSet = mock(ResultSet.class); + when(context.executeQuery(listCheckConstraints)).thenReturn(listCheckConstraintsResultSet); + when(listCheckConstraintsResultSet.next()).thenReturn(true, false); + when(listCheckConstraintsResultSet.getString(0)).thenReturn("album"); + when(listCheckConstraintsResultSet.getString(1)).thenReturn("check1"); + when(listCheckConstraintsResultSet.getString(2)).thenReturn("albumName!=NULL"); + } + + void mockGSQLListTables(ReadContext context) { + Statement listTables = + Statement.of( + "SELECT t.table_name, t.parent_table_name, t.on_delete_action" + + " FROM information_schema.tables AS t" + + " WHERE t.table_catalog = '' AND t.table_schema = ''"); + ResultSet listTablesResultSet = mock(ResultSet.class); + when(context.executeQuery(listTables)).thenReturn(listTablesResultSet); + when(listTablesResultSet.next()).thenReturn(true, true, false); + when(listTablesResultSet.getString(0)).thenReturn("singer", "album"); + when(listTablesResultSet.getString(1)).thenReturn(null, "singer"); + when(listTablesResultSet.getString(2)).thenReturn(null, "CASCADE"); + } + + void mockGSQLListColumns(ReadContext context) { + Statement listColumns = + Statement.of( + "SELECT c.table_name, c.column_name," + + " c.ordinal_position, c.spanner_type, c.is_nullable," + + " c.is_generated, c.generation_expression, c.is_stored" + + " FROM information_schema.columns as c" + + " WHERE c.table_catalog = '' AND c.table_schema = '' " + + " AND c.spanner_state = 
'COMMITTED' " + + " ORDER BY c.table_name, c.ordinal_position"); + ResultSet listColumnsResultSet = mock(ResultSet.class); + + when(context.executeQuery(listColumns)).thenReturn(listColumnsResultSet); + when(listColumnsResultSet.next()).thenReturn(true, true, true, true, true, false); + when(listColumnsResultSet.getString(0)) + .thenReturn("singer", "singer", "album", "album", "album"); + when(listColumnsResultSet.getString(1)) + .thenReturn("singerId", "singerName", "singerId", "albumId", "albumName"); + when(listColumnsResultSet.getString(3)).thenReturn("STRING(50)"); + when(listColumnsResultSet.getString(4)).thenReturn("NO"); + when(listColumnsResultSet.getString(5)).thenReturn("NO"); + when(listColumnsResultSet.isNull(6)).thenReturn(true); + when(listColumnsResultSet.isNull(7)).thenReturn(true); + } + + @Test + public void testScan() { + ReadContext context = mock(ReadContext.class); + + mockGSQLListTables(context); + mockGSQLListColumns(context); + mockGSQLColumnOptions(context); + mockGSQLIndex(context); + mockGSQLIndexColumns(context); + mockGSQLForeignKey(context); + mockGSQLCheckConstraint(context); + InformationSchemaScanner informationSchemaScanner = + new InformationSchemaScanner(context, Dialect.GOOGLE_STANDARD_SQL); + Ddl ddl = informationSchemaScanner.scan(); + String expectedDdl = + "CREATE TABLE `singer` (\n" + + "\t`singerId` STRING(50) NOT NULL,\n" + + "\t`singerName` STRING(50) NOT NULL OPTIONS (option1=\"SomeName\"),\n" + + ") PRIMARY KEY ()\n" + + "CREATE INDEX `index1` ON `singer`() STORING (`singerName`)\n" + + "\n" + + "CREATE TABLE `album` (\n" + + "\t`singerId` STRING(50) NOT NULL,\n" + + "\t`albumId` STRING(50) NOT NULL,\n" + + "\t`albumName` STRING(50) NOT NULL,\n" + + "\tCONSTRAINT `check1` CHECK (albumName!=NULL),\n" + + ") PRIMARY KEY (),\n" + + "INTERLEAVE IN PARENT `singer` ON DELETE CASCADE\n" + + "\n" + + "ALTER TABLE `album` ADD CONSTRAINT `fk1` FOREIGN KEY (`singerId`) REFERENCES `singer` (`singerId`)"; + assertEquals(expectedDdl, ddl.prettyPrint()); + } +} From aecc881ad7b7c1c387286a802c228f0e0553df5f Mon Sep 17 00:00:00 2001 From: Shreya Khajanchi Date: Mon, 13 May 2024 17:51:57 +0530 Subject: [PATCH 21/70] added more test cases --- .../ddl/InformationSchemaScannerTest.java | 227 +++++++++++++++++- 1 file changed, 214 insertions(+), 13 deletions(-) diff --git a/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/ddl/InformationSchemaScannerTest.java b/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/ddl/InformationSchemaScannerTest.java index 7ff4e3171e..5c54f33414 100644 --- a/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/ddl/InformationSchemaScannerTest.java +++ b/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/ddl/InformationSchemaScannerTest.java @@ -56,13 +56,12 @@ void mockGSQLIndex(ReadContext context) { + " ORDER BY t.table_name, t.index_name"); ResultSet listIndexessResultSet = mock(ResultSet.class); when(context.executeQuery(listIndexes)).thenReturn(listIndexessResultSet); - when(listIndexessResultSet.next()).thenReturn(true, false); - when(listIndexessResultSet.getString(0)).thenReturn("singer"); - when(listIndexessResultSet.getString(1)).thenReturn("index1"); + when(listIndexessResultSet.next()).thenReturn(true, true, true, false); + when(listIndexessResultSet.getString(0)).thenReturn("singer", "singer", "album"); + when(listIndexessResultSet.getString(1)).thenReturn("index1", "PRIMARY_KEY", "PRIMARY_KEY"); 
when(listIndexessResultSet.isNull(2)).thenReturn(true); when(listIndexessResultSet.getBoolean(3)).thenReturn(false); when(listIndexessResultSet.getBoolean(4)).thenReturn(false); - when(listIndexessResultSet.isNull(5)).thenReturn(true); } void mockGSQLIndexColumns(ReadContext context) { @@ -74,11 +73,12 @@ void mockGSQLIndexColumns(ReadContext context) { + "ORDER BY t.table_name, t.index_name, t.ordinal_position"); ResultSet listIndexColumnsResultSet = mock(ResultSet.class); when(context.executeQuery(listIndexColumns)).thenReturn(listIndexColumnsResultSet); - when(listIndexColumnsResultSet.next()).thenReturn(true, false); - when(listIndexColumnsResultSet.getString(0)).thenReturn("singer"); - when(listIndexColumnsResultSet.getString(1)).thenReturn("singerName"); - when(listIndexColumnsResultSet.isNull(2)).thenReturn(true); - when(listIndexColumnsResultSet.getString(3)).thenReturn("index1"); + when(listIndexColumnsResultSet.next()).thenReturn(true, true, true, false); + when(listIndexColumnsResultSet.getString(0)).thenReturn("singer", "singer", "album"); + when(listIndexColumnsResultSet.getString(1)).thenReturn("singerName", "singerId", "albumId"); + when(listIndexColumnsResultSet.isNull(2)).thenReturn(true, false, false); + when(listIndexColumnsResultSet.getString(2)).thenReturn("ASC", "DESC"); + when(listIndexColumnsResultSet.getString(3)).thenReturn("index1", "PRIMARY_KEY", "PRIMARY_KEY"); } void mockGSQLForeignKey(ReadContext context) { @@ -180,8 +180,165 @@ void mockGSQLListColumns(ReadContext context) { when(listColumnsResultSet.isNull(7)).thenReturn(true); } + void mockPgSQLColumnOptions(ReadContext context) { + Statement listColumnOptions = + Statement.of( + "SELECT t.table_name, t.column_name, t.option_name, t.option_type," + + " t.option_value" + + " FROM information_schema.column_options AS t" + + " WHERE t.table_schema NOT IN " + + " ('information_schema', 'spanner_sys', 'pg_catalog')" + + " ORDER BY t.table_name, t.column_name"); + ResultSet listColumnOptionsResultSet = mock(ResultSet.class); + when(context.executeQuery(listColumnOptions)).thenReturn(listColumnOptionsResultSet); + when(listColumnOptionsResultSet.next()).thenReturn(true, false); + when(listColumnOptionsResultSet.getString(0)).thenReturn("singer"); + when(listColumnOptionsResultSet.getString(1)).thenReturn("singerName"); + when(listColumnOptionsResultSet.getString(2)).thenReturn("option1"); + when(listColumnOptionsResultSet.getString(3)).thenReturn("character varying"); + when(listColumnOptionsResultSet.getString(4)).thenReturn("SomeName"); + } + + void mockPgSQLIndex(ReadContext context) { + Statement listIndexes = + Statement.of( + "SELECT t.table_name, t.index_name, t.parent_table_name, t.is_unique," + + " t.is_null_filtered, t.filter FROM information_schema.indexes AS t " + + " WHERE t.table_schema NOT IN " + + " ('information_schema', 'spanner_sys', 'pg_catalog')" + + " AND t.index_type='INDEX' AND t.spanner_is_managed = 'NO' " + + " ORDER BY t.table_name, t.index_name"); + ResultSet listIndexessResultSet = mock(ResultSet.class); + when(context.executeQuery(listIndexes)).thenReturn(listIndexessResultSet); + when(listIndexessResultSet.next()).thenReturn(true, false); + when(listIndexessResultSet.getString(0)).thenReturn("singer"); + when(listIndexessResultSet.getString(1)).thenReturn("index1"); + when(listIndexessResultSet.isNull(2)).thenReturn(true); + when(listIndexessResultSet.getString(3)).thenReturn("YES"); + when(listIndexessResultSet.getString(4)).thenReturn("YES"); + 
when(listIndexessResultSet.isNull(5)).thenReturn(true); + } + + void mockPgSQLIndexColumns(ReadContext context) { + Statement listIndexColumns = + Statement.of( + "SELECT t.table_name, t.column_name, t.column_ordering, t.index_name " + + "FROM information_schema.index_columns AS t " + + "WHERE t.table_schema NOT IN " + + "('information_schema', 'spanner_sys', 'pg_catalog') " + + "ORDER BY t.table_name, t.index_name, t.ordinal_position"); + ResultSet listIndexColumnsResultSet = mock(ResultSet.class); + when(context.executeQuery(listIndexColumns)).thenReturn(listIndexColumnsResultSet); + when(listIndexColumnsResultSet.next()).thenReturn(true, false); + when(listIndexColumnsResultSet.getString(0)).thenReturn("singer"); + when(listIndexColumnsResultSet.getString(1)).thenReturn("singerName"); + when(listIndexColumnsResultSet.isNull(2)).thenReturn(true); + when(listIndexColumnsResultSet.getString(3)).thenReturn("index1"); + } + + void mockPgSQLForeignKey(ReadContext context) { + Statement listForeignKeys = + Statement.of( + "SELECT rc.constraint_name," + + " kcu1.table_name," + + " kcu1.column_name," + + " kcu2.table_name," + + " kcu2.column_name" + + " FROM information_schema.referential_constraints as rc" + + " INNER JOIN information_schema.key_column_usage as kcu1" + + " ON kcu1.constraint_catalog = rc.constraint_catalog" + + " AND kcu1.constraint_schema = rc.constraint_schema" + + " AND kcu1.constraint_name = rc.constraint_name" + + " INNER JOIN information_schema.key_column_usage as kcu2" + + " ON kcu2.constraint_catalog = rc.unique_constraint_catalog" + + " AND kcu2.constraint_schema = rc.unique_constraint_schema" + + " AND kcu2.constraint_name = rc.unique_constraint_name" + + " AND kcu2.ordinal_position = kcu1.position_in_unique_constraint" + + " WHERE rc.constraint_catalog = kcu1.constraint_catalog" + + " AND rc.constraint_catalog = kcu2.constraint_catalog" + + " AND rc.constraint_schema NOT IN " + + " ('information_schema', 'spanner_sys', 'pg_catalog')" + + " AND rc.constraint_schema = kcu1.constraint_schema" + + " AND rc.constraint_schema = kcu2.constraint_schema" + + " ORDER BY rc.constraint_name, kcu1.ordinal_position;"); + ResultSet listForeignKeysResultSet = mock(ResultSet.class); + when(context.executeQuery(listForeignKeys)).thenReturn(listForeignKeysResultSet); + when(listForeignKeysResultSet.next()).thenReturn(true, false); + when(listForeignKeysResultSet.getString(0)).thenReturn("fk1"); + when(listForeignKeysResultSet.getString(1)).thenReturn("album"); + when(listForeignKeysResultSet.getString(2)).thenReturn("singerId"); + when(listForeignKeysResultSet.getString(3)).thenReturn("singer"); + when(listForeignKeysResultSet.getString(4)).thenReturn("singerId"); + } + + void mockPgSQLCheckConstraint(ReadContext context) { + Statement listCheckConstraints = + Statement.of( + "SELECT ctu.TABLE_NAME," + + " cc.CONSTRAINT_NAME," + + " cc.CHECK_CLAUSE" + + " FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS as ctu" + + " INNER JOIN INFORMATION_SCHEMA.CHECK_CONSTRAINTS as cc" + + " ON ctu.constraint_catalog = cc.constraint_catalog" + + " AND ctu.constraint_schema = cc.constraint_schema" + + " AND ctu.CONSTRAINT_NAME = cc.CONSTRAINT_NAME" + + " WHERE NOT STARTS_WITH(cc.CONSTRAINT_NAME, 'CK_IS_NOT_NULL_')" + + " AND ctu.table_catalog = ctu.constraint_catalog" + + " AND ctu.table_schema NOT IN" + + "('information_schema', 'spanner_sys', 'pg_catalog')" + + " AND ctu.table_schema = ctu.constraint_schema" + + " AND cc.SPANNER_STATE = 'COMMITTED';"); + ResultSet listCheckConstraintsResultSet = 
mock(ResultSet.class); + when(context.executeQuery(listCheckConstraints)).thenReturn(listCheckConstraintsResultSet); + when(listCheckConstraintsResultSet.next()).thenReturn(true, false); + when(listCheckConstraintsResultSet.getString(0)).thenReturn("album"); + when(listCheckConstraintsResultSet.getString(1)).thenReturn("check1"); + when(listCheckConstraintsResultSet.getString(2)).thenReturn("albumName!=NULL"); + } + + void mockPgSQLListTables(ReadContext context) { + Statement listTables = + Statement.of( + "SELECT t.table_name, t.parent_table_name, t.on_delete_action FROM" + + " information_schema.tables AS t" + + " WHERE t.table_schema NOT IN " + + "('information_schema', 'spanner_sys', 'pg_catalog')"); + ResultSet listTablesResultSet = mock(ResultSet.class); + when(context.executeQuery(listTables)).thenReturn(listTablesResultSet); + when(listTablesResultSet.next()).thenReturn(true, true, false); + when(listTablesResultSet.getString(0)).thenReturn("singer", "album"); + when(listTablesResultSet.getString(1)).thenReturn(null, "singer"); + when(listTablesResultSet.getString(2)).thenReturn(null, "CASCADE"); + } + + void mockPgSQLListColumns(ReadContext context) { + Statement listColumns = + Statement.of( + "SELECT c.table_name, c.column_name," + + " c.ordinal_position, c.spanner_type, c.is_nullable," + + " c.is_generated, c.generation_expression, c.is_stored" + + " FROM information_schema.columns as c" + + " WHERE c.table_schema NOT IN " + + " ('information_schema', 'spanner_sys', 'pg_catalog') " + + " AND c.spanner_state = 'COMMITTED' " + + " ORDER BY c.table_name, c.ordinal_position"); + ResultSet listColumnsResultSet = mock(ResultSet.class); + + when(context.executeQuery(listColumns)).thenReturn(listColumnsResultSet); + when(listColumnsResultSet.next()).thenReturn(true, true, true, true, true, false); + when(listColumnsResultSet.getString(0)) + .thenReturn("singer", "singer", "album", "album", "album"); + when(listColumnsResultSet.getString(1)) + .thenReturn("singerId", "singerName", "singerId", "albumId", "albumName"); + when(listColumnsResultSet.getString(3)).thenReturn("character varying(50)"); + when(listColumnsResultSet.getString(4)).thenReturn("NO"); + when(listColumnsResultSet.getString(5)).thenReturn("NO"); + when(listColumnsResultSet.isNull(6)).thenReturn(true); + when(listColumnsResultSet.isNull(7)).thenReturn(true); + } + @Test - public void testScan() { + public void testScanGSQLDdl() { ReadContext context = mock(ReadContext.class); mockGSQLListTables(context); @@ -198,7 +355,8 @@ public void testScan() { "CREATE TABLE `singer` (\n" + "\t`singerId` STRING(50) NOT NULL,\n" + "\t`singerName` STRING(50) NOT NULL OPTIONS (option1=\"SomeName\"),\n" - + ") PRIMARY KEY ()\n" + + ") PRIMARY KEY (`singerId` ASC)\n" + + "CREATE INDEX `PRIMARY_KEY` ON `singer`()\n" + "CREATE INDEX `index1` ON `singer`() STORING (`singerName`)\n" + "\n" + "CREATE TABLE `album` (\n" @@ -206,10 +364,53 @@ public void testScan() { + "\t`albumId` STRING(50) NOT NULL,\n" + "\t`albumName` STRING(50) NOT NULL,\n" + "\tCONSTRAINT `check1` CHECK (albumName!=NULL),\n" - + ") PRIMARY KEY (),\n" + + ") PRIMARY KEY (`albumId` DESC),\n" + "INTERLEAVE IN PARENT `singer` ON DELETE CASCADE\n" - + "\n" + + "CREATE INDEX `PRIMARY_KEY` ON `album`()\n" + "ALTER TABLE `album` ADD CONSTRAINT `fk1` FOREIGN KEY (`singerId`) REFERENCES `singer` (`singerId`)"; assertEquals(expectedDdl, ddl.prettyPrint()); } + + @Test + public void testScanPgSQLDdl() { + ReadContext context = mock(ReadContext.class); + + 
mockPgSQLListTables(context); + mockPgSQLListColumns(context); + mockPgSQLColumnOptions(context); + mockPgSQLIndex(context); + mockPgSQLIndexColumns(context); + mockPgSQLForeignKey(context); + mockPgSQLCheckConstraint(context); + InformationSchemaScanner informationSchemaScanner = + new InformationSchemaScanner(context, Dialect.POSTGRESQL); + Ddl ddl = informationSchemaScanner.scan(); + String expectedDdl = + "CREATE TABLE \"singer\" (\n" + + "\t\"singerId\" character varying(50) NOT NULL,\n" + + "\t\"singerName\" character varying(50) NOT NULL OPTIONS (option1='SomeName'),\n" + + "\tPRIMARY KEY ()\n" + + ")\n" + + "CREATE UNIQUE INDEX \"index1\" ON \"singer\"() INCLUDE (\"singerName\")\n" + + "\n" + + "CREATE TABLE \"album\" (\n" + + "\t\"singerId\" character varying(50) NOT NULL,\n" + + "\t\"albumId\" character varying(50) NOT NULL,\n" + + "\t\"albumName\" character varying(50) NOT NULL,\n" + + "\tCONSTRAINT \"check1\" CHECK (albumName!=NULL),\n" + + "\tPRIMARY KEY ()\n" + + ") \n" + + "INTERLEAVE IN PARENT \"singer\" ON DELETE CASCADE\n" + + "\n" + + "ALTER TABLE \"album\" ADD CONSTRAINT \"fk1\" FOREIGN KEY (\"singerId\") REFERENCES \"singer\" (\"singerId\")"; + assertEquals(expectedDdl, ddl.prettyPrint()); + } + + @Test(expected = IllegalArgumentException.class) + public void testWithInvalidDialect() { + ReadContext context = mock(ReadContext.class); + InformationSchemaScanner informationSchemaScanner = + new InformationSchemaScanner(context, Dialect.fromName("xyz")); + Ddl ddl = informationSchemaScanner.scan(); + } } From cae3a040f32e4ec81eb28ac0450a78c1154116e9 Mon Sep 17 00:00:00 2001 From: "vitaly.terentyev" Date: Mon, 8 Apr 2024 16:26:52 +0400 Subject: [PATCH 22/70] Template parameters: update old types to new ones --- .../v2/templates/BigtableChangeStreamsToHBase.java | 12 ++++++------ .../teleport/v2/auto/blocks/WriteToBigQuery.java | 2 +- .../v2/options/WindowedFilenamePolicyOptions.java | 2 +- .../v2/options/DataplexBigQueryToGcsOptions.java | 5 +---- .../teleport/v2/templates/DataStreamToBigQuery.java | 6 +++--- .../teleport/v2/templates/DataStreamToSpanner.java | 2 +- .../cloud/teleport/v2/templates/DataStreamToSQL.java | 2 +- .../options/ElasticsearchWriteOptions.java | 7 ++----- .../cloud/teleport/v2/templates/GCSToSourceDb.java | 4 ++-- .../teleport/v2/templates/GoogleAdsToBigQuery.java | 4 ++-- .../BigtableChangeStreamsToPubSubOptions.java | 2 +- .../SpannerChangeStreamsToBigQueryOptions.java | 4 ++-- .../options/SpannerChangeStreamsToPubSubOptions.java | 8 ++++++-- .../teleport/v2/templates/TextIOToBigQuery.java | 9 +++------ .../teleport/v2/transforms/SplunkConverters.java | 7 ++----- .../cloud/teleport/v2/templates/JmsToPubsub.java | 4 ++-- .../teleport/v2/templates/PubsubProtoToBigQuery.java | 2 +- .../teleport/v2/templates/PubSubCdcToBigQuery.java | 4 ++-- .../cloud/teleport/v2/templates/PubsubToJms.java | 2 +- .../cloud/teleport/v2/templates/PubSubToRedis.java | 2 +- .../v2/options/SourceDbToSpannerOptions.java | 6 +++--- .../SpannerChangeStreamsToShardedFileSink.java | 4 ++-- .../v2/templates/StreamingDataGenerator.java | 12 ++++++------ 23 files changed, 52 insertions(+), 60 deletions(-) diff --git a/v2/bigtable-changestreams-to-hbase/src/main/java/com/google/cloud/teleport/v2/templates/BigtableChangeStreamsToHBase.java b/v2/bigtable-changestreams-to-hbase/src/main/java/com/google/cloud/teleport/v2/templates/BigtableChangeStreamsToHBase.java index 601c46626f..c9d6d5e6a0 100644 --- 
a/v2/bigtable-changestreams-to-hbase/src/main/java/com/google/cloud/teleport/v2/templates/BigtableChangeStreamsToHBase.java +++ b/v2/bigtable-changestreams-to-hbase/src/main/java/com/google/cloud/teleport/v2/templates/BigtableChangeStreamsToHBase.java @@ -102,9 +102,9 @@ public interface BigtableToHbasePipelineOptions helpText = "Whether bidirectional replication between hbase and bigtable is enabled, adds additional logic to filter out hbase-replicated mutations") @Default.Boolean(false) - boolean getBidirectionalReplicationEnabled(); + Boolean getBidirectionalReplicationEnabled(); - void setBidirectionalReplicationEnabled(boolean bidirectionalReplicationEnabled); + void setBidirectionalReplicationEnabled(Boolean bidirectionalReplicationEnabled); @TemplateParameter.Text( optional = true, @@ -129,18 +129,18 @@ public interface BigtableToHbasePipelineOptions description = "Dry run", helpText = "When dry run is enabled, pipeline will not write to Hbase") @Default.Boolean(false) - boolean getDryRunEnabled(); + Boolean getDryRunEnabled(); - void setDryRunEnabled(boolean dryRunEnabled); + void setDryRunEnabled(Boolean dryRunEnabled); @TemplateParameter.Boolean( optional = true, description = "Filter GC mutations", helpText = "Filters out garbage collection Delete mutations from CBT") @Default.Boolean(false) - boolean getFilterGCMutations(); + Boolean getFilterGCMutations(); - void setFilterGCMutations(boolean filterGCMutations); + void setFilterGCMutations(Boolean filterGCMutations); } /** diff --git a/v2/common/src/main/java/com/google/cloud/teleport/v2/auto/blocks/WriteToBigQuery.java b/v2/common/src/main/java/com/google/cloud/teleport/v2/auto/blocks/WriteToBigQuery.java index a5a5fb7deb..586b2c5ecf 100644 --- a/v2/common/src/main/java/com/google/cloud/teleport/v2/auto/blocks/WriteToBigQuery.java +++ b/v2/common/src/main/java/com/google/cloud/teleport/v2/auto/blocks/WriteToBigQuery.java @@ -70,7 +70,7 @@ public interface SinkOptions void setOutputTableSpec(String input); - @TemplateParameter.Text( + @TemplateParameter.GcsReadFile( order = 2, optional = true, description = "GCS Path to JSON file containing BigQuery table schema.", diff --git a/v2/common/src/main/java/com/google/cloud/teleport/v2/options/WindowedFilenamePolicyOptions.java b/v2/common/src/main/java/com/google/cloud/teleport/v2/options/WindowedFilenamePolicyOptions.java index 87c6721dc2..26f579671d 100644 --- a/v2/common/src/main/java/com/google/cloud/teleport/v2/options/WindowedFilenamePolicyOptions.java +++ b/v2/common/src/main/java/com/google/cloud/teleport/v2/options/WindowedFilenamePolicyOptions.java @@ -40,7 +40,7 @@ public interface WindowedFilenamePolicyOptions extends PipelineOptions { void setOutputShardTemplate(String value); - @TemplateParameter.Text( + @TemplateParameter.Integer( order = 2, optional = true, description = "Number of shards", diff --git a/v2/dataplex/src/main/java/com/google/cloud/teleport/v2/options/DataplexBigQueryToGcsOptions.java b/v2/dataplex/src/main/java/com/google/cloud/teleport/v2/options/DataplexBigQueryToGcsOptions.java index eb1e7c1165..bfbde306ba 100644 --- a/v2/dataplex/src/main/java/com/google/cloud/teleport/v2/options/DataplexBigQueryToGcsOptions.java +++ b/v2/dataplex/src/main/java/com/google/cloud/teleport/v2/options/DataplexBigQueryToGcsOptions.java @@ -75,12 +75,9 @@ public interface DataplexBigQueryToGcsOptions void setDestinationStorageBucketAssetName(String destinationStorageBucketAssetName); - @TemplateParameter.Text( + @TemplateParameter.DateTime( order = 4, optional = true, - 
regexes = { - "^([0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}:[0-9]{2}(Z|[+-][0-9]{2}:[0-9]{2})?)?|-[pP]([0-9]+(\\.[0-9]+)?Y)?([0-9]+(\\.[0-9]+)?M)?([0-9]+(\\.[0-9]+)?W)?([0-9]+(\\.[0-9]+)?D)?(T([0-9]+(\\.[0-9]+)?H)?([0-9]+(\\.[0-9]+)?M)?([0-9]+(\\.[0-9]+)?S)?)?)$" - }, description = "Move data older than the date.", helpText = "Move data older than this date (and optional time). For partitioned tables, move partitions last modified before this date/time. For non-partitioned tables, move if the table was last modified before this date/time. If not specified, move all tables / partitions. The date/time is parsed in the default time zone by default, but optional suffixes Z and +HH:mm are supported. Format: YYYY-MM-DD or YYYY-MM-DDTHH:mm:ss or YYYY-MM-DDTHH:mm:ss+03:00. Relative date/time (https://en.wikipedia.org/wiki/ISO_8601#Durations) is also supported. Format: -PnDTnHnMn.nS (must start with -P meaning time in the past).") diff --git a/v2/datastream-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToBigQuery.java b/v2/datastream-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToBigQuery.java index a4eddefec7..0427b9f128 100644 --- a/v2/datastream-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToBigQuery.java +++ b/v2/datastream-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToBigQuery.java @@ -144,7 +144,7 @@ public interface Options InputUDFOptions, BigQueryStorageApiStreamingOptions { - @TemplateParameter.Text( + @TemplateParameter.GcsReadFile( order = 1, description = "File location for Datastream file output in Cloud Storage.", helpText = @@ -273,7 +273,7 @@ public interface Options void setIgnoreFields(String value); - @TemplateParameter.Text( + @TemplateParameter.Integer( order = 13, optional = true, description = "The number of minutes between merges for a given table", @@ -293,7 +293,7 @@ public interface Options void setDeadLetterQueueDirectory(String value); - @TemplateParameter.Text( + @TemplateParameter.Integer( order = 15, optional = true, description = "The number of minutes between DLQ Retries.", diff --git a/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToSpanner.java b/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToSpanner.java index a29d67b18b..568ffbe609 100644 --- a/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToSpanner.java +++ b/v2/datastream-to-spanner/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToSpanner.java @@ -131,7 +131,7 @@ public class DataStreamToSpanner { *
<p>
Inherits standard configuration options. */ public interface Options extends PipelineOptions, StreamingOptions { - @TemplateParameter.Text( + @TemplateParameter.GcsReadFile( order = 1, description = "File location for Datastream file output in Cloud Storage.", helpText = diff --git a/v2/datastream-to-sql/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToSQL.java b/v2/datastream-to-sql/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToSQL.java index ec3b2ab261..849b725c09 100644 --- a/v2/datastream-to-sql/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToSQL.java +++ b/v2/datastream-to-sql/src/main/java/com/google/cloud/teleport/v2/templates/DataStreamToSQL.java @@ -97,7 +97,7 @@ public class DataStreamToSQL { *
<p>
Inherits standard configuration options. */ public interface Options extends PipelineOptions, StreamingOptions { - @TemplateParameter.Text( + @TemplateParameter.GcsReadFile( order = 1, description = "File location for Datastream file input in Cloud Storage.", helpText = diff --git a/v2/elasticsearch-common/src/main/java/com/google/cloud/teleport/v2/elasticsearch/options/ElasticsearchWriteOptions.java b/v2/elasticsearch-common/src/main/java/com/google/cloud/teleport/v2/elasticsearch/options/ElasticsearchWriteOptions.java index 78baba94a8..baf9bfe94a 100644 --- a/v2/elasticsearch-common/src/main/java/com/google/cloud/teleport/v2/elasticsearch/options/ElasticsearchWriteOptions.java +++ b/v2/elasticsearch-common/src/main/java/com/google/cloud/teleport/v2/elasticsearch/options/ElasticsearchWriteOptions.java @@ -56,7 +56,7 @@ public interface ElasticsearchWriteOptions extends PipelineOptions { void setElasticsearchUsername(String elasticsearchUsername); - @TemplateParameter.Text( + @TemplateParameter.Password( order = 4, optional = true, description = "Password for Elasticsearch endpoint", @@ -262,14 +262,11 @@ public interface ElasticsearchWriteOptions extends PipelineOptions { void setDisableCertificateValidation(Boolean disableCertificateValidation); - @TemplateParameter.Text( + @TemplateParameter.KmsEncryptionKey( order = 24, optional = true, parentName = "apiKeySource", parentTriggerValues = {"KMS"}, - regexes = { - "^projects\\/[^\\n\\r\\/]+\\/locations\\/[^\\n\\r\\/]+\\/keyRings\\/[^\\n\\r\\/]+\\/cryptoKeys\\/[^\\n\\r\\/]+$" - }, description = "Google Cloud KMS encryption key for the API key", helpText = "The Cloud KMS key to decrypt the API key. This parameter must be " diff --git a/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/GCSToSourceDb.java b/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/GCSToSourceDb.java index c701ad91e5..d4f4f13a25 100644 --- a/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/GCSToSourceDb.java +++ b/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/GCSToSourceDb.java @@ -134,7 +134,7 @@ public interface Options extends PipelineOptions, StreamingOptions { void setTimerIntervalInMilliSec(Integer value); - @TemplateParameter.Text( + @TemplateParameter.DateTime( order = 6, optional = true, description = @@ -165,7 +165,7 @@ public interface Options extends PipelineOptions, StreamingOptions { void setWindowDuration(String windowDuration); - @TemplateParameter.Text( + @TemplateParameter.GcsReadFolder( order = 8, optional = false, description = "GCS input directory path", diff --git a/v2/google-ads-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/GoogleAdsToBigQuery.java b/v2/google-ads-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/GoogleAdsToBigQuery.java index 78bc803d28..31db481d80 100644 --- a/v2/google-ads-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/GoogleAdsToBigQuery.java +++ b/v2/google-ads-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/GoogleAdsToBigQuery.java @@ -115,9 +115,9 @@ public interface GoogleAdsToBigQueryOptions extends WriteOptions, GoogleAdsOptio + "Divide the desired per pipeline QPS by the maximum number of workers. " + "Avoid exceeding per-account or developer token limits. 
" + "See Rate Limits (https://developers.google.com/google-ads/api/docs/best-practices/rate-limits).") - double getQpsPerWorker(); + Double getQpsPerWorker(); - void setQpsPerWorker(double qpsPerWorker); + void setQpsPerWorker(Double qpsPerWorker); @TemplateParameter.GcsReadFile( order = 5, diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/BigtableChangeStreamsToPubSubOptions.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/BigtableChangeStreamsToPubSubOptions.java index cdf6e09e30..fbf4d5daba 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/BigtableChangeStreamsToPubSubOptions.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/BigtableChangeStreamsToPubSubOptions.java @@ -31,7 +31,7 @@ public interface BigtableChangeStreamsToPubSubOptions extends DataflowPipelineOptions, BigtableCommonOptions.ReadChangeStreamOptions { - @TemplateParameter.Text( + @TemplateParameter.PubsubTopic( order = 1, description = "The output Pub/Sub topic name", helpText = "The name of the destination Pub/Sub topic.") diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToBigQueryOptions.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToBigQueryOptions.java index cec4b3d8dc..51b9d1d98e 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToBigQueryOptions.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToBigQueryOptions.java @@ -249,7 +249,7 @@ public interface SpannerChangeStreamsToBigQueryOptions description = "Whether or not to disable retries for the DLQ", helpText = "Whether or not to disable retries for the DLQ") @Default.Boolean(false) - boolean getDisableDlqRetries(); + Boolean getDisableDlqRetries(); - void setDisableDlqRetries(boolean value); + void setDisableDlqRetries(Boolean value); } diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToPubSubOptions.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToPubSubOptions.java index b6d9809388..7a7f66a475 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToPubSubOptions.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToPubSubOptions.java @@ -151,8 +151,12 @@ public interface SpannerChangeStreamsToPubSubOptions extends DataflowPipelineOpt void setSpannerHost(String value); - @TemplateParameter.Text( + @TemplateParameter.Enum( order = 12, + enumOptions = { + @TemplateEnumOption("JSON"), + @TemplateEnumOption("AVRO") + }, optional = true, description = "Output data format", helpText = @@ -187,7 +191,7 @@ public interface SpannerChangeStreamsToPubSubOptions extends DataflowPipelineOpt void setPubsubProjectId(String pubsubProjectId); - @TemplateParameter.Text( + @TemplateParameter.PubsubTopic( order = 15, description = "The output Pub/Sub topic", helpText = "The Pub/Sub topic to publish PubsubMessage.") diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/TextIOToBigQuery.java 
b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/TextIOToBigQuery.java index 649f1959a0..74035b8f5b 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/TextIOToBigQuery.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/TextIOToBigQuery.java @@ -210,10 +210,9 @@ public interface Options extends DataflowPipelineOptions, PythonExternalTextTransformerOptions, BigQueryStorageApiBatchOptions { - @TemplateParameter.Text( + @TemplateParameter.GcsReadFile( order = 1, optional = false, - regexes = {"^gs:\\/\\/[^\\n\\r]+$"}, description = "The GCS location of the text you'd like to process", helpText = "The gs:// path to the text in Cloud Storage you'd like to process.", example = "gs://your-bucket/your-file.txt") @@ -232,10 +231,9 @@ public interface Options void setJSONPath(String value); - @TemplateParameter.Text( + @TemplateParameter.BigQueryTable( order = 3, optional = false, - regexes = {".+:.+\\..+"}, description = "Output table to write to", helpText = "The BigQuery table name you want to create to store your processed data in. If you reuse an existing BigQuery table, the data is appended to the destination table.", @@ -244,10 +242,9 @@ public interface Options void setOutputTable(String value); - @TemplateParameter.Text( + @TemplateParameter.GcsWriteFile( order = 4, optional = false, - regexes = {"^gs:\\/\\/[^\\n\\r]+$"}, description = "GCS path to javascript fn for transforming output", helpText = "The Cloud Storage URI of the `.js` file that defines the JavaScript user-defined function (UDF) you want to use.", diff --git a/v2/googlecloud-to-splunk/src/main/java/com/google/cloud/teleport/v2/transforms/SplunkConverters.java b/v2/googlecloud-to-splunk/src/main/java/com/google/cloud/teleport/v2/transforms/SplunkConverters.java index 6ecbb00ea1..96581de347 100644 --- a/v2/googlecloud-to-splunk/src/main/java/com/google/cloud/teleport/v2/transforms/SplunkConverters.java +++ b/v2/googlecloud-to-splunk/src/main/java/com/google/cloud/teleport/v2/transforms/SplunkConverters.java @@ -154,12 +154,9 @@ public interface SplunkOptions extends PipelineOptions { void setTokenSource(String tokenSource); - @TemplateParameter.Text( + @TemplateParameter.KmsEncryptionKey( order = 7, optional = true, - regexes = { - "^projects\\/[^\\n\\r\\/]+\\/locations\\/[^\\n\\r\\/]+\\/keyRings\\/[^\\n\\r\\/]+\\/cryptoKeys\\/[^\\n\\r\\/]+$" - }, description = "Google Cloud KMS encryption key for the token", helpText = "The Cloud KMS key to decrypt the HEC token string. 
This parameter must be " @@ -188,7 +185,7 @@ public interface SplunkOptions extends PipelineOptions { void setTokenSecretId(String secretId); - @TemplateParameter.Text( + @TemplateParameter.GcsReadFile( order = 9, optional = true, description = "Cloud Storage path to root CA certificate.", diff --git a/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java b/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java index 304a2295f4..6a5cd2c789 100644 --- a/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java +++ b/v2/jms-to-pubsub/src/main/java/com/google/cloud/teleport/v2/templates/JmsToPubsub.java @@ -172,7 +172,7 @@ public interface JmsToPubsubOptions extends PipelineOptions { void setInputType(String inputType); - @TemplateParameter.Text( + @TemplateParameter.PubsubTopic( order = 4, description = "Output Pub/Sub topic", helpText = @@ -192,7 +192,7 @@ public interface JmsToPubsubOptions extends PipelineOptions { void setUsername(String username); - @TemplateParameter.Text( + @TemplateParameter.Password( order = 6, description = "JMS Password", helpText = "The password associated with the provided username.", diff --git a/v2/pubsub-binary-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/PubsubProtoToBigQuery.java b/v2/pubsub-binary-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/PubsubProtoToBigQuery.java index cb797f6cd1..16f5eecfb7 100644 --- a/v2/pubsub-binary-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/PubsubProtoToBigQuery.java +++ b/v2/pubsub-binary-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/PubsubProtoToBigQuery.java @@ -187,7 +187,7 @@ public interface PubSubProtoToBigQueryOptions void setFullMessageName(String value); - @TemplateParameter.Text( + @TemplateParameter.Boolean( order = 3, optional = true, description = "Preserve Proto Field Names", diff --git a/v2/pubsub-cdc-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/PubSubCdcToBigQuery.java b/v2/pubsub-cdc-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/PubSubCdcToBigQuery.java index d45c4dbebf..c6cfe8bec8 100644 --- a/v2/pubsub-cdc-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/PubSubCdcToBigQuery.java +++ b/v2/pubsub-cdc-to-bigquery/src/main/java/com/google/cloud/teleport/v2/templates/PubSubCdcToBigQuery.java @@ -152,7 +152,7 @@ public interface Options void setAutoMapTables(Boolean value); - @TemplateParameter.Text( + @TemplateParameter.GcsReadFile( order = 3, optional = true, description = "Cloud Storage file with BigQuery schema fields to be used in DDL", @@ -235,7 +235,7 @@ public interface Options void setWindowDuration(String value); // Thread Count - @TemplateParameter.Text( + @TemplateParameter.Integer( order = 10, optional = true, description = "Thread Number", diff --git a/v2/pubsub-to-jms/src/main/java/com/google/cloud/teleport/v2/templates/PubsubToJms.java b/v2/pubsub-to-jms/src/main/java/com/google/cloud/teleport/v2/templates/PubsubToJms.java index 83fe2e9a25..b78c8d57ec 100644 --- a/v2/pubsub-to-jms/src/main/java/com/google/cloud/teleport/v2/templates/PubsubToJms.java +++ b/v2/pubsub-to-jms/src/main/java/com/google/cloud/teleport/v2/templates/PubsubToJms.java @@ -183,7 +183,7 @@ public interface PubsubToJmsOptions extends PipelineOptions { void setUsername(String username); - @TemplateParameter.Text( + @TemplateParameter.Password( order = 6, description = "JMS Password", helpText = "Password 
for username provided for authentication with JMS server", diff --git a/v2/pubsub-to-redis/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToRedis.java b/v2/pubsub-to-redis/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToRedis.java index 24c62ed1aa..d5d76168ba 100644 --- a/v2/pubsub-to-redis/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToRedis.java +++ b/v2/pubsub-to-redis/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToRedis.java @@ -164,7 +164,7 @@ public interface PubSubToRedisOptions void setRedisPort(int redisPort); - @TemplateParameter.Text( + @TemplateParameter.Password( order = 4, description = "Redis DB Password", helpText = "Redis database password.") diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/options/SourceDbToSpannerOptions.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/options/SourceDbToSpannerOptions.java index f348eafc80..d78cf945af 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/options/SourceDbToSpannerOptions.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/options/SourceDbToSpannerOptions.java @@ -218,7 +218,7 @@ public interface SourceDbToSpannerOptions extends CommonTemplateOptions { void setIgnoreColumns(String value); - @TemplateParameter.Text( + @TemplateParameter.Integer( order = 18, optional = true, description = "Maximum number of connections to Source database per worker", @@ -230,7 +230,7 @@ public interface SourceDbToSpannerOptions extends CommonTemplateOptions { void setMaxConnections(Integer value); - @TemplateParameter.Text( + @TemplateParameter.Boolean( order = 19, optional = true, description = "enable connection reconnects", @@ -241,7 +241,7 @@ public interface SourceDbToSpannerOptions extends CommonTemplateOptions { void setReconnectsEnabled(Boolean value); - @TemplateParameter.Text( + @TemplateParameter.Integer( order = 20, optional = true, description = "Maximum number of connection reconnect attempts, if reconnects are enabled", diff --git a/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/SpannerChangeStreamsToShardedFileSink.java b/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/SpannerChangeStreamsToShardedFileSink.java index 084e2f8de5..63d2fadaf2 100644 --- a/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/SpannerChangeStreamsToShardedFileSink.java +++ b/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/SpannerChangeStreamsToShardedFileSink.java @@ -154,7 +154,7 @@ public interface Options extends PipelineOptions, StreamingOptions { void setMetadataDatabase(String value); - @TemplateParameter.Text( + @TemplateParameter.DateTime( order = 7, optional = true, description = "Changes are read from the given timestamp", @@ -164,7 +164,7 @@ public interface Options extends PipelineOptions, StreamingOptions { void setStartTimestamp(String value); - @TemplateParameter.Text( + @TemplateParameter.DateTime( order = 8, optional = true, description = "Changes are read until the given timestamp", diff --git a/v2/streaming-data-generator/src/main/java/com/google/cloud/teleport/v2/templates/StreamingDataGenerator.java b/v2/streaming-data-generator/src/main/java/com/google/cloud/teleport/v2/templates/StreamingDataGenerator.java index b949cb53d2..c0439ba943 100644 --- 
a/v2/streaming-data-generator/src/main/java/com/google/cloud/teleport/v2/templates/StreamingDataGenerator.java +++ b/v2/streaming-data-generator/src/main/java/com/google/cloud/teleport/v2/templates/StreamingDataGenerator.java @@ -95,7 +95,7 @@ public class StreamingDataGenerator { * the executor at the command-line. */ public interface StreamingDataGeneratorOptions extends PipelineOptions { - @TemplateParameter.Text( + @TemplateParameter.Long( order = 1, regexes = {"^[1-9][0-9]*$"}, description = "Required output rate", @@ -361,7 +361,7 @@ public interface StreamingDataGeneratorOptions extends PipelineOptions { void setStatement(String statement); - @TemplateParameter.Text( + @TemplateParameter.ProjectId( order = 22, optional = true, parentName = "sinkType", @@ -409,7 +409,7 @@ public interface StreamingDataGeneratorOptions extends PipelineOptions { void setSpannerTableName(String spannerTableName); - @TemplateParameter.Text( + @TemplateParameter.Long( order = 26, optional = true, parentName = "sinkType", @@ -422,7 +422,7 @@ public interface StreamingDataGeneratorOptions extends PipelineOptions { void setMaxNumMutations(Long value); - @TemplateParameter.Text( + @TemplateParameter.Long( order = 27, optional = true, parentName = "sinkType", @@ -435,7 +435,7 @@ public interface StreamingDataGeneratorOptions extends PipelineOptions { void setMaxNumRows(Long value); - @TemplateParameter.Text( + @TemplateParameter.Long( order = 28, optional = true, parentName = "sinkType", @@ -448,7 +448,7 @@ public interface StreamingDataGeneratorOptions extends PipelineOptions { void setBatchSizeBytes(Long value); - @TemplateParameter.Text( + @TemplateParameter.Long( order = 29, optional = true, parentName = "sinkType", From 968a7d19726ba636d87acea2ee3c2171c725d107 Mon Sep 17 00:00:00 2001 From: "vitaly.terentyev" Date: Mon, 15 Apr 2024 19:08:14 +0400 Subject: [PATCH 23/70] Fix Spotless --- .../v2/options/SpannerChangeStreamsToPubSubOptions.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToPubSubOptions.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToPubSubOptions.java index 7a7f66a475..634d3e59ab 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToPubSubOptions.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToPubSubOptions.java @@ -153,10 +153,7 @@ public interface SpannerChangeStreamsToPubSubOptions extends DataflowPipelineOpt @TemplateParameter.Enum( order = 12, - enumOptions = { - @TemplateEnumOption("JSON"), - @TemplateEnumOption("AVRO") - }, + enumOptions = {@TemplateEnumOption("JSON"), @TemplateEnumOption("AVRO")}, optional = true, description = "Output data format", helpText = From b560460daf08afcaf3a920934b49a0a3acbb6879 Mon Sep 17 00:00:00 2001 From: "vitaly.terentyev" Date: Mon, 15 Apr 2024 19:48:48 +0400 Subject: [PATCH 24/70] Removed regexes --- .../cloud/teleport/v2/templates/StreamingDataGenerator.java | 6 ------ 1 file changed, 6 deletions(-) diff --git a/v2/streaming-data-generator/src/main/java/com/google/cloud/teleport/v2/templates/StreamingDataGenerator.java b/v2/streaming-data-generator/src/main/java/com/google/cloud/teleport/v2/templates/StreamingDataGenerator.java index c0439ba943..2c007c8a23 100644 --- 
a/v2/streaming-data-generator/src/main/java/com/google/cloud/teleport/v2/templates/StreamingDataGenerator.java +++ b/v2/streaming-data-generator/src/main/java/com/google/cloud/teleport/v2/templates/StreamingDataGenerator.java @@ -97,7 +97,6 @@ public class StreamingDataGenerator { public interface StreamingDataGeneratorOptions extends PipelineOptions { @TemplateParameter.Long( order = 1, - regexes = {"^[1-9][0-9]*$"}, description = "Required output rate", helpText = "Indicates rate of messages per second to be published to Pub/Sub") @Required @@ -366,7 +365,6 @@ public interface StreamingDataGeneratorOptions extends PipelineOptions { optional = true, parentName = "sinkType", parentTriggerValues = {"SPANNER"}, - regexes = {"^.+$"}, description = "GCP Project Id of where the Spanner table lives.", helpText = "GCP Project Id of where the Spanner table lives.") String getProjectId(); @@ -414,7 +412,6 @@ public interface StreamingDataGeneratorOptions extends PipelineOptions { optional = true, parentName = "sinkType", parentTriggerValues = {"SPANNER"}, - regexes = {"^[1-9][0-9]*$"}, description = "Max mutatated cells per batch.", helpText = "Specifies the cell mutation limit (maximum number of mutated cells per batch). Default value is 5000") @@ -427,7 +424,6 @@ public interface StreamingDataGeneratorOptions extends PipelineOptions { optional = true, parentName = "sinkType", parentTriggerValues = {"SPANNER"}, - regexes = {"^[1-9][0-9]*$"}, description = "Max rows per batch.", helpText = "Specifies the row mutation limit (maximum number of mutated rows per batch). Default value is 1000") @@ -440,7 +436,6 @@ public interface StreamingDataGeneratorOptions extends PipelineOptions { optional = true, parentName = "sinkType", parentTriggerValues = {"SPANNER"}, - regexes = {"^[1-9][0-9]*$"}, description = "Max batch size in bytes.", helpText = "Specifies the batch size limit (max number of bytes mutated per batch). 
Default value is 1MB") @@ -453,7 +448,6 @@ public interface StreamingDataGeneratorOptions extends PipelineOptions { optional = true, parentName = "sinkType", parentTriggerValues = {"SPANNER"}, - regexes = {"^[1-9][0-9]*$"}, description = "Commit deadline in seconds for write requests.", helpText = "Specifies the deadline in seconds for the Commit API call.") Long getCommitDeadlineSeconds(); From e3ddfecf0f96b2f6bb7065ac9f927a2b4ecee951 Mon Sep 17 00:00:00 2001 From: "vitaly.terentyev" Date: Tue, 16 Apr 2024 11:18:26 +0400 Subject: [PATCH 25/70] Update old types for v1 templates --- .../cloud/teleport/bigtable/CassandraToBigtable.java | 5 +---- .../google/cloud/teleport/spanner/ImportPipeline.java | 3 +-- .../cloud/teleport/templates/BigQueryToTFRecord.java | 9 +++------ .../teleport/templates/common/DatadogConverters.java | 5 +---- .../teleport/templates/common/DatastoreConverters.java | 6 ++---- .../teleport/templates/common/SplunkConverters.java | 7 ++----- .../v2/options/DataplexBigQueryToGcsOptions.java | 5 ++++- 7 files changed, 14 insertions(+), 26 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/bigtable/CassandraToBigtable.java b/v1/src/main/java/com/google/cloud/teleport/bigtable/CassandraToBigtable.java index f80d7116de..e72d47cb34 100644 --- a/v1/src/main/java/com/google/cloud/teleport/bigtable/CassandraToBigtable.java +++ b/v1/src/main/java/com/google/cloud/teleport/bigtable/CassandraToBigtable.java @@ -78,12 +78,9 @@ public interface Options extends PipelineOptions { @SuppressWarnings("unused") void setCassandraHosts(ValueProvider hosts); - @TemplateParameter.Text( + @TemplateParameter.Integer( order = 2, optional = true, - regexes = { - "^([0-9]{1,4}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])$" - }, description = "Cassandra Port", helpText = "The TCP port to use to reach Apache Cassandra on the nodes. The default value is 9042.") diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java index 4376c1d5ea..d640484310 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ImportPipeline.java @@ -173,10 +173,9 @@ public interface Options extends PipelineOptions { void setWaitUntilFinish(boolean value); - @TemplateParameter.Text( + @TemplateParameter.Integer( order = 10, optional = true, - regexes = {"[0-9]+"}, description = "DDL Creation timeout in minutes", helpText = "The timeout in minutes for DDL statements performed by the template. The default value is 30 minutes.") diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/BigQueryToTFRecord.java b/v1/src/main/java/com/google/cloud/teleport/templates/BigQueryToTFRecord.java index e081591200..e2b5dd65b5 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/BigQueryToTFRecord.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/BigQueryToTFRecord.java @@ -350,10 +350,9 @@ public interface Options extends BigQueryReadOptions { void setOutputSuffix(ValueProvider outputSuffix); - @TemplateParameter.Text( + @TemplateParameter.Float( order = 3, optional = true, - regexes = {"(^\\.[1-9]*$)|(^[01]*)"}, description = "Percentage of data to be in the training set ", helpText = "The percentage of query data allocated to training TFRecord files. 
The default value is 1, or 100%.") @@ -362,10 +361,9 @@ public interface Options extends BigQueryReadOptions { void setTrainingPercentage(ValueProvider trainingPercentage); - @TemplateParameter.Text( + @TemplateParameter.Float( order = 4, optional = true, - regexes = {"(^\\.[1-9]*$)|(^[01]*)"}, description = "Percentage of data to be in the testing set ", helpText = "The percentage of query data allocated to testing TFRecord files. The default value is 0, or 0%.") @@ -374,10 +372,9 @@ public interface Options extends BigQueryReadOptions { void setTestingPercentage(ValueProvider testingPercentage); - @TemplateParameter.Text( + @TemplateParameter.Float( order = 5, optional = true, - regexes = {"(^\\.[1-9]*$)|(^[01]*)"}, description = "Percentage of data to be in the validation set ", helpText = "The percentage of query data allocated to validation TFRecord files. The default value is 0, or 0%.") diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/common/DatadogConverters.java b/v1/src/main/java/com/google/cloud/teleport/templates/common/DatadogConverters.java index a9ec270208..f0b7e7d92e 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/common/DatadogConverters.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/common/DatadogConverters.java @@ -136,12 +136,9 @@ public interface DatadogOptions extends PipelineOptions { void setIncludePubsubMessage(ValueProvider includePubsubMessage); - @TemplateParameter.Text( + @TemplateParameter.KmsEncryptionKey( order = 6, optional = true, - regexes = { - "^projects\\/[^\\n\\r\\/]+\\/locations\\/[^\\n\\r\\/]+\\/keyRings\\/[^\\n\\r\\/]+\\/cryptoKeys\\/[^\\n\\r\\/]+$" - }, description = "Google Cloud KMS encryption key for the API key", helpText = "The Cloud KMS key to use to decrypt the API Key. You must provide this parameter if the `apiKeySource` is set to `KMS`. If the Cloud KMS key is provided, you must pass in an encrypted API Key.", diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/common/DatastoreConverters.java b/v1/src/main/java/com/google/cloud/teleport/templates/common/DatastoreConverters.java index d0c518bcdb..5fa9048c8a 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/common/DatastoreConverters.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/common/DatastoreConverters.java @@ -216,10 +216,9 @@ public interface DatastoreWriteOptions extends PipelineOptions { /** * @deprecated Please use getFirestoreHintNumWorkers() instead. */ - @TemplateParameter.Text( + @TemplateParameter.Integer( order = 4, optional = true, - regexes = {"^[1-9]+[0-9]*$"}, description = "Expected number of workers", helpText = "Hint for the expected number of workers in the Datastore ramp-up throttling step. Default is `500`.") @@ -300,10 +299,9 @@ public interface DatastoreDeleteOptions extends PipelineOptions { /** * @deprecated Please use getFirestoreHintNumWorkers() instead. 
*/ - @TemplateParameter.Text( + @TemplateParameter.Integer( order = 2, optional = true, - regexes = {"^[1-9][0-9]*$"}, description = "Expected number of workers", helpText = "Hint for the expected number of workers in the Datastore ramp-up throttling step.") diff --git a/v1/src/main/java/com/google/cloud/teleport/templates/common/SplunkConverters.java b/v1/src/main/java/com/google/cloud/teleport/templates/common/SplunkConverters.java index d3d0e2e986..46e1a2eb27 100644 --- a/v1/src/main/java/com/google/cloud/teleport/templates/common/SplunkConverters.java +++ b/v1/src/main/java/com/google/cloud/teleport/templates/common/SplunkConverters.java @@ -151,12 +151,9 @@ public interface SplunkOptions extends PipelineOptions { void setIncludePubsubMessage(ValueProvider includePubsubMessage); - @TemplateParameter.Text( + @TemplateParameter.KmsEncryptionKey( order = 7, optional = true, - regexes = { - "^projects\\/[^\\n\\r\\/]+\\/locations\\/[^\\n\\r\\/]+\\/keyRings\\/[^\\n\\r\\/]+\\/cryptoKeys\\/[^\\n\\r\\/]+$" - }, description = "Google Cloud KMS encryption key for the token", helpText = "The Cloud KMS key to use to decrypt the HEC token string. This parameter must be provided when tokenSource is set to KMS. If the Cloud KMS key is provided, the HEC token string `must` be passed in encrypted.", @@ -217,7 +214,7 @@ public interface SplunkOptions extends PipelineOptions { void setEnableBatchLogs(ValueProvider enableBatchLogs); - @TemplateParameter.Text( + @TemplateParameter.Boolean( order = 12, optional = true, description = diff --git a/v2/dataplex/src/main/java/com/google/cloud/teleport/v2/options/DataplexBigQueryToGcsOptions.java b/v2/dataplex/src/main/java/com/google/cloud/teleport/v2/options/DataplexBigQueryToGcsOptions.java index bfbde306ba..eb1e7c1165 100644 --- a/v2/dataplex/src/main/java/com/google/cloud/teleport/v2/options/DataplexBigQueryToGcsOptions.java +++ b/v2/dataplex/src/main/java/com/google/cloud/teleport/v2/options/DataplexBigQueryToGcsOptions.java @@ -75,9 +75,12 @@ public interface DataplexBigQueryToGcsOptions void setDestinationStorageBucketAssetName(String destinationStorageBucketAssetName); - @TemplateParameter.DateTime( + @TemplateParameter.Text( order = 4, optional = true, + regexes = { + "^([0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}:[0-9]{2}(Z|[+-][0-9]{2}:[0-9]{2})?)?|-[pP]([0-9]+(\\.[0-9]+)?Y)?([0-9]+(\\.[0-9]+)?M)?([0-9]+(\\.[0-9]+)?W)?([0-9]+(\\.[0-9]+)?D)?(T([0-9]+(\\.[0-9]+)?H)?([0-9]+(\\.[0-9]+)?M)?([0-9]+(\\.[0-9]+)?S)?)?)$" + }, description = "Move data older than the date.", helpText = "Move data older than this date (and optional time). For partitioned tables, move partitions last modified before this date/time. For non-partitioned tables, move if the table was last modified before this date/time. If not specified, move all tables / partitions. The date/time is parsed in the default time zone by default, but optional suffixes Z and +HH:mm are supported. Format: YYYY-MM-DD or YYYY-MM-DDTHH:mm:ss or YYYY-MM-DDTHH:mm:ss+03:00. Relative date/time (https://en.wikipedia.org/wiki/ISO_8601#Durations) is also supported. 
Format: -PnDTnHnMn.nS (must start with -P meaning time in the past).") From 89cf17e9c7da6f01f9b573dc4440fe73fbf01773 Mon Sep 17 00:00:00 2001 From: "vitaly.terentyev" Date: Thu, 18 Apr 2024 12:52:13 +0400 Subject: [PATCH 26/70] Change DateTime to Text --- .../com/google/cloud/teleport/v2/templates/GCSToSourceDb.java | 2 +- .../google/cloud/teleport/v2/templates/TextIOToBigQuery.java | 2 +- .../v2/templates/SpannerChangeStreamsToShardedFileSink.java | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/GCSToSourceDb.java b/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/GCSToSourceDb.java index d4f4f13a25..5dedd6d505 100644 --- a/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/GCSToSourceDb.java +++ b/v2/gcs-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/GCSToSourceDb.java @@ -134,7 +134,7 @@ public interface Options extends PipelineOptions, StreamingOptions { void setTimerIntervalInMilliSec(Integer value); - @TemplateParameter.DateTime( + @TemplateParameter.Text( order = 6, optional = true, description = diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/TextIOToBigQuery.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/TextIOToBigQuery.java index 74035b8f5b..86f6f13690 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/TextIOToBigQuery.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/TextIOToBigQuery.java @@ -242,7 +242,7 @@ public interface Options void setOutputTable(String value); - @TemplateParameter.GcsWriteFile( + @TemplateParameter.GcsReadFile( order = 4, optional = false, description = "GCS path to javascript fn for transforming output", diff --git a/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/SpannerChangeStreamsToShardedFileSink.java b/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/SpannerChangeStreamsToShardedFileSink.java index 63d2fadaf2..084e2f8de5 100644 --- a/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/SpannerChangeStreamsToShardedFileSink.java +++ b/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/SpannerChangeStreamsToShardedFileSink.java @@ -154,7 +154,7 @@ public interface Options extends PipelineOptions, StreamingOptions { void setMetadataDatabase(String value); - @TemplateParameter.DateTime( + @TemplateParameter.Text( order = 7, optional = true, description = "Changes are read from the given timestamp", @@ -164,7 +164,7 @@ public interface Options extends PipelineOptions, StreamingOptions { void setStartTimestamp(String value); - @TemplateParameter.DateTime( + @TemplateParameter.Text( order = 8, optional = true, description = "Changes are read until the given timestamp", From b01840746b0e7ecb65f7ad37198786a3553ca205 Mon Sep 17 00:00:00 2001 From: "vitaly.terentyev" Date: Mon, 13 May 2024 17:55:37 +0400 Subject: [PATCH 27/70] Fix PubsubTopic parameters --- .../v2/options/BigtableChangeStreamsToPubSubOptions.java | 2 +- .../v2/options/SpannerChangeStreamsToPubSubOptions.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/BigtableChangeStreamsToPubSubOptions.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/BigtableChangeStreamsToPubSubOptions.java index fbf4d5daba..cdf6e09e30 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/BigtableChangeStreamsToPubSubOptions.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/BigtableChangeStreamsToPubSubOptions.java @@ -31,7 +31,7 @@ public interface BigtableChangeStreamsToPubSubOptions extends DataflowPipelineOptions, BigtableCommonOptions.ReadChangeStreamOptions { - @TemplateParameter.PubsubTopic( + @TemplateParameter.Text( order = 1, description = "The output Pub/Sub topic name", helpText = "The name of the destination Pub/Sub topic.") diff --git a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToPubSubOptions.java b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToPubSubOptions.java index 634d3e59ab..2bc7b84b38 100644 --- a/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToPubSubOptions.java +++ b/v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/options/SpannerChangeStreamsToPubSubOptions.java @@ -188,7 +188,7 @@ public interface SpannerChangeStreamsToPubSubOptions extends DataflowPipelineOpt void setPubsubProjectId(String pubsubProjectId); - @TemplateParameter.PubsubTopic( + @TemplateParameter.Text( order = 15, description = "The output Pub/Sub topic", helpText = "The Pub/Sub topic to publish PubsubMessage.") From 83992c61485361b7761100c143cc34c8807a4563 Mon Sep 17 00:00:00 2001 From: "vitaly.terentyev" Date: Mon, 13 May 2024 18:10:33 +0400 Subject: [PATCH 28/70] Resolve old comments --- .../java/com/google/cloud/teleport/spanner/ExportPipeline.java | 2 -- .../com/google/cloud/teleport/v2/templates/PubSubToRedis.java | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java index 867ed2e999..949201edac 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ExportPipeline.java @@ -176,8 +176,6 @@ public interface ExportPipelineOptions extends PipelineOptions { @TemplateParameter.Text( order = 10, optional = true, - parentName = "shouldExportRelatedTables", - parentTriggerValues = {"true"}, regexes = {"^[a-zA-Z0-9_]+(,[a-zA-Z0-9_]+)*$"}, description = "Cloud Spanner table name(s).", helpText = diff --git a/v2/pubsub-to-redis/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToRedis.java b/v2/pubsub-to-redis/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToRedis.java index d5d76168ba..79f56fc4b4 100644 --- a/v2/pubsub-to-redis/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToRedis.java +++ b/v2/pubsub-to-redis/src/main/java/com/google/cloud/teleport/v2/templates/PubSubToRedis.java @@ -221,7 +221,8 @@ public interface PubSubToRedisOptions optional = true, parentName = "redisSinkType", parentTriggerValues = {"HASH_SINK", "LOGGING_SINK"}, - description = "Hash key expiration time in sec (ttl)", + description = + "Hash key expiration time in sec (ttl), supported only for HASH_SINK and LOGGING_SINK", helpText = "Key 
expiration time in sec (ttl, default for HASH_SINK is -1 i.e. no expiration)") @Default.Long(-1L) From 0bea310adcc6e9a843a9de0081a53c8c7544e852 Mon Sep 17 00:00:00 2001 From: Dippatel98 Date: Mon, 13 May 2024 17:22:00 +0000 Subject: [PATCH 29/70] Add integration test for KafkaToBigQuery Template --- .../BigQueryDynamicDestination.java | 2 +- .../templates/KafkaToBigQueryFlexAvroIT.java | 412 ++++++++++++++++++ .../avro_schema.avsc | 15 + .../other_avro_schema.avsc | 19 + 4 files changed, 447 insertions(+), 1 deletion(-) create mode 100644 v2/kafka-to-bigquery/src/test/java/com/google/cloud/teleport/v2/templates/KafkaToBigQueryFlexAvroIT.java create mode 100644 v2/kafka-to-bigquery/src/test/resources/KafkaToBigQueryFlexAvroIT/avro_schema.avsc create mode 100644 v2/kafka-to-bigquery/src/test/resources/KafkaToBigQueryFlexAvroIT/other_avro_schema.avsc diff --git a/v2/kafka-to-bigquery/src/main/java/com/google/cloud/teleport/v2/transforms/BigQueryDynamicDestination.java b/v2/kafka-to-bigquery/src/main/java/com/google/cloud/teleport/v2/transforms/BigQueryDynamicDestination.java index a33fb1d7f1..8f43357d00 100644 --- a/v2/kafka-to-bigquery/src/main/java/com/google/cloud/teleport/v2/transforms/BigQueryDynamicDestination.java +++ b/v2/kafka-to-bigquery/src/main/java/com/google/cloud/teleport/v2/transforms/BigQueryDynamicDestination.java @@ -58,7 +58,7 @@ public TableDestination getTable(GenericRecord element) { // tablename + record name (same across schemas) + schema id? String bqQualifiedFullName = element.getSchema().getFullName().replace(".", "-"); String tableName = - this.tableNamePrefix + (this.tableNamePrefix == "" ? "" : "-") + bqQualifiedFullName; + this.tableNamePrefix + (this.tableNamePrefix.isBlank() ? "" : "-") + bqQualifiedFullName; String tableSpec = this.projectName + ":" + this.datasetName + "." + tableName; return new TableDestination(tableSpec, null); } diff --git a/v2/kafka-to-bigquery/src/test/java/com/google/cloud/teleport/v2/templates/KafkaToBigQueryFlexAvroIT.java b/v2/kafka-to-bigquery/src/test/java/com/google/cloud/teleport/v2/templates/KafkaToBigQueryFlexAvroIT.java new file mode 100644 index 0000000000..faf72a3885 --- /dev/null +++ b/v2/kafka-to-bigquery/src/test/java/com/google/cloud/teleport/v2/templates/KafkaToBigQueryFlexAvroIT.java @@ -0,0 +1,412 @@ +/* + * Copyright (C) 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.teleport.v2.templates; + +import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatPipeline; +import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatResult; + +import com.google.cloud.bigquery.Field; +import com.google.cloud.bigquery.Field.Mode; +import com.google.cloud.bigquery.Schema; +import com.google.cloud.bigquery.StandardSQLTypeName; +import com.google.cloud.bigquery.TableId; +import com.google.cloud.teleport.metadata.TemplateIntegrationTest; +import com.google.cloud.teleport.v2.kafka.transforms.BinaryAvroSerializer; +import com.google.cloud.teleport.v2.utils.SecretManagerUtils; +import com.google.common.io.Resources; +import io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient; +import io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException; +import io.confluent.kafka.serializers.KafkaAvroSerializer; +import java.io.IOException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.function.Function; +import net.jcip.annotations.NotThreadSafe; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.GenericRecordBuilder; +import org.apache.beam.it.common.PipelineLauncher.LaunchConfig; +import org.apache.beam.it.common.PipelineLauncher.LaunchInfo; +import org.apache.beam.it.common.PipelineOperator.Result; +import org.apache.beam.it.common.TestProperties; +import org.apache.beam.it.common.utils.ResourceManagerUtils; +import org.apache.beam.it.conditions.ConditionCheck; +import org.apache.beam.it.gcp.TemplateTestBase; +import org.apache.beam.it.gcp.bigquery.BigQueryResourceManager; +import org.apache.beam.it.gcp.bigquery.conditions.BigQueryRowsCheck; +import org.apache.beam.it.kafka.KafkaResourceManager; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.clients.producer.RecordMetadata; +import org.apache.kafka.common.serialization.StringSerializer; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** Integration test for {@link KafkaToBigQuery} (Kafka_To_BigQuery). 
*/ +@Category(TemplateIntegrationTest.class) +@TemplateIntegrationTest(KafkaToBigQueryFlex.class) +@RunWith(JUnit4.class) +@NotThreadSafe +public final class KafkaToBigQueryFlexAvroIT extends TemplateTestBase { + + private static final Logger LOG = LoggerFactory.getLogger(KafkaToBigQueryFlexAvroIT.class); + + private KafkaResourceManager kafkaResourceManager; + private BigQueryResourceManager bigQueryClient; + private String bqDatasetId; + private TableId deadletterTableId; + private TableId tableId; + private Schema bqSchema; + private org.apache.avro.Schema avroSchema; + private org.apache.avro.Schema otherAvroSchema; + + @Before + public void setup() throws IOException { + bigQueryClient = BigQueryResourceManager.builder(testName, PROJECT, credentials).build(); + bqDatasetId = bigQueryClient.createDataset(REGION); + bqSchema = + Schema.of( + Field.of("productId", StandardSQLTypeName.INT64), + Field.newBuilder("productName", StandardSQLTypeName.STRING).setMaxLength(10L).build()); + + tableId = bigQueryClient.createTable(testName, bqSchema); + kafkaResourceManager = + KafkaResourceManager.builder(testName).setHost(TestProperties.hostIp()).build(); + + URL avroSchemaResource = Resources.getResource("KafkaToBigQueryFlexAvroIT/avro_schema.avsc"); + gcsClient.uploadArtifact("avro_schema.avsc", avroSchemaResource.getPath()); + avroSchema = new org.apache.avro.Schema.Parser().parse(avroSchemaResource.openStream()); + + URL otherAvroSchemaResource = + Resources.getResource("KafkaToBigQueryFlexAvroIT/other_avro_schema.avsc"); + gcsClient.uploadArtifact("other_avro_schema.avsc", otherAvroSchemaResource.getPath()); + otherAvroSchema = + new org.apache.avro.Schema.Parser().parse(otherAvroSchemaResource.openStream()); + } + + @After + public void tearDown() { + ResourceManagerUtils.cleanResources(kafkaResourceManager, bigQueryClient); + } + + @Test + public void testKafkaToBigQueryAvroInConfluentFormat() throws IOException, RestClientException { + baseKafkaToBigQueryAvro( + b -> + b.addParameter("avroFormat", "CONFLUENT_WIRE_FORMAT") + .addParameter("avroSchemaPath", getGcsPath("avro_schema.avsc")) + .addParameter("outputTableSpec", toTableSpecLegacy(tableId))); + } + + @Test + public void testKafkaToBigQueryAvroWithSchemaRegistry() throws IOException, RestClientException { + baseKafkaToBigQueryAvro( + b -> + b.addParameter("avroFormat", "CONFLUENT_WIRE_FORMAT") + .addParameter( + "schemaRegistryConnectionUrl", + SecretManagerUtils.getSecret( + "projects/269744978479/secrets/kafka-schema-registry-connection-url/versions/1")) + .addParameter("outputDataset", bqDatasetId)); + } + + @Test + public void testKafkaToBigQueryAvroInNonConfluentFormat() + throws IOException, RestClientException { + baseKafkaToBigQueryAvro( + b -> + b.addParameter("avroFormat", "NON_WIRE_FORMAT") + .addParameter("avroSchemaPath", getGcsPath("avro_schema.avsc")) + .addParameter("outputTableSpec", toTableSpecLegacy(tableId))); + } + + @Test + public void testKafkaToBigQueryAvroWithExistingDLQ() throws IOException, RestClientException { + deadletterTableId = bigQueryClient.createTable(testName + "_dlq", getDeadletterSchema()); + + baseKafkaToBigQueryAvro( + b -> + b.addParameter("outputDeadletterTable", toTableSpecLegacy(deadletterTableId)) + .addParameter("avroSchemaPath", getGcsPath("avro_schema.avsc")) + .addParameter("outputTableSpec", toTableSpecLegacy(tableId))); + } + + @Test + public void testKafkaToBigQueryAvroWithStorageApi() throws IOException, RestClientException { + baseKafkaToBigQueryAvro( + b -> + 
b.addParameter("useStorageWriteApi", "true") + .addParameter("numStorageWriteApiStreams", "3") + .addParameter("storageWriteApiTriggeringFrequencySec", "3") + .addParameter("avroSchemaPath", getGcsPath("avro_schema.avsc")) + .addParameter("outputTableSpec", toTableSpecLegacy(tableId))); + } + + @Test + public void testKafkaToBigQueryAvroWithStorageApiExistingDLQ() + throws IOException, RestClientException { + deadletterTableId = bigQueryClient.createTable(testName + "_dlq", getDeadletterSchema()); + + baseKafkaToBigQueryAvro( + b -> + b.addParameter("useStorageWriteApi", "true") + .addParameter("numStorageWriteApiStreams", "3") + .addParameter("storageWriteApiTriggeringFrequencySec", "3") + .addParameter("outputDeadletterTable", toTableSpecLegacy(deadletterTableId)) + .addParameter("avroSchemaPath", getGcsPath("avro_schema.avsc")) + .addParameter("outputTableSpec", toTableSpecLegacy(tableId))); + } + + private Schema getDeadletterSchema() { + Schema dlqSchema = + Schema.of( + Field.newBuilder("timestamp", StandardSQLTypeName.TIMESTAMP) + .setMode(Mode.REQUIRED) + .build(), + Field.newBuilder("payloadString", StandardSQLTypeName.STRING) + .setMode(Mode.REQUIRED) + .build(), + Field.newBuilder("payloadBytes", StandardSQLTypeName.BYTES) + .setMode(Mode.REQUIRED) + .build(), + Field.newBuilder( + "attributes", + StandardSQLTypeName.STRUCT, + Field.newBuilder("key", StandardSQLTypeName.STRING) + .setMode(Mode.NULLABLE) + .build(), + Field.newBuilder("value", StandardSQLTypeName.STRING) + .setMode(Mode.NULLABLE) + .build()) + .setMode(Mode.REPEATED) + .build(), + Field.newBuilder("errorMessage", StandardSQLTypeName.STRING) + .setMode(Mode.NULLABLE) + .build(), + Field.newBuilder("stacktrace", StandardSQLTypeName.STRING) + .setMode(Mode.NULLABLE) + .build()); + return dlqSchema; + } + + private void baseKafkaToBigQueryAvro( + Function paramsAdder) + throws IOException, RestClientException { + // Arrange + String topicName = kafkaResourceManager.createTopic(testName, 5); + + LaunchConfig.Builder options = + paramsAdder.apply( + LaunchConfig.builder(testName, specPath) + .addParameter( + "readBootstrapServers", + kafkaResourceManager.getBootstrapServers().replace("PLAINTEXT://", "")) + .addParameter("kafkaReadTopics", topicName) + .addParameter("kafkaReadOffset", "earliest") + .addParameter("messageFormat", "AVRO")); + + // Act + LaunchInfo info = launchTemplate(options); + assertThatPipeline(info).isRunning(); + + List conditions = new ArrayList(); + + if (options.getParameter("avroFormat") != null + && options.getParameter("avroFormat").equals("CONFLUENT_WIRE_FORMAT") + && options.getParameter("schemaRegistryConnectionUrl") != null) { + + publishDoubleSchemaMessages(topicName); + TableId avroTable = TableId.of(bqDatasetId, avroSchema.getFullName().replace(".", "-")); + TableId otherAvroTable = + TableId.of(bqDatasetId, otherAvroSchema.getFullName().replace(".", "-")); + + conditions.add(BigQueryRowsCheck.builder(bigQueryClient, avroTable).setMinRows(20).build()); + conditions.add( + BigQueryRowsCheck.builder(bigQueryClient, otherAvroTable).setMinRows(20).build()); + + } else if (options.getParameter("avroFormat") != null + && options.getParameter("avroFormat").equals("NON_WIRE_FORMAT") + && options.getParameter("avroSchemaPath") != null) { + + publishBinaryMessages(topicName); + conditions.add(BigQueryRowsCheck.builder(bigQueryClient, tableId).setMinRows(20).build()); + + } else { + + publishSingleSchemaMessages(topicName); + conditions.add(BigQueryRowsCheck.builder(bigQueryClient, 
tableId).setMinRows(20).build()); + } + + if (options.getParameter("outputDeadletterTable") != null) { + conditions.add( + BigQueryRowsCheck.builder(bigQueryClient, deadletterTableId).setMinRows(10).build()); + } + + Result result = + pipelineOperator() + .waitForConditionsAndFinish( + createConfig(info), conditions.toArray(new ConditionCheck[0])); + + // Assert + assertThatResult(result).meetsConditions(); + } + + private void publishSingleSchemaMessages(String topicName) + throws IOException, RestClientException { + MockSchemaRegistryClient registryClient = new MockSchemaRegistryClient(); + registryClient.register(topicName + "-value", avroSchema, 1, 1); + + KafkaProducer kafkaProducer = + kafkaResourceManager.buildProducer( + new StringSerializer(), new KafkaAvroSerializer(registryClient)); + + for (int i = 1; i <= 10; i++) { + GenericRecord dataflow = createRecord(Integer.valueOf(i + "1"), "Dataflow", 0); + publish(kafkaProducer, topicName, i + "1", dataflow); + + GenericRecord pubsub = createRecord(Integer.valueOf(i + "2"), "Pub/Sub", 0); + publish(kafkaProducer, topicName, i + "2", pubsub); + + GenericRecord invalid = createRecord(Integer.valueOf(i + "3"), "InvalidNameTooLong", 0); + publish(kafkaProducer, topicName, i + "3", invalid); + + try { + TimeUnit.SECONDS.sleep(3); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + } + + private void publishDoubleSchemaMessages(String topicName) + throws IOException, RestClientException { + MockSchemaRegistryClient registryClient = new MockSchemaRegistryClient(); + registryClient.register(topicName + "-value", avroSchema, 1, 3); + registryClient.register(topicName + "-value", otherAvroSchema, 1, 4); + + KafkaProducer kafkaProducer = + kafkaResourceManager.buildProducer( + new StringSerializer(), new KafkaAvroSerializer(registryClient)); + + for (int i = 1; i <= 10; i++) { + GenericRecord dataflow = createRecord(Integer.valueOf(i + "1"), "Dataflow", 0); + publish(kafkaProducer, topicName, i + "1", dataflow); + + GenericRecord pubsub = createRecord(Integer.valueOf(i + "2"), "Pub/Sub", 0); + publish(kafkaProducer, topicName, i + "2", pubsub); + + GenericRecord invalid = createRecord(Integer.valueOf(i + "3"), "InvalidNameTooLong", 0); + publish(kafkaProducer, topicName, i + "3", invalid); + + GenericRecord otherDataflow = + createOtherRecord(Integer.valueOf(i + "4"), "Dataflow", "dataflow", 0); + publish(kafkaProducer, topicName, i + "4", otherDataflow); + + GenericRecord otherPubsub = + createOtherRecord(Integer.valueOf(i + "5"), "Pub/Sub", "pubsub", 0); + publish(kafkaProducer, topicName, i + "5", otherPubsub); + + GenericRecord otherInvalid = + createOtherRecord( + Integer.valueOf(i + "6"), "InvalidNameTooLong", "InvalidNameTooLong", 0); + publish(kafkaProducer, topicName, i + "6", otherInvalid); + + try { + TimeUnit.SECONDS.sleep(3); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + } + + public void publishBinaryMessages(String topicName) throws IOException { + KafkaProducer kafkaProducer = + kafkaResourceManager.buildProducer( + new StringSerializer(), new BinaryAvroSerializer(avroSchema)); + + for (int i = 1; i <= 10; i++) { + GenericRecord dataflow = createRecord(Integer.valueOf(i + "1"), "Dataflow", 0); + publishBinary(kafkaProducer, topicName, i + "1", dataflow); + + GenericRecord pubsub = createRecord(Integer.valueOf(i + "2"), "Pub/Sub", 0); + publishBinary(kafkaProducer, topicName, i + "2", pubsub); + + GenericRecord invalid = createRecord(Integer.valueOf(i + "3"), 
"InvalidNameTooLong", 0); + publishBinary(kafkaProducer, topicName, i + "3", invalid); + + try { + TimeUnit.SECONDS.sleep(3); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + } + + private void publish( + KafkaProducer producer, String topicName, String key, GenericRecord value) { + try { + RecordMetadata recordMetadata = + producer.send(new ProducerRecord<>(topicName, key, value)).get(); + LOG.info( + "Published record {}, partition {} - offset: {}", + recordMetadata.topic(), + recordMetadata.partition(), + recordMetadata.offset()); + } catch (Exception e) { + throw new RuntimeException("Error publishing record to Kafka", e); + } + } + + private void publishBinary( + KafkaProducer producer, + String topicName, + String key, + GenericRecord value) { + try { + RecordMetadata recordMetadata = + producer.send(new ProducerRecord<>(topicName, key, value)).get(); + LOG.info( + "Published record {}, partition {} - offset: {}", + recordMetadata.topic(), + recordMetadata.partition(), + recordMetadata.offset()); + } catch (Exception e) { + throw new RuntimeException("Error publishing record to Kafka", e); + } + } + + private GenericRecord createRecord(int id, String productName, double value) { + return new GenericRecordBuilder(avroSchema) + .set("productId", id) + .set("productName", productName) + .build(); + } + + private GenericRecord createOtherRecord(int id, String productName, String name, double value) { + return new GenericRecordBuilder(otherAvroSchema) + .set("productId", id) + .set("productName", productName) + .set("name", name) + .build(); + } +} diff --git a/v2/kafka-to-bigquery/src/test/resources/KafkaToBigQueryFlexAvroIT/avro_schema.avsc b/v2/kafka-to-bigquery/src/test/resources/KafkaToBigQueryFlexAvroIT/avro_schema.avsc new file mode 100644 index 0000000000..d907a7f17a --- /dev/null +++ b/v2/kafka-to-bigquery/src/test/resources/KafkaToBigQueryFlexAvroIT/avro_schema.avsc @@ -0,0 +1,15 @@ +{ + "type": "record", + "namespace": "org.example.avro", + "name": "AvroProductKafkaRecord", + "fields": [ + { + "name": "productId", + "type": "int" + }, + { + "name": "productName", + "type": "string" + } + ] +} \ No newline at end of file diff --git a/v2/kafka-to-bigquery/src/test/resources/KafkaToBigQueryFlexAvroIT/other_avro_schema.avsc b/v2/kafka-to-bigquery/src/test/resources/KafkaToBigQueryFlexAvroIT/other_avro_schema.avsc new file mode 100644 index 0000000000..13f32d803d --- /dev/null +++ b/v2/kafka-to-bigquery/src/test/resources/KafkaToBigQueryFlexAvroIT/other_avro_schema.avsc @@ -0,0 +1,19 @@ +{ + "type": "record", + "namespace": "org.example.other.avro", + "name": "OtherAvroProductKafkaRecord", + "fields": [ + { + "name": "productId", + "type": "int" + }, + { + "name": "productName", + "type": "string" + }, + { + "name": "name", + "type": "string" + } + ] +} \ No newline at end of file From b961be547dbe786b4d0f9d934115504bb8cb5c51 Mon Sep 17 00:00:00 2001 From: Jeffrey Kinard Date: Wed, 17 Apr 2024 14:23:45 -0400 Subject: [PATCH 30/70] Add jinja preprocessing to YamlTemplate Signed-off-by: Jeffrey Kinard --- .../main/resources/Dockerfile-template-yaml | 2 + .../templates/python/YAMLTemplate.java | 9 ++ python/src/main/python/yaml-template/main.py | 91 +++++++++++++++++-- .../templates/python/YAMLTemplateIT.java | 15 ++- python/src/test/resources/YamlTemplateIT.yaml | 18 +--- 5 files changed, 108 insertions(+), 27 deletions(-) diff --git a/plugins/core-plugin/src/main/resources/Dockerfile-template-yaml 
b/plugins/core-plugin/src/main/resources/Dockerfile-template-yaml index 9af50e750b..f24de42e3b 100644 --- a/plugins/core-plugin/src/main/resources/Dockerfile-template-yaml +++ b/plugins/core-plugin/src/main/resources/Dockerfile-template-yaml @@ -20,9 +20,11 @@ WORKDIR $WORKDIR RUN if ! [ -f requirements.txt ] ; then echo "$BEAM_PACKAGE" > requirements.txt ; fi # Install dependencies to launch the pipeline and download to reduce startup time +# Remove Jinja2 dependency once YAML templatization support is added to Beam RUN python -m venv /venv \ && /venv/bin/pip install --no-cache-dir --upgrade pip setuptools \ && /venv/bin/pip install --no-cache-dir -U -r $REQUIREMENTS_FILE \ + && /venv/bin/pip install --no-cache-dir -U Jinja2 \ && /venv/bin/pip download --no-cache-dir --dest /tmp/dataflow-requirements-cache -r $REQUIREMENTS_FILE \ && rm -rf /usr/local/lib/python$PY_VERSION/site-packages \ && mv /venv/lib/python$PY_VERSION/site-packages /usr/local/lib/python$PY_VERSION/ diff --git a/python/src/main/java/com/google/cloud/teleport/templates/python/YAMLTemplate.java b/python/src/main/java/com/google/cloud/teleport/templates/python/YAMLTemplate.java index 739c866835..8b8cb989b0 100644 --- a/python/src/main/java/com/google/cloud/teleport/templates/python/YAMLTemplate.java +++ b/python/src/main/java/com/google/cloud/teleport/templates/python/YAMLTemplate.java @@ -46,4 +46,13 @@ public interface YAMLTemplate { description = "Input YAML pipeline spec file in Cloud Storage.", helpText = "A file in Cloud Storage containing a yaml description of the pipeline to run.") String getYamlPipelineFile(); + + @TemplateParameter.Text( + order = 3, + name = "jinja_variables", + optional = true, + description = "Input jinja preprocessing variables.", + helpText = + "A json dict of variables used when invoking the jinja preprocessor on the provided yaml pipeline.") + String getJinjaVariables(); } diff --git a/python/src/main/python/yaml-template/main.py b/python/src/main/python/yaml-template/main.py index 5fad83470c..4898b67ff6 100644 --- a/python/src/main/python/yaml-template/main.py +++ b/python/src/main/python/yaml-template/main.py @@ -12,23 +12,96 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. -# + import argparse -import logging +import json +import jinja2 +import yaml + +import apache_beam as beam +from apache_beam.io.filesystems import FileSystems +from apache_beam.typehints.schemas import LogicalType +from apache_beam.typehints.schemas import MillisInstant from apache_beam.yaml import cache_provider_artifacts -from apache_beam.yaml import main +from apache_beam.yaml import yaml_transform +# Workaround for https://github.com/apache/beam/issues/28151. 
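Note: the Jinja preprocessing this patch adds (the --jinja_variables flag and the rendering logic later in this main.py diff) boils down to rendering the YAML spec as a Jinja2 template before it is parsed, with the substitutions supplied as a JSON object. A minimal standalone sketch of that step; the spec snippet reuses the INPUT_PATH_PARAM variable from the integration test below, and the bucket path is illustrative, not part of the template:

    import json
    import jinja2

    yaml_spec = """
    pipeline:
      transforms:
        - type: ReadFromCsv
          config:
            path: {{ INPUT_PATH_PARAM }}
    """
    variables = json.loads('{"INPUT_PATH_PARAM": "gs://example-bucket/input/test.csv"}')
    rendered = (
        jinja2.Environment(undefined=jinja2.StrictUndefined)
        .from_string(yaml_spec)
        .render(**variables))
    print(rendered)  # placeholders substituted; a missing variable raises UndefinedError

Using StrictUndefined is what makes an absent --jinja_variables entry fail fast instead of silently rendering an empty string.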
+LogicalType.register_logical_type(MillisInstant) -def run(argv=None): + +def _configure_parser(argv): parser = argparse.ArgumentParser() - _, pipeline_args = parser.parse_known_args(argv) - pipeline_args += ['--sdk_location=container'] - cache_provider_artifacts.cache_provider_artifacts() - main.run(argv=pipeline_args) + parser.add_argument( + '--yaml_pipeline', + '--pipeline_spec', + help='A yaml description of the pipeline to run.') + parser.add_argument( + '--yaml_pipeline_file', + '--pipeline_spec_file', + help='A file containing a yaml description of the pipeline to run.') + parser.add_argument( + '--json_schema_validation', + default='generic', + help='none: do no pipeline validation against the schema; ' + 'generic: validate the pipeline shape, but not individual transforms; ' + 'per_transform: also validate the config of known transforms') + parser.add_argument( + '--jinja_variables', + default=None, + type=json.loads, + help='A json dict of variables used when invoking the jinja preprocessor ' + 'on the provided yaml pipeline.') + return parser.parse_known_args(argv) + + +def _pipeline_spec_from_args(known_args): + if known_args.yaml_pipeline_file and known_args.yaml_pipeline: + raise ValueError( + "Exactly one of yaml_pipeline or yaml_pipeline_file must be set.") + elif known_args.yaml_pipeline_file: + with FileSystems.open(known_args.yaml_pipeline_file) as fin: + pipeline_yaml = fin.read().decode() + elif known_args.yaml_pipeline: + pipeline_yaml = known_args.yaml_pipeline + else: + raise ValueError( + "Exactly one of yaml_pipeline or yaml_pipeline_file must be set.") + + return pipeline_yaml + + +class _BeamFileIOLoader(jinja2.BaseLoader): + def get_source(self, environment, path): + source = FileSystems.open(path).read().decode() + return source, path, lambda: True + + +def run(argv=None): + known_args, pipeline_args = _configure_parser(argv) + pipeline_yaml = ( # keep formatting + jinja2.Environment( + undefined=jinja2.StrictUndefined, loader=_BeamFileIOLoader()) + .from_string(_pipeline_spec_from_args(known_args)) + .render(**known_args.jinja_variables or {})) + pipeline_spec = yaml.load(pipeline_yaml, Loader=yaml_transform.SafeLineLoader) + + with beam.Pipeline( # linebreak for better yapf formatting + options=beam.options.pipeline_options.PipelineOptions( + pipeline_args, + pickle_library='cloudpickle', + **yaml_transform.SafeLineLoader.strip_metadata(pipeline_spec.get( + 'options', {}))), + display_data={'yaml': pipeline_yaml}) as p: + print("Building pipeline...") + yaml_transform.expand_pipeline( + p, pipeline_spec, validate_schema=known_args.json_schema_validation) + print("Running pipeline...") if __name__ == '__main__': + import logging logging.getLogger().setLevel(logging.INFO) - run() + cache_provider_artifacts.cache_provider_artifacts() + run() \ No newline at end of file diff --git a/python/src/test/java/com/google/cloud/teleport/templates/python/YAMLTemplateIT.java b/python/src/test/java/com/google/cloud/teleport/templates/python/YAMLTemplateIT.java index 40b77e0d30..f96700e158 100644 --- a/python/src/test/java/com/google/cloud/teleport/templates/python/YAMLTemplateIT.java +++ b/python/src/test/java/com/google/cloud/teleport/templates/python/YAMLTemplateIT.java @@ -98,10 +98,7 @@ private void testSimpleComposite( } private String createSimpleYamlMessage() throws IOException { - String yamlMessage = - Files.readString(Paths.get(Resources.getResource("YamlTemplateIT.yaml").getPath())); - yamlMessage = yamlMessage.replaceAll("INPUT_PATH", getGcsBasePath() + 
"/input/test.csv"); - return yamlMessage.replaceAll("OUTPUT_PATH", getGcsBasePath() + "/output"); + return Files.readString(Paths.get(Resources.getResource("YamlTemplateIT.yaml").getPath())); } private void runYamlTemplateTest( @@ -109,8 +106,16 @@ private void runYamlTemplateTest( paramsAdder) throws IOException { // Arrange + String inputPath = getGcsBasePath() + "/input/test.csv"; + String outputPath = getGcsBasePath() + "/output"; PipelineLauncher.LaunchConfig.Builder options = - paramsAdder.apply(PipelineLauncher.LaunchConfig.builder(testName, specPath)); + paramsAdder.apply( + PipelineLauncher.LaunchConfig.builder(testName, specPath) + .addParameter( + "jinja_variables", + String.format( + "{\"INPUT_PATH_PARAM\": \"%s\", \"OUTPUT_PATH_PARAM\": \"%s\"}", + inputPath, outputPath))); // Act PipelineLauncher.LaunchInfo info = launchTemplate(options); diff --git a/python/src/test/resources/YamlTemplateIT.yaml b/python/src/test/resources/YamlTemplateIT.yaml index 2646e5de75..1e26ee90a4 100644 --- a/python/src/test/resources/YamlTemplateIT.yaml +++ b/python/src/test/resources/YamlTemplateIT.yaml @@ -3,7 +3,7 @@ pipeline: transforms: - type: ReadFromCsv config: - path: "INPUT_PATH" + path: {{ INPUT_PATH_PARAM }} - type: MapToFields name: MapWithErrorHandling input: ReadFromCsv @@ -42,21 +42,13 @@ pipeline: fields: sum: expression: num + inverse - - type: WriteToJsonPython + - type: WriteToJson name: WriteGoodFiles input: Sum config: - path: "OUTPUT_PATH/good" - - type: WriteToJsonPython + path: {{ OUTPUT_PATH_PARAM }}/good + - type: WriteToJson name: WriteBadFiles input: TrimErrors config: - path: "OUTPUT_PATH/bad" - -# TODO(polber) - remove with https://github.com/apache/beam/pull/30777 -providers: - - type: python - config: - packages: [] - transforms: - 'WriteToJsonPython': 'apache_beam.io.WriteToJson' \ No newline at end of file + path: {{ OUTPUT_PATH_PARAM }}/bad From 7a3e55d8799ebf4055df954d7cb151eddb64d3ab Mon Sep 17 00:00:00 2001 From: Manit Gupta Date: Thu, 9 May 2024 12:52:22 +0530 Subject: [PATCH 31/70] Switch EventsIT and SessionIT from JSON to Avro --- .../DataStreamToSpannerEventsIT.java | 77 +++++++++++------- .../DataStreamToSpannerSessionIT.java | 10 +-- .../mysql-Articles.avro | Bin 0 -> 2484 bytes .../mysql-Articles.jsonl | 6 -- .../mysql-Authors.avro | Bin 0 -> 2154 bytes .../mysql-Authors.jsonl | 6 -- .../mysql-Books.avro | Bin 0 -> 2304 bytes .../mysql-Books.jsonl | 6 -- .../mysql-backfill-Movie.avro | Bin 0 -> 1938 bytes .../mysql-backfill-Movie.jsonl | 2 - .../mysql-backfill-Users.avro | Bin 0 -> 1927 bytes .../mysql-backfill-Users.jsonl | 2 - .../mysql-cdc-Users.avro | Bin 0 -> 2592 bytes .../mysql-cdc-Users.jsonl | 4 - .../mysql-statements.sql | 15 ++++ .../mysql-backfill-Category.avro | Bin 0 -> 1770 bytes .../mysql-backfill-Category.jsonl | 2 - .../mysql-cdc-Category.avro | Bin 0 -> 2440 bytes .../mysql-cdc-Category.jsonl | 4 - .../mysql-statements.sql | 6 ++ 20 files changed, 74 insertions(+), 66 deletions(-) create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-Articles.avro delete mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-Articles.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-Authors.avro delete mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-Authors.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-Books.avro delete mode 
100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-Books.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-backfill-Movie.avro delete mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-backfill-Movie.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-backfill-Users.avro delete mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-backfill-Users.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-cdc-Users.avro delete mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-cdc-Users.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-statements.sql create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerSessionIT/mysql-backfill-Category.avro delete mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerSessionIT/mysql-backfill-Category.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerSessionIT/mysql-cdc-Category.avro delete mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerSessionIT/mysql-cdc-Category.jsonl create mode 100644 v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerSessionIT/mysql-statements.sql diff --git a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerEventsIT.java b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerEventsIT.java index 31c766da2a..0d07ec4b38 100644 --- a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerEventsIT.java +++ b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerEventsIT.java @@ -96,7 +96,7 @@ public void setUp() throws IOException { pubsubResourceManager, new HashMap<>() { { - put("inputFileFormat", "json"); + put("inputFileFormat", "avro"); } }); } @@ -129,8 +129,8 @@ public void migrationTestWithUpdatesAndDeletes() { uploadDataStreamFile( jobInfo, TABLE1, - "backfill.jsonl", - "DataStreamToSpannerEventsIT/mysql-backfill-Users.jsonl"), + "backfill_users.avro", + "DataStreamToSpannerEventsIT/mysql-backfill-Users.avro"), SpannerRowsCheck.builder(spannerResourceManager, TABLE1) .setMinRows(2) .setMaxRows(2) @@ -138,8 +138,8 @@ public void migrationTestWithUpdatesAndDeletes() { uploadDataStreamFile( jobInfo, TABLE1, - "cdc1.jsonl", - "DataStreamToSpannerEventsIT/mysql-cdc-Users.jsonl"), + "cdc_users.avro", + "DataStreamToSpannerEventsIT/mysql-cdc-Users.avro"), SpannerRowsCheck.builder(spannerResourceManager, TABLE1) .setMinRows(3) .setMaxRows(3) @@ -169,8 +169,8 @@ public void migrationTestWithInsertsOnly() { uploadDataStreamFile( jobInfo, TABLE2, - "backfill.jsonl", - "DataStreamToSpannerEventsIT/mysql-backfill-Movie.jsonl"), + "backfill_movie.avro", + "DataStreamToSpannerEventsIT/mysql-backfill-Movie.avro"), SpannerRowsCheck.builder(spannerResourceManager, TABLE2) .setMinRows(2) .setMaxRows(2) @@ -197,29 +197,29 @@ public void interleavedAndFKAndIndexTest() { uploadDataStreamFile( jobInfo, "Articles", - "mysql-Articles.jsonl", - "DataStreamToSpannerEventsIT/mysql-Articles.jsonl"), + "mysql_articles.avro", + "DataStreamToSpannerEventsIT/mysql-Articles.avro"), uploadDataStreamFile( jobInfo, "Authors", 
- "mysql-Authors.jsonl", - "DataStreamToSpannerEventsIT/mysql-Authors.jsonl"), + "mysql_authors.avro", + "DataStreamToSpannerEventsIT/mysql-Authors.avro"), uploadDataStreamFile( jobInfo, "Books", - "mysql-Books.jsonl", - "DataStreamToSpannerEventsIT/mysql-Books.jsonl"), + "mysql_books.avro", + "DataStreamToSpannerEventsIT/mysql-Books.avro"), SpannerRowsCheck.builder(spannerResourceManager, "Articles") - .setMinRows(3) - .setMaxRows(3) + .setMinRows(4) + .setMaxRows(4) .build(), SpannerRowsCheck.builder(spannerResourceManager, "Books") - .setMinRows(3) - .setMaxRows(3) + .setMinRows(4) + .setMaxRows(4) .build(), SpannerRowsCheck.builder(spannerResourceManager, "Authors") - .setMinRows(3) - .setMaxRows(3) + .setMinRows(4) + .setMaxRows(4) .build())) .build(); @@ -295,8 +295,9 @@ private void assertMovieTableContents() { ImmutableList numericVals = spannerResourceManager.runQuery("select actor from Movie order by id"); - Assert.assertEquals(123.098, numericVals.get(0).getBigDecimal(0).doubleValue(), 0.001); - Assert.assertEquals(931.512, numericVals.get(1).getBigDecimal(0).doubleValue(), 0.001); + // delta value is required to compare floating point numbers + Assert.assertEquals(12345.09876, numericVals.get(0).getBigDecimal(0).doubleValue(), 0.00000001); + Assert.assertEquals(931.5123, numericVals.get(1).getBigDecimal(0).doubleValue(), 0.00000001); } private void assertAuthorsTable() { @@ -307,12 +308,17 @@ private void assertAuthorsTable() { row.put("name", "a1"); events.add(row); - row = new HashMap<>(); + row.clear(); + row.put("author_id", 2); + row.put("name", "a2"); + events.add(row); + + row.clear(); row.put("author_id", 3); - row.put("name", "a003"); + row.put("name", "a3"); events.add(row); - row = new HashMap<>(); + row.clear(); row.put("author_id", 4); row.put("name", "a4"); events.add(row); @@ -330,18 +336,24 @@ private void assertBooksTable() { row.put("author_id", 3); events.add(row); - row = new HashMap<>(); + row.clear(); row.put("id", 2); row.put("title", "Book002"); row.put("author_id", 3); events.add(row); - row = new HashMap<>(); + row.clear(); row.put("id", 3); row.put("title", "Book004"); row.put("author_id", 4); events.add(row); + row.clear(); + row.put("id", 4); + row.put("title", "Book005"); + row.put("author_id", 2); + events.add(row); + SpannerAsserts.assertThatStructs( spannerResourceManager.runQuery("select * from Books@{FORCE_INDEX=author_id_6}")) .hasRecordsUnorderedCaseInsensitiveColumns(events); @@ -357,20 +369,27 @@ private void assertArticlesTable() { row.put("author_id", 1); events.add(row); - row = new HashMap<>(); + row.clear(); row.put("id", 2); row.put("name", "Article002"); row.put("published_date", Date.parseDate("2024-01-01")); row.put("author_id", 1); events.add(row); - row = new HashMap<>(); + row.clear(); row.put("id", 3); - row.put("name", "Article003"); + row.put("name", "Article004"); row.put("published_date", Date.parseDate("2024-01-01")); row.put("author_id", 4); events.add(row); + row.clear(); + row.put("id", 4); + row.put("name", "Article005"); + row.put("published_date", Date.parseDate("2024-01-01")); + row.put("author_id", 3); + events.add(row); + SpannerAsserts.assertThatStructs( spannerResourceManager.runQuery("select * from Articles@{FORCE_INDEX=author_id}")) .hasRecordsUnorderedCaseInsensitiveColumns(events); diff --git a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerSessionIT.java 
b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerSessionIT.java index 2809f2b992..2ec54f2dc6 100644 --- a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerSessionIT.java +++ b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerSessionIT.java @@ -85,7 +85,7 @@ public void setUp() throws IOException { pubsubResourceManager, new HashMap<>() { { - put("inputFileFormat", "json"); + put("inputFileFormat", "avro"); } }); } @@ -116,8 +116,8 @@ public void migrationTestWithRenameAndDropColumn() { uploadDataStreamFile( jobInfo, TABLE, - "backfill.jsonl", - "DataStreamToSpannerSessionIT/mysql-backfill-Category.jsonl"), + "backfill_category.avro", + "DataStreamToSpannerSessionIT/mysql-backfill-Category.avro"), SpannerRowsCheck.builder(spannerResourceManager, TABLE) .setMinRows(2) .setMaxRows(2) @@ -140,8 +140,8 @@ public void migrationTestWithRenameAndDropColumn() { uploadDataStreamFile( jobInfo, TABLE, - "cdc1.jsonl", - "DataStreamToSpannerSessionIT/mysql-cdc-Category.jsonl"), + "cdc_category.avro", + "DataStreamToSpannerSessionIT/mysql-cdc-Category.avro"), SpannerRowsCheck.builder(spannerResourceManager, TABLE) .setMinRows(3) .setMaxRows(3) diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-Articles.avro b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-Articles.avro new file mode 100644 index 0000000000000000000000000000000000000000..01421913ae369ac9a7a5b0b9f603a78d42f76ae5 GIT binary patch literal 2484 zcmdUvO^ee&7{|L|sVjI8l?88-bF)qQw#nW+2zu}Z-P=;aWM;CRb&`oQ6QL}HqA0j$ z4}Jr|gXm}Q=EvBR_yQ{IMLamOO{dwmTPt|b913av&piM6J^z{8(cX%+4jyv0Az%+D z;NarEE&Ca^w``6fz?wTyx!%!M`_9^>F zuI7tBSFSke2qtryjCX7kR$c~-nlqz4v z1iMq>j0qwMOO10=P;i?h7xqB9t8%6zIp%NFbCxF}M3`cUCjl|?h%t&ms^)THG8yPo z1}1tl-sy=LBhL~*Ct3LF$; z-)UjwI{}O%r;VHL@#DGfhu2TueS&Wehw68|bmLt~`r-j~Ac9UrQWdrrD;a0{su1;d ztKIVapzQ~4yG|JdI#!~d3kTvVGfDKi_|yVtT_eLhn;Kfa+?sb{ScRI%pj ztaCSR-Me;o+p1WWb*ZRPi&(?qvqy(VPpg)-3@z_JrgiXbIa*3rqP1G0)%Y`7UXiWg z_kTBABX4uE_3>yqT1r=ehEYY#&lop8f%tQ zr6Dh!(v{=F=v#=35hK74rad^lpEJ&xLX%oXB`Qhi|II0qV@kB^4*g>vIW-{GZr21- z$m2_PYp6`A$0u~WuBH-FTa6_@1?;ealwf0-rn&WF-mxWVfDW$#W}1f-G^{3DwtPmo z7O?8OhJ;g>YF@r(lP#5iTB|7~%R!De+oP*dD`Lhb&RDIHLsV3po4#fa4wuBGP)svX zxXxX`jN4*z$q^JIGiP=r=lq@en)FP?Ddkkt#eg_@A_S*U7+R-%Z@CB zt4_e>>5OZ~(3NF{M#C0LX$BS7Xy3A_0l6}zE@-~BN{ve3jToADSsdlA&LJKW{{;KKu1fPvUnU7AWDrFZIzgCV(_Al$x>?WiWwTljA7=}|TDiK16E{GA`#1J=eY`0E@7?6-)0R-JjWS!6F_S&=0+?|o4sspMj zA;iFh*qB%=#4jjfLHr8Dgv7wk0(VV(F=-k+FwjVelKB1Zd+&L_yL-|0f^``_;cQu; zEu6ylOS?c0a}3r1$B6M5v_S?_{hK=BO~!VF`X?cn#saKu?3NFUf@tZuR7g&;O|aKC z&pC#1C`pQiglTS+rWs{Q-v*Rz5(Me`$br$@DWQ}Iuvb}8u!1A}Z`WkeHb&C81E=gG z#d3?y-L5LcFb$8%&7m@49`4}byqxhOqqu< zl&IFN!WyZNF7;~FUF4}_GrkXQ7k|lqmp0PX;VT>u3crqYH9x+BS%v3IaB$LA- zWpJW5@lGc)5uQw0Wt$a&08FNB3U06eVSQtfuI*Z3@Z zkAB?v@aEw8wJUEwUEgngS-KtdqGZMO`gWY)!1iz)*nN~lwvXMeemUy>?eO5^cXagb zsrtNB-a=Q?p?FMf6eBw#nHqrC3pr={_==A2`JNN>{h;6VJCq@)qZXY)*a&BmL+W%! 
z2UK*n1 zj0g}TNGwc@ObDs!0%C!MfxiGKf(bEHg1aWUxJ{FGU?D@4IPbgfeeb>dawS+EGf#pQ zMyEMkLJ`UEY^_@74OLPSz`J{;v3At5USnd6ordUC{rCcx^ zFIw%q{+%HZHUy3k7a&SBQa2-1;`0`vix>hj-(jHfc7zGR+-he|rk>koj1fs^4awAmfXx_MmTBCY$%7nsj4brj!#(=8D0!7M?9%H4X6tupJ zV3IrtfdGSt?KHfraHe3;cL4&T5;4?x)hYmdBqm`v0<5*a8G9pUN0YH>G;xN=f)%HP zNM2nB>zUvva6&m2n8up9F6ipkPcB>n@uJL`iezt2jo#&W%o`yhNT7Z|G(4b`AP`Gm zwmFl47NMYz*Y@soqz@c)B6I(AKO76Ip0`5O1St_#O^K3GVp}|QQE#T; zgO)%ltJ^Pgh@f;?d6LMnFjwl=JXcyIFTC6@GrnK4D_{oLei?dp(J$)P`s252 z-{8{^Yw~@nw<6C*Eq%(PML=G&J#WH)z=CZluuu&BJ0V;YHa)j{aM{<*!-PxO&JqX7^ zun_!FXlG;Ve_$tKDYiD&mfF||md;#~NiM&%urh&Ve|+D(H=pmDcL#NrytA+;#j3*F zG=?u{_I=$=slVY%iiIS8!A~Hz&pd2CP*f`Oro|{H%HP=9AAZa-W~GBrr6o(+{z1Wg zmlQ}-vzRIkaq5&V85c%h@VRI+41BX+fzt;uKHAsCG^mMrn@nPykg9l4t2G0~lV_@STw{o$x+Ij!>&3XC!)VbZ8 zpYtD&U$4L#L8x8{0AmtVs^wNtqY(TO2-~QgWFSopvK5!AP zayBB+G7jM0g*8joB5X}s93#ekD{X}^(BG+3nHQJ~^-Gmt>I-XfVXbj7j)|7mONHbl zT(Z{F<~qmFuSgPLAz=_1rD;Z)(x)xTmI#8hoH#IgJ0O%2VXZfvC|Jx9{w3GY5W}EyK&}gwgn8vYu664v7?)b3$tQpfHpn^DwM61T&!gd3lT<(# zI{;JWp${c&Kdg1&b;3;r?Q@sVqu3nPXx}s@-O7Wat0v+kfV|e*J(?a(O;l_uRnlZ+ z!3twam9P2)7hA*;6GRdg8t0~<&h0R{Xc>k}DrY*94gN;m;dmk{KBidW!+;pM#~8&h zRC75pnF!V>gNJ%E-qwj63fm{q^!%BAB$U=-+VXJ~##CC98cGw1Bk?pvje9!24lZ>- zk$^kCNW`8%oOqqM{H->k3fi&oyY!Th%OaHRb`9c#*4-8l;!G``Hi~M1i|eVg4_!R) zKHRxJv$r(8!kClI<;EupRnN!%c+SZ=o;#M!jw3fW01{)V zx*=s|MEiE~VE4u8)R}{Sfqv?Me&VMmEQUQ4^>EldI`Ck`T-&c;K4C^=#|or~Oksg3`2@wNwC aN(3&?&lRthOSXwuD>YwJyJx7i?z2Bsr)3cU literal 0 HcmV?d00001 diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-cdc-Users.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-cdc-Users.jsonl deleted file mode 100644 index d433a51c16..0000000000 --- a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-cdc-Users.jsonl +++ /dev/null @@ -1,4 +0,0 @@ -{"uuid":"da9f3332-bcce-491c-924e-849200000000","read_timestamp":"2024-01-07T14:35:00.649","source_timestamp":"2024-01-07T14:34:39.000","object":"l1_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"1d2ab1bcde3e861022798fbded72121b56007c52","sort_keys":[1704638079000,"mysql-bin.000011",74893116],"source_metadata":{"table":"Users","database":"l1","primary_keys":["id"],"log_file":"mysql-bin.000011","log_position":74893116,"change_type":"DELETE","is_deleted":true},"payload":{"id":2,"name":"Tester Yadav","age":33,"subscribed":1,"plan":"B","startDate":"2024-01-01T00:00:00.000"}} -{"uuid":"da9f3332-bcce-491c-924e-849200000001","read_timestamp":"2024-01-07T14:35:00.649","source_timestamp":"2024-01-07T14:34:39.000","object":"l1_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"1d2ab1bcde3e861022798fbded72121b56007c52","sort_keys":[1704638079000,"mysql-bin.000011",74893475],"source_metadata":{"table":"Users","database":"l1","primary_keys":["id"],"log_file":"mysql-bin.000011","log_position":74893475,"change_type":"UPDATE-INSERT","is_deleted":false},"payload":{"id":3,"name":"Tester Gupta","age":50,"subscribed":0,"plan":"Z","startDate":"2023-06-07T00:00:00.000"}} 
-{"uuid":"da9f3332-bcce-491c-924e-849200000010","read_timestamp":"2024-01-07T14:35:00.649","source_timestamp":"2024-01-07T14:34:39.000","object":"l1_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"1d2ab1bcde3e861022798fbded72121b56007c52","sort_keys":[1704638079000,"mysql-bin.000011",74893785],"source_metadata":{"table":"Users","database":"l1","primary_keys":["id"],"log_file":"mysql-bin.000011","log_position":74893785,"change_type":"INSERT","is_deleted":false},"payload":{"id":4,"name":"Tester","age":38,"subscribed":1,"plan":"D","startDate":"2023-09-10T00:00:00.000"}} -{"uuid":"587474a5-9a77-422e-9fd6-74ef00000010","read_timestamp":"2024-01-07T14:32:39.849","source_timestamp":"2024-01-07T14:31:55.000","object":"l1_Users","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/djagaluru-gen-data-json","schema_key":"1d2ab1bcde3e861022798fbded72121b56007c52","sort_keys":[1704637915000,"mysql-bin.000011",74882471],"source_metadata":{"table":"Users","database":"l1","primary_keys":["id"],"log_file":"mysql-bin.000011","log_position":74882471,"change_type":"INSERT","is_deleted":false},"payload":{"id":3,"name":"Tester Gupta","age":36,"subscribed":0,"plan":"C","startDate":"2023-06-07T00:00:00.000"}} \ No newline at end of file diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-statements.sql b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-statements.sql new file mode 100644 index 0000000000..1f356f8b62 --- /dev/null +++ b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerEventsIT/mysql-statements.sql @@ -0,0 +1,15 @@ +### CDC + Backfill for insert, delete and updates +# First wave of INSERT +Insert into Users values(1, 'Tester Kumar', 30, 0, 'A', '2023-01-01'); Insert into Users values(2, 'Tester Yadav', 33, 1, 'B', '2024-01-01'); + +# Second wave of INSERT, UPDATE, DELETE +Delete from Users where id=2; Insert into Users values(4, 'Tester', 38, 1, 'D', '2023-09-10'); Insert into Users values(3, 'Tester Gupta', 36, 0, 'C', '2023-06-07'); Update Users set age=50, plan='Z' where id=3; + +### Insert only +# First wave of INSERT +Insert into Movie values(1, 'movie1', 12345.09876, '2023-01-01 12:12:12'); Insert into Movie values(2, 'movie2', 931.5123, '2023-11-25 17:10:12'); + +### Foreign key constraint test +Insert into Authors values(1, 'a1'); Insert into Authors values(2, 'a2'); Insert into Authors values(3, 'a3'); insert into Authors values(4, 'a4'); +Insert into Articles values(1, 'Article001', '2024-01-01', 1); Insert into Articles values(2, 'Article002', '2024-01-01', 1); Insert into Articles values(3, 'Article004', '2024-01-01', 4); Insert into Articles values(4, 'Article005', '2024-01-01', 3); +Insert into Books values(1, 'Book005', 3); Insert into Books values(2, 'Book002', 3); Insert into Books values(3, 'Book004', 4); Insert into Books values(4, 'Book005', 2); diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerSessionIT/mysql-backfill-Category.avro b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerSessionIT/mysql-backfill-Category.avro new file mode 100644 index 0000000000000000000000000000000000000000..9f4d33df58593eac44cd55d1395c2cb803700ff4 GIT binary patch literal 1770 zcmds1J#P~+7*0=BgaiWusUw5uT$3xgt^_RVt^srEQccTOWU{3&sBN0{j?2PmyvU6k;;*Ys 
zq%fJJBd_r?Afd|d;eyirT61lxVt;?-Lm>#q7FiAVOO}Zou!XM{QahTw`H*T?3^J=xL(<^qF2i{6WP;+RWD|hcdzWr#!{PMi|)8R93|K<0e e@7_GGUxbk0zb0has{d$4<8*~)^ zuF2dbjD%9B?Q#wuiw4{CxUvyL-}wV?4=Nq(&Q%;k-IX0@k%()!DpJ^B6v4Qc2<*px zbQQ-WCD7^sU{XBvpn(00^?IHVPBrxVE}%=XTB^H!%BFZL7xF!8!ie}VY7KWrwMS|b z85^fYQbHEBph>CJl{i@KDGnJYg0Mg-R}Bfb?c|~c4C*pxaU{DYspy^E|j%*k9JAsn%bW($;HYS14s(H=BHQ1y=n~UkBknG5* zYU&Mn*Q-L}o?V)Cs2xcBBWdir`*{*I?^uL-}!uTdR&NOwsfgL8i* zj95JOykMD@Y1p%tJzKO2lu4p8BD~P#I>Ld7AT1+O;a zny7><%bK>ZG2?orDciI?qi7==+9l+f*tI-23p&~|i6ALvGL~+b;L}DfbEtdUj#2}R zr@YO8fem16zI?7+DT5IJK=wxKCTKtW^8NXf+u31g83&lw`rCcdk_}+&%=ydZOBIj- zIgqV42xx!3^WfF9p=cS!4z2e4|2JD5Z3D7(cXOY#WJ8Kp7G&ie0@@od9={xtR+6m) nN$b~#ebSN*;OLcw`ID8ht|B%Ha?nKvX#ZThwI(OB_e%Q>8|Oox literal 0 HcmV?d00001 diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerSessionIT/mysql-cdc-Category.jsonl b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerSessionIT/mysql-cdc-Category.jsonl deleted file mode 100644 index 7825ca6e74..0000000000 --- a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerSessionIT/mysql-cdc-Category.jsonl +++ /dev/null @@ -1,4 +0,0 @@ -{"uuid":"05111f37-1fcc-48b2-99bd-8f1c00000000","read_timestamp":"2024-02-07T08:11:50.054Z","source_timestamp":"2024-02-07T08:10:55.000Z","object":"it_Category","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/int-test-shreya","schema_key":"e38807ece1eb1660b3baaafbe2ffb849c6c3023a","sort_keys":[1707293455000,"mysql-bin.000029",9612272],"source_metadata":{"table":"Category","database":"it","primary_keys":["category_id"],"log_file":"mysql-bin.000029","log_position":9612272,"change_type":"DELETE","is_deleted":true},"payload":{"category_id":1,"name":"xyz","last_update":"2024-02-06T12:19:37.000Z"}} -{"uuid":"3faa3ec0-cedc-4a93-b9ce-50e000000000","read_timestamp":"2024-02-07T08:13:00.421Z","source_timestamp":"2024-02-07T08:12:32.000Z","object":"it_Category","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/int-test-shreya","schema_key":"e38807ece1eb1660b3baaafbe2ffb849c6c3023a","sort_keys":[1707293552000,"mysql-bin.000029",9612570],"source_metadata":{"table":"Category","database":"it","primary_keys":["category_id"],"log_file":"mysql-bin.000029","log_position":9612570,"change_type":"INSERT","is_deleted":false},"payload":{"category_id":4,"name":"ghi","last_update":"2024-02-07T08:12:32.000Z"}} -{"uuid":"359901f6-96bc-4f68-9f6c-7d4500000001","read_timestamp":"2024-02-07T08:10:39.444Z","source_timestamp":"2024-02-07T08:10:35.000Z","object":"it_Category","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/int-test-shreya","schema_key":"e38807ece1eb1660b3baaafbe2ffb849c6c3023a","sort_keys":[1707293435000,"mysql-bin.000029",9611982],"source_metadata":{"table":"Category","database":"it","primary_keys":["category_id"],"log_file":"mysql-bin.000029","log_position":9611982,"change_type":"INSERT","is_deleted":false},"payload":{"category_id":3,"name":"def","last_update":"2024-02-07T08:10:35.000Z"}} 
-{"uuid":"359901f6-96bc-4f68-9f6c-7d4500000000","read_timestamp":"2024-02-07T08:10:39.444Z","source_timestamp":"2024-02-07T08:10:15.000Z","object":"it_Category","read_method":"mysql-cdc-binlog","stream_name":"projects/545418958905/locations/us-central1/streams/int-test-shreya","schema_key":"e38807ece1eb1660b3baaafbe2ffb849c6c3023a","sort_keys":[1707293415000,"mysql-bin.000029",9611684],"source_metadata":{"table":"Category","database":"it","primary_keys":["category_id"],"log_file":"mysql-bin.000029","log_position":9611684,"change_type":"UPDATE-INSERT","is_deleted":false},"payload":{"category_id":2,"name":"abc1","last_update":"2024-02-06T12:19:47.000Z"}} \ No newline at end of file diff --git a/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerSessionIT/mysql-statements.sql b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerSessionIT/mysql-statements.sql new file mode 100644 index 0000000000..0ce47716c1 --- /dev/null +++ b/v2/datastream-to-spanner/src/test/resources/DataStreamToSpannerSessionIT/mysql-statements.sql @@ -0,0 +1,6 @@ +### CDC + Backfill for insert, delete and updates +# First wave of INSERT +Insert into Category values(1, 'xyz', '2024-02-06T12:19:37'); Insert into Category values(2, 'abc', '2024-02-06T12:19:47'); + +# Second wave of INSERT, UPDATE, DELETE +Delete from Category where category_id=1; Insert into Category values(4, 'ghi', '2024-02-07T08:12:32'); Insert into Category values(3, 'def', '2024-02-07T08:10:35'); Update Category set name='abc1' where category_id=2; \ No newline at end of file From 6e7b262bc2b298881940d5a8a665ca92cf643012 Mon Sep 17 00:00:00 2001 From: Manit Gupta Date: Mon, 13 May 2024 09:06:40 +0530 Subject: [PATCH 32/70] Fix row.clear() --- .../templates/DataStreamToSpannerEventsIT.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerEventsIT.java b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerEventsIT.java index 0d07ec4b38..4cc2d4df56 100644 --- a/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerEventsIT.java +++ b/v2/datastream-to-spanner/src/test/java/com/google/cloud/teleport/v2/templates/DataStreamToSpannerEventsIT.java @@ -308,17 +308,17 @@ private void assertAuthorsTable() { row.put("name", "a1"); events.add(row); - row.clear(); + row = new HashMap<>(); row.put("author_id", 2); row.put("name", "a2"); events.add(row); - row.clear(); + row = new HashMap<>(); row.put("author_id", 3); row.put("name", "a3"); events.add(row); - row.clear(); + row = new HashMap<>(); row.put("author_id", 4); row.put("name", "a4"); events.add(row); @@ -336,19 +336,19 @@ private void assertBooksTable() { row.put("author_id", 3); events.add(row); - row.clear(); + row = new HashMap<>(); row.put("id", 2); row.put("title", "Book002"); row.put("author_id", 3); events.add(row); - row.clear(); + row = new HashMap<>(); row.put("id", 3); row.put("title", "Book004"); row.put("author_id", 4); events.add(row); - row.clear(); + row = new HashMap<>(); row.put("id", 4); row.put("title", "Book005"); row.put("author_id", 2); @@ -369,21 +369,21 @@ private void assertArticlesTable() { row.put("author_id", 1); events.add(row); - row.clear(); + row = new HashMap<>(); row.put("id", 2); row.put("name", "Article002"); row.put("published_date", Date.parseDate("2024-01-01")); row.put("author_id", 1); events.add(row); - row.clear(); + row = 
new HashMap<>(); row.put("id", 3); row.put("name", "Article004"); row.put("published_date", Date.parseDate("2024-01-01")); row.put("author_id", 4); events.add(row); - row.clear(); + row = new HashMap<>(); row.put("id", 4); row.put("name", "Article005"); row.put("published_date", Date.parseDate("2024-01-01")); From 304fd59e904e17f9de5e1ba97f01f03cf23a4404 Mon Sep 17 00:00:00 2001 From: Deep1998 Date: Wed, 8 May 2024 15:47:04 +0530 Subject: [PATCH 33/70] Add error counters for transformer and reader --- .../teleport/v2/constants/MetricCounters.java | 31 +++++++++++++++++++ .../teleport/v2/constants/package-info.java | 16 ++++++++++ .../mysql/MysqlDialectAdapter.java | 14 +++++++-- .../jdbc/rowmapper/JdbcSourceRowMapper.java | 7 +++++ .../transformer/SourceRowToMutationDoFn.java | 13 +++++++- 5 files changed, 77 insertions(+), 4 deletions(-) create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/constants/MetricCounters.java create mode 100644 v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/constants/package-info.java diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/constants/MetricCounters.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/constants/MetricCounters.java new file mode 100644 index 0000000000..3b79f6bb54 --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/constants/MetricCounters.java @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.v2.constants; + +// Counters for bulk template. +public class MetricCounters { + + // Counter for errors in the transformer. + public static final String TRANSFORMER_ERRORS = "transformer_errors"; + + // Counter for errors encountered by the reader when trying to map JDBC ResultSet to a SourceRow. + public static final String READER_MAPPING_ERRORS = "reader_mapping_errors"; + + // Counter for errors encountered by the reader while discovering schema. This counts all sorts of + // errors including SQLTransientConnectionException, SQLNonTransientConnectionException, + // SQLExceptions etc. + public static final String READER_SCHEMA_DISCOVERY_ERRORS = "reader_schema_discovery_errors"; +} diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/constants/package-info.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/constants/package-info.java new file mode 100644 index 0000000000..3e1124dc05 --- /dev/null +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/constants/package-info.java @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.v2.constants; diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/dialectadapter/mysql/MysqlDialectAdapter.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/dialectadapter/mysql/MysqlDialectAdapter.java index 8c714bf7ae..17494e442e 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/dialectadapter/mysql/MysqlDialectAdapter.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/dialectadapter/mysql/MysqlDialectAdapter.java @@ -15,9 +15,11 @@ */ package com.google.cloud.teleport.v2.source.reader.io.jdbc.dialectadapter.mysql; +import com.google.cloud.teleport.v2.constants.MetricCounters; import com.google.cloud.teleport.v2.source.reader.io.exception.RetriableSchemaDiscoveryException; import com.google.cloud.teleport.v2.source.reader.io.exception.SchemaDiscoveryException; import com.google.cloud.teleport.v2.source.reader.io.jdbc.dialectadapter.DialectAdapter; +import com.google.cloud.teleport.v2.source.reader.io.jdbc.rowmapper.JdbcSourceRowMapper; import com.google.cloud.teleport.v2.source.reader.io.schema.SourceSchemaReference; import com.google.cloud.teleport.v2.spanner.migrations.schema.SourceColumnType; import com.google.common.collect.ImmutableList; @@ -29,6 +31,8 @@ import java.sql.SQLNonTransientConnectionException; import java.sql.SQLTransientConnectionException; import javax.sql.DataSource; +import org.apache.beam.sdk.metrics.Counter; +import org.apache.beam.sdk.metrics.Metrics; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -38,6 +42,9 @@ public final class MysqlDialectAdapter implements DialectAdapter { private static final Logger logger = LoggerFactory.getLogger(MysqlDialectAdapter.class); + private final Counter schemaDiscoveryErrors = + Metrics.counter(JdbcSourceRowMapper.class, MetricCounters.READER_SCHEMA_DISCOVERY_ERRORS); + public MysqlDialectAdapter(MySqlVersion mySqlVersion) { this.mySqlVersion = mySqlVersion; } @@ -79,24 +86,25 @@ public ImmutableMap> discoverTabl String.format( "Transient connection error while discovering table schema for datasource=%s db=%s tables=%s, cause=%s", dataSource, sourceSchemaReference, tables, e)); - // TODO: Add metrics for transient connection errors. + schemaDiscoveryErrors.inc(); throw new RetriableSchemaDiscoveryException(e); } catch (SQLNonTransientConnectionException e) { logger.error( String.format( "Non Transient connection error while discovering table schema for datasource=%s, db=%s tables=%s, cause=%s", dataSource, sourceSchemaReference, tables, e)); - // TODO: Add metrics for non-transient connection errors. + schemaDiscoveryErrors.inc(); throw new SchemaDiscoveryException(e); } catch (SQLException e) { logger.error( String.format( "Sql exception while discovering table schema for datasource=%s db=%s tables=%s, cause=%s", dataSource, sourceSchemaReference, tables, e)); - // TODO: Add metrics for SQL exceptions. 
+ schemaDiscoveryErrors.inc(); throw new SchemaDiscoveryException(e); } catch (SchemaDiscoveryException e) { // Already logged. + schemaDiscoveryErrors.inc(); throw e; } return tablesBuilder.build(); diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/JdbcSourceRowMapper.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/JdbcSourceRowMapper.java index 3e920e252b..76354273c7 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/JdbcSourceRowMapper.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/rowmapper/JdbcSourceRowMapper.java @@ -15,6 +15,7 @@ */ package com.google.cloud.teleport.v2.source.reader.io.jdbc.rowmapper; +import com.google.cloud.teleport.v2.constants.MetricCounters; import com.google.cloud.teleport.v2.source.reader.io.exception.ValueMappingException; import com.google.cloud.teleport.v2.source.reader.io.row.SourceRow; import com.google.cloud.teleport.v2.source.reader.io.schema.SourceTableSchema; @@ -24,6 +25,8 @@ import java.util.concurrent.TimeUnit; import org.apache.avro.Schema; import org.apache.beam.sdk.io.jdbc.JdbcIO; +import org.apache.beam.sdk.metrics.Counter; +import org.apache.beam.sdk.metrics.Metrics; import org.checkerframework.checker.initialization.qual.Initialized; import org.checkerframework.checker.nullness.qual.NonNull; import org.checkerframework.checker.nullness.qual.Nullable; @@ -39,6 +42,9 @@ public final class JdbcSourceRowMapper implements JdbcIO.RowMapper { private static final Logger logger = LoggerFactory.getLogger(JdbcSourceRowMapper.class); + private final Counter mapperErrors = + Metrics.counter(JdbcSourceRowMapper.class, MetricCounters.READER_MAPPING_ERRORS); + /** * Construct {@link JdbcSourceRowMapper}. * @@ -87,6 +93,7 @@ long getCurrentTimeMicros() { .getOrDefault(entry.getValue().getName(), JdbcValueMapper.UNSUPPORTED) .mapValue(resultSet, entry.getKey(), schema)); } catch (SQLException e) { + mapperErrors.inc(); logger.error( "Exception while mapping jdbc ResultSet to avro. Check for potential schema changes. 
Exception: " + e); diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/transformer/SourceRowToMutationDoFn.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/transformer/SourceRowToMutationDoFn.java index 1f9375c15c..7e00c00a22 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/transformer/SourceRowToMutationDoFn.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/transformer/SourceRowToMutationDoFn.java @@ -18,6 +18,7 @@ import com.google.auto.value.AutoValue; import com.google.cloud.spanner.Mutation; import com.google.cloud.spanner.Value; +import com.google.cloud.teleport.v2.constants.MetricCounters; import com.google.cloud.teleport.v2.source.reader.io.row.SourceRow; import com.google.cloud.teleport.v2.source.reader.io.schema.SourceTableReference; import com.google.cloud.teleport.v2.spanner.migrations.avro.GenericRecordTypeConvertor; @@ -25,6 +26,8 @@ import java.io.Serializable; import java.util.Map; import org.apache.avro.generic.GenericRecord; +import org.apache.beam.sdk.metrics.Counter; +import org.apache.beam.sdk.metrics.Metrics; import org.apache.beam.sdk.transforms.DoFn; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -39,6 +42,9 @@ public abstract class SourceRowToMutationDoFn extends DoFn private static final Logger LOG = LoggerFactory.getLogger(SourceRowToMutationDoFn.class); + private final Counter transformerErrors = + Metrics.counter(SourceRowToMutationDoFn.class, MetricCounters.TRANSFORMER_ERRORS); + public abstract ISchemaMapper iSchemaMapper(); public abstract Map tableIdMapper(); @@ -58,6 +64,7 @@ public void processElement(ProcessContext c) { LOG.error( "cannot find valid sourceTable for tableId: {} in tableIdMapper", sourceRow.tableSchemaUUID()); + transformerErrors.inc(); return; } try { @@ -73,7 +80,11 @@ public void processElement(ProcessContext c) { c.output(mutation); } catch (Exception e) { // TODO: Add DLQ integration once supported. - LOG.error("Unable to transform source row to spanner mutation: {}", e.getMessage()); + LOG.error( + "Unable to transform source row to spanner mutation: {} {}", + e.getMessage(), + e.fillInStackTrace()); + transformerErrors.inc(); } } From 3030f70709001ab1f098a4f863428b3e1394ad1a Mon Sep 17 00:00:00 2001 From: Deep1998 Date: Fri, 10 May 2024 15:16:10 +0530 Subject: [PATCH 34/70] Handle null valued strings --- .../transformer/SourceRowToMutationDoFn.java | 5 +- .../migrations/avro/AvroToValueMapper.java | 18 +++++-- .../avro/GenericRecordTypeConvertor.java | 20 +++----- .../avro/AvroToValueMapperTest.java | 23 +++++++++ .../avro/GenericRecordTypeConvertorTest.java | 51 +++++++++++++------ 5 files changed, 80 insertions(+), 37 deletions(-) diff --git a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/transformer/SourceRowToMutationDoFn.java b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/transformer/SourceRowToMutationDoFn.java index 7e00c00a22..b68d38ae27 100644 --- a/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/transformer/SourceRowToMutationDoFn.java +++ b/v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/transformer/SourceRowToMutationDoFn.java @@ -80,10 +80,7 @@ public void processElement(ProcessContext c) { c.output(mutation); } catch (Exception e) { // TODO: Add DLQ integration once supported. 
- LOG.error( - "Unable to transform source row to spanner mutation: {} {}", - e.getMessage(), - e.fillInStackTrace()); + LOG.error("Unable to transform source row to spanner mutation", e); transformerErrors.inc(); } } diff --git a/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/avro/AvroToValueMapper.java b/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/avro/AvroToValueMapper.java index b76eb7c46a..2d4aa97d96 100644 --- a/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/avro/AvroToValueMapper.java +++ b/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/avro/AvroToValueMapper.java @@ -89,9 +89,9 @@ static Map getGsqlMap() { Type.float64(), (recordValue, fieldSchema) -> Value.float64(avroFieldToDouble(recordValue, fieldSchema))); gsqlFunctions.put( - Type.string(), (recordValue, fieldSchema) -> Value.string(recordValue.toString())); + Type.string(), (recordValue, fieldSchema) -> Value.string(avroFieldToString(recordValue))); gsqlFunctions.put( - Type.json(), (recordValue, fieldSchema) -> Value.string(recordValue.toString())); + Type.json(), (recordValue, fieldSchema) -> Value.string(avroFieldToString(recordValue))); gsqlFunctions.put( Type.numeric(), (recordValue, fieldSchema) -> @@ -121,11 +121,12 @@ static Map getPgMap() { Type.pgFloat8(), (recordValue, fieldSchema) -> Value.float64(avroFieldToDouble(recordValue, fieldSchema))); pgFunctions.put( - Type.pgVarchar(), (recordValue, fieldSchema) -> Value.string(recordValue.toString())); + Type.pgVarchar(), + (recordValue, fieldSchema) -> Value.string(avroFieldToString(recordValue))); pgFunctions.put( - Type.pgText(), (recordValue, fieldSchema) -> Value.string(recordValue.toString())); + Type.pgText(), (recordValue, fieldSchema) -> Value.string(avroFieldToString(recordValue))); pgFunctions.put( - Type.pgJsonb(), (recordValue, fieldSchema) -> Value.string(recordValue.toString())); + Type.pgJsonb(), (recordValue, fieldSchema) -> Value.string(avroFieldToString(recordValue))); pgFunctions.put( Type.pgNumeric(), (recordValue, fieldSchema) -> @@ -189,6 +190,13 @@ static Double avroFieldToDouble(Object recordValue, Schema fieldSchema) { } } + static String avroFieldToString(Object recordValue) { + if (recordValue == null) { + return null; + } + return recordValue.toString(); + } + static BigDecimal avroFieldToNumericBigDecimal(Object recordValue, Schema fieldSchema) { try { if (recordValue == null) { diff --git a/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertor.java b/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertor.java index 9822f23276..1d986ef35e 100644 --- a/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertor.java +++ b/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertor.java @@ -88,21 +88,18 @@ public Map transformChangeEvent(GenericRecord record, String srcT spannerColumnType); result.put(spannerColName, value); } catch (NullPointerException e) { - LOG.info("Unable to transform change event: {}", e.getMessage()); + LOG.error("Unable to transform change event", e); throw e; } catch (IllegalArgumentException e) { - LOG.info("Unable to transform change event: {}", e.getMessage()); + LOG.error("Unable to transform change event", e); throw e; } catch (Exception e) { - LOG.info( - 
"Unable to convert spanner value for spanner col: {}. {}", - spannerColName, - e.getMessage()); + LOG.error( + String.format("Unable to convert spanner value for spanner col: {}", spannerColName), + e); throw new RuntimeException( - String.format( - "Unable to convert spanner value for spanner col: {}. {}", - spannerColName, - e.getMessage())); + String.format("Unable to convert spanner value for spanner col: {}", spannerColName), + e); } } return result; @@ -123,9 +120,6 @@ public Value getSpannerValue( LOG.debug("found union type: {}", types); // Schema types can only union with Type NULL. Any other UNION is unsupported. if (types.size() == 2 && types.stream().anyMatch(s -> s.getType().equals(Schema.Type.NULL))) { - if (recordValue == null) { - return null; - } fieldSchema = types.stream().filter(s -> !s.getType().equals(Schema.Type.NULL)).findFirst().get(); } else { diff --git a/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/migrations/avro/AvroToValueMapperTest.java b/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/migrations/avro/AvroToValueMapperTest.java index d002c5b53d..74b69aca14 100644 --- a/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/migrations/avro/AvroToValueMapperTest.java +++ b/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/migrations/avro/AvroToValueMapperTest.java @@ -158,6 +158,29 @@ public void testAvroFieldToDouble_UnsupportedType() { AvroToValueMapper.avroFieldToDouble(inputValue, SchemaBuilder.builder().booleanType()); } + @Test + public void testAvroFieldToString_valid() { + String result = AvroToValueMapper.avroFieldToString("Hello"); + assertEquals("Hello", result); + + result = AvroToValueMapper.avroFieldToString(""); + assertEquals("", result); + + result = AvroToValueMapper.avroFieldToString(14); + assertEquals("14", result); + + result = AvroToValueMapper.avroFieldToString(513148134L); + assertEquals("513148134", result); + + result = AvroToValueMapper.avroFieldToString(325.532); + assertEquals("325.532", result); + } + + @Test + public void testAvroFieldToString_NullInput() { + assertNull(AvroToValueMapper.avroFieldToString(null)); + } + @Test public void testAvroFieldToNumericBigDecimal_StringInput() { Map testCases = new HashMap<>(); diff --git a/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertorTest.java b/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertorTest.java index 2b713445dd..67216674bd 100644 --- a/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertorTest.java +++ b/v2/spanner-common/src/test/java/com/google/cloud/teleport/v2/spanner/migrations/avro/GenericRecordTypeConvertorTest.java @@ -378,6 +378,42 @@ public void transformChangeEventTest_identityMapper() { assertEquals(expected, actual); } + @Test + public void transformChangeEventTest_nullValues() { + GenericRecord genericRecord = new GenericData.Record(getAllSpannerTypesSchema()); + genericRecord.put("bool_col", null); + genericRecord.put("int_col", null); + genericRecord.put("float_col", null); + genericRecord.put("string_col", null); + genericRecord.put("numeric_col", null); + genericRecord.put("bytes_col", null); + genericRecord.put("timestamp_col", null); + genericRecord.put("date_col", null); + GenericRecordTypeConvertor genericRecordTypeConvertor = + new GenericRecordTypeConvertor(new IdentityMapper(getIdentityDdl()), 
""); + Map actual = + genericRecordTypeConvertor.transformChangeEvent(genericRecord, "all_types"); + Map expected = + Map.of( + "bool_col", + Value.bool(null), + "int_col", + Value.int64(null), + "float_col", + Value.float64(null), + "string_col", + Value.string(null), + "numeric_col", + Value.numeric(null), + "bytes_col", + Value.bytes(null), + "timestamp_col", + Value.timestamp(null), + "date_col", + Value.date(null)); + assertEquals(expected, actual); + } + @Test public void transformChangeEventTest_illegalUnionType() { GenericRecordTypeConvertor genericRecordTypeConvertor = @@ -396,21 +432,6 @@ public void transformChangeEventTest_illegalUnionType() { () -> genericRecordTypeConvertor.getSpannerValue(null, schema, "union_col", Type.string())); } - @Test - public void transformChangeEventTest_nullType() { - GenericRecordTypeConvertor genericRecordTypeConvertor = - new GenericRecordTypeConvertor(new IdentityMapper(getIdentityDdl()), ""); - Schema schema = - SchemaBuilder.builder() - .unionOf() - .nullType() - .and() - .type(Schema.create(Schema.Type.BOOLEAN)) - .endUnion(); - assertNull( - genericRecordTypeConvertor.getSpannerValue(null, schema, "union_col", Type.string())); - } - @Test(expected = IllegalArgumentException.class) public void transformChangeEventTest_incorrectSpannerType() { From 19c59dd1774cffa63fef8dfcc1e299e8bde0692a Mon Sep 17 00:00:00 2001 From: Akshara Uke Date: Tue, 14 May 2024 11:18:33 +0000 Subject: [PATCH 35/70] updated based on review comments --- .../pom.xml | 31 + .../DataChangeRecordTypeConvertor.java | 3 + .../SpannerChangeStreamToGcsSimpleIT.java | 453 ++++++ .../DataChangeRecordTypeConvertorTest.java | 13 + .../session.json | 1262 +++++++++++++++++ .../spanner-schema-simple.sql | 49 + 6 files changed, 1811 insertions(+) create mode 100644 v2/spanner-change-streams-to-sharded-file-sink/src/test/java/com/google/cloud/teleport/v2/templates/SpannerChangeStreamToGcsSimpleIT.java create mode 100644 v2/spanner-change-streams-to-sharded-file-sink/src/test/resources/SpannerChangeStreamToGcsSimpleIT/session.json create mode 100644 v2/spanner-change-streams-to-sharded-file-sink/src/test/resources/SpannerChangeStreamToGcsSimpleIT/spanner-schema-simple.sql diff --git a/v2/spanner-change-streams-to-sharded-file-sink/pom.xml b/v2/spanner-change-streams-to-sharded-file-sink/pom.xml index f329528cf8..8da0f5d2f7 100644 --- a/v2/spanner-change-streams-to-sharded-file-sink/pom.xml +++ b/v2/spanner-change-streams-to-sharded-file-sink/pom.xml @@ -53,6 +53,37 @@ json ${json.version} + + + com.google.cloud.teleport + it-google-cloud-platform + ${project.version} + test + + + com.google.cloud.teleport + it-conditions + ${project.version} + test + + + org.apache.beam + beam-it-jdbc + test + + + mysql + mysql-connector-java + 8.0.30 + test + + + + com.google.cloud.teleport + it-jdbc + ${project.version} + test + diff --git a/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/changestream/DataChangeRecordTypeConvertor.java b/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/changestream/DataChangeRecordTypeConvertor.java index 5c02d7a5af..5c26abd968 100644 --- a/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/changestream/DataChangeRecordTypeConvertor.java +++ b/v2/spanner-change-streams-to-sharded-file-sink/src/main/java/com/google/cloud/teleport/v2/templates/changestream/DataChangeRecordTypeConvertor.java @@ -157,6 +157,9 @@ public 
static BigDecimal toNumericBigDecimal( throws DataChangeRecordConvertorException { String value = toString(changeEvent, key, requiredField); + if (value == null) { + return null; + } if (NumberUtils.isCreatable(value) || NumberUtils.isParsable(value) || isNumeric(value)) { return new BigDecimal(value).setScale(9, RoundingMode.HALF_UP); } diff --git a/v2/spanner-change-streams-to-sharded-file-sink/src/test/java/com/google/cloud/teleport/v2/templates/SpannerChangeStreamToGcsSimpleIT.java b/v2/spanner-change-streams-to-sharded-file-sink/src/test/java/com/google/cloud/teleport/v2/templates/SpannerChangeStreamToGcsSimpleIT.java new file mode 100644 index 0000000000..7fb40e231f --- /dev/null +++ b/v2/spanner-change-streams-to-sharded-file-sink/src/test/java/com/google/cloud/teleport/v2/templates/SpannerChangeStreamToGcsSimpleIT.java @@ -0,0 +1,453 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.v2.templates; + +import static com.google.common.truth.Truth.assertThat; +import static org.apache.beam.it.gcp.artifacts.matchers.ArtifactAsserts.assertThatArtifacts; +import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatPipeline; +import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatResult; + +import com.google.cloud.spanner.Mutation; +import com.google.cloud.teleport.metadata.SkipDirectRunnerTest; +import com.google.cloud.teleport.metadata.TemplateIntegrationTest; +import com.google.cloud.teleport.v2.spanner.migrations.shard.Shard; +import com.google.common.io.Resources; +import com.google.gson.Gson; +import com.google.gson.JsonArray; +import com.google.gson.JsonObject; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Base64; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; +import org.apache.beam.it.common.PipelineLauncher; +import org.apache.beam.it.common.PipelineLauncher.LaunchConfig; +import org.apache.beam.it.common.PipelineOperator; +import org.apache.beam.it.common.utils.PipelineUtils; +import org.apache.beam.it.common.utils.ResourceManagerUtils; +import org.apache.beam.it.conditions.ChainedConditionCheck; +import org.apache.beam.it.gcp.TemplateTestBase; +import org.apache.beam.it.gcp.artifacts.Artifact; +import org.apache.beam.it.gcp.artifacts.utils.ArtifactUtils; +import org.apache.beam.it.gcp.spanner.SpannerResourceManager; +import org.apache.beam.it.gcp.spanner.conditions.SpannerRowsCheck; +import org.apache.beam.it.gcp.storage.GcsResourceManager; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** Integration test for simple test of single 
shard,single table. */ +@Category({TemplateIntegrationTest.class, SkipDirectRunnerTest.class}) +@TemplateIntegrationTest(SpannerChangeStreamsToShardedFileSink.class) +@RunWith(JUnit4.class) +public class SpannerChangeStreamToGcsSimpleIT extends TemplateTestBase { + private static final Logger LOG = LoggerFactory.getLogger(SpannerChangeStreamToGcsSimpleIT.class); + private static SpannerResourceManager spannerResourceManager; + private static SpannerResourceManager spannerMetadataResourceManager; + private static HashSet testInstances = new HashSet<>(); + private static final String spannerDdl = + "SpannerChangeStreamToGcsSimpleIT/spanner-schema-simple.sql"; + private static final String sessionFileResourceName = + "SpannerChangeStreamToGcsSimpleIT/session.json"; + private static PipelineLauncher.LaunchInfo jobInfo; + private static String spannerDatabaseName = ""; + private static String spannerMetadataDatabaseName = ""; + private static GcsResourceManager gcsResourceManager; + + /** + * Does the following setup: + * + *
1. Creates a Spanner database with a given table 2. Creates a shard file with the connection + * details 3. The session file for the same is taken from the resources and uploaded to GCS 4. + * Places the session file and shard file in GCS 5. Creates the change stream in Spanner database + * 6. Creates the metadata database 8. Launches the job to read from Spanner and write to GCS + * + * @throws IOException + */ + @Before + public void setUp() throws IOException { + skipBaseCleanup = true; + synchronized (SpannerChangeStreamToGcsSimpleIT.class) { + testInstances.add(this); + if (jobInfo == null) { + createGcsResourceManager(); + createSpannerDatabase(); + createAndUploadShardConfigToGcs(); + uploadSessionFileToGcs(); + createSpannerMetadataDatabase(); + launchReaderDataflowJob(); + } + } + } + + @AfterClass + public static void cleanUp() throws IOException { + for (SpannerChangeStreamToGcsSimpleIT instance : testInstances) { + instance.tearDownBase(); + } + ResourceManagerUtils.cleanResources( + spannerResourceManager, spannerMetadataResourceManager, gcsResourceManager); + } + + @Test + public void testSingleRecordWrittenToGcs() throws IOException, java.lang.InterruptedException { + // Construct a ChainedConditionCheck with below stages. + // 1. Wait for the metadata table to have the start time of reader job + // 2. Write a single record to Spanner + // 3. Wait on GCS to have the file + // 4. Match the PK in GCS with the PK written to Spanner + ChainedConditionCheck conditionCheck = + ChainedConditionCheck.builder( + List.of( + SpannerRowsCheck.builder( + spannerMetadataResourceManager, "spanner_to_gcs_metadata") + .setMinRows(1) + .setMaxRows(1) + .build())) + .build(); + // Wait for conditions + PipelineOperator.Result result = + pipelineOperator() + .waitForCondition(createConfig(jobInfo, Duration.ofMinutes(10)), conditionCheck); + // Assert Conditions + assertThatResult(result).meetsConditions(); + // Perform writes to Spanner + writeSpannerDataForSingers(1, "FF", "testShardA"); + // Assert file present in GCS with the needed data + assertFileContentsInGCS(); + } + + private void createGcsResourceManager() { + gcsResourceManager = + GcsResourceManager.builder(artifactBucketName, getClass().getSimpleName(), credentials) + .build(); // DB name is appended with prefix to avoid clashes + } + + private void createSpannerDatabase() throws IOException { + spannerResourceManager = + SpannerResourceManager.builder("rr-main-" + testName, PROJECT, REGION) + .maybeUseStaticInstance() + .build(); // DB name is appended with prefix to avoid clashes + String ddl = + String.join( + " ", Resources.readLines(Resources.getResource(spannerDdl), StandardCharsets.UTF_8)); + ddl = ddl.trim(); + String[] ddls = ddl.split(";"); + for (String d : ddls) { + if (!d.isBlank()) { + spannerResourceManager.executeDdlStatement(d); + } + } + spannerDatabaseName = spannerResourceManager.getDatabaseId(); + } + + private void createAndUploadShardConfigToGcs() throws IOException { + List shardNames = new ArrayList<>(); + shardNames.add("testShardA"); + shardNames.add("testShardB"); + shardNames.add("testShardC"); + shardNames.add("testShardD"); + JsonArray ja = new JsonArray(); + + for (String shardName : shardNames) { + Shard shard = new Shard(); + shard.setLogicalShardId(shardName); + shard.setUser("dummy"); + shard.setHost("dummy"); + shard.setPassword("dummy"); + shard.setPort("3306"); + JsonObject jsObj = (JsonObject) new Gson().toJsonTree(shard).getAsJsonObject(); + jsObj.remove("secretManagerUri"); // remove field 
secretManagerUri + ja.add(jsObj); + } + + String shardFileContents = ja.toString(); + LOG.info("Shard file contents: {}", shardFileContents); + // -DartifactBucket has the bucket name + gcsResourceManager.createArtifact("input/shard.json", shardFileContents); + } + + private void uploadSessionFileToGcs() throws IOException { + gcsResourceManager.uploadArtifact( + "input/session.json", Resources.getResource(sessionFileResourceName).getPath()); + } + + private void createSpannerMetadataDatabase() throws IOException { + spannerMetadataResourceManager = + SpannerResourceManager.builder("rr-meta-" + testName, PROJECT, REGION) + .maybeUseStaticInstance() + .build(); // DB name is appended with prefix to avoid clashes + String dummy = "create table t1(id INT64 ) primary key(id)"; + spannerMetadataResourceManager.executeDdlStatement(dummy); + // needed to create separate metadata database + spannerMetadataDatabaseName = spannerMetadataResourceManager.getDatabaseId(); + } + + private void launchReaderDataflowJob() throws IOException { + // default parameters + Map params = + new HashMap<>() { + { + put("sessionFilePath", getGcsFullPath("input/session.json")); + put("instanceId", spannerResourceManager.getInstanceId()); + put("databaseId", spannerResourceManager.getDatabaseId()); + put("spannerProjectId", PROJECT); + put("metadataDatabase", spannerMetadataResourceManager.getDatabaseId()); + put("metadataInstance", spannerMetadataResourceManager.getInstanceId()); + put("sourceShardsFilePath", getGcsFullPath("input/shard.json")); + put("changeStreamName", "allstream"); + put("runIdentifier", "run1"); + put("gcsOutputDirectory", getGcsFullPath("output")); + } + }; + + // Construct template + String jobName = PipelineUtils.createJobName("rr-it"); + // /-DunifiedWorker=true when using runner v2 + LaunchConfig.Builder options = LaunchConfig.builder(jobName, specPath); + options.setParameters(params); + options.addEnvironment("additionalExperiments", Collections.singletonList("use_runner_v2")); + // Run + jobInfo = launchTemplate(options, false); + assertThatPipeline(jobInfo).isRunning(); + } + + private void writeSpannerDataForSingers(int singerId, String firstName, String shardId) { + // Write a single record to Spanner for the given logical shard + Mutation m = + Mutation.newInsertOrUpdateBuilder("Singers") + .set("SingerId") + .to(singerId) + .set("FirstName") + .to(firstName) + .set("migration_shard_id") + .to(shardId) + .build(); + spannerResourceManager.write(m); + } + + private void assertFileContentsInGCS() throws IOException, java.lang.InterruptedException { + List artifacts = null; + Thread.sleep( + 180000); // wait sufficiently for the file to be generated. It takes about 3 minutes + // at-least. If not present wait additional 3 minutes before failing + for (int i = 0; i < 10; i++) { + Thread.sleep(18000); // wait for total 3 minutes over an interval of 18 seconds + artifacts = + gcsResourceManager.listArtifacts("output/testShardA/", Pattern.compile(".*\\.txt$")); + if (artifacts.size() == 1) { + break; + } + } + assertThat(artifacts).hasSize(1); + assertThatArtifacts(artifacts).hasContent("SingerId\\\":\\\"1"); + } + + @Test + public void testMultiShardsRecordWrittenToGcs() + throws IOException, java.lang.InterruptedException { + // Construct a ChainedConditionCheck with below stages. + // 1. Wait for the metadata table to have the start time of reader job + // 2. Write a 2 records per shard to Spanner + // 3. Wait on GCS to have the files + // 4. 
Match the PK in GCS with the PK written to Spanner + ChainedConditionCheck conditionCheck = + ChainedConditionCheck.builder( + List.of( + SpannerRowsCheck.builder( + spannerMetadataResourceManager, "spanner_to_gcs_metadata") + .setMinRows(1) + .setMaxRows(1) + .build())) + .build(); + // Wait for conditions + PipelineOperator.Result result = + pipelineOperator() + .waitForCondition(createConfig(jobInfo, Duration.ofMinutes(10)), conditionCheck); + // Assert Conditions + assertThatResult(result).meetsConditions(); + // Perform writes to Spanner + writeSpannerDataForSingers(2, "two", "testShardB"); + writeSpannerDataForSingers(3, "three", "testShardB"); + writeSpannerDataForSingers(4, "four", "testShardC"); + writeSpannerDataForSingers(5, "five", "testShardC"); + + // Assert file present in GCS with the needed data + assertFileContentsInGCSForMultipleShards(); + } + + private void assertFileContentsInGCSForMultipleShards() + throws IOException, java.lang.InterruptedException { + List artifactsShardB = null; + List artifactsShardC = null; + Thread.sleep( + 180000); // wait sufficiently for the file to be generated. It takes about 3 minutes + // at-least. If not present wait additional 3 minutes before failing + for (int i = 0; i < 10; i++) { + Thread.sleep(18000); // wait for total 3 minutes over an interval of 18 seconds + artifactsShardB = + gcsResourceManager.listArtifacts("output/testShardB/", Pattern.compile(".*\\.txt$")); + artifactsShardC = + gcsResourceManager.listArtifacts("output/testShardC/", Pattern.compile(".*\\.txt$")); + + // Ideally both the mutations written to spanner per shard will commit within 10 seconds. + // But that does not guarantee that they will be in the same file, since they can commit + // within 1 second interval boundary + if (artifactsShardB.size() >= 1 && artifactsShardC.size() >= 1) { + break; + } + } + assertThatArtifacts(artifactsShardB).hasFiles(); + assertThatArtifacts(artifactsShardC).hasFiles(); + // checks that any of the artifact has the given content + assertThatArtifacts(artifactsShardB).hasContent("SingerId\\\":\\\"2"); + assertThatArtifacts(artifactsShardB).hasContent("SingerId\\\":\\\"3"); + assertThatArtifacts(artifactsShardC).hasContent("SingerId\\\":\\\"4"); + assertThatArtifacts(artifactsShardC).hasContent("SingerId\\\":\\\"5"); + } + + @Test + public void testAllDatatypes() throws IOException, java.lang.InterruptedException { + // Construct a ChainedConditionCheck with below stages. + // 1. Wait for the metadata table to have the start time of reader job + // 2. Write a record with + // 3. Wait on GCS to have the files + // 4. 
Match the PK in GCS with the PK written to Spanner + ChainedConditionCheck conditionCheck = + ChainedConditionCheck.builder( + List.of( + SpannerRowsCheck.builder( + spannerMetadataResourceManager, "spanner_to_gcs_metadata") + .setMinRows(1) + .setMaxRows(1) + .build())) + .build(); + // Wait for conditions + PipelineOperator.Result result = + pipelineOperator() + .waitForCondition(createConfig(jobInfo, Duration.ofMinutes(10)), conditionCheck); + // Assert Conditions + assertThatResult(result).meetsConditions(); + // Perform writes to Spanner + writeSpannerDataForAllDatatypes(); + // Assert file present in GCS with the needed data + assertFileContentsInGCSForAllDatatypes(); + } + + private void writeSpannerDataForAllDatatypes() { + // Write a single record to Spanner for logical shard : testD + Mutation m = + Mutation.newInsertOrUpdateBuilder("sample_table") + .set("id") + .to(1) + .set("varchar_column") + .to("abc") + .set("migration_shard_id") + .to("testShardD") + .set("tinyint_column") + .to(1) + .set("text_column") + .to("aaaaaddd") + .set("year_column") + .to("2023") + .set("smallint_column") + .to(22) + .set("bigint_column") + .to(12345678910L) + .set("float_column") + .to(4.2f) + .set("double_column") + .to(42.42d) + .set("blob_column") + .to("abc") + .set("bool_column") + .to(false) + .set("binary_column") + .to(Base64.getEncoder().encodeToString("Hello".getBytes())) + .set("enum_column") + .to("1") + .set("timestamp_column") + .to("2024-05-09T05:40:08.005683553Z") + .set("date_column") + .to("2024-05-09") + .build(); + spannerResourceManager.write(m); + } + + private void assertFileContentsInGCSForAllDatatypes() + throws IOException, java.lang.InterruptedException { + List artifacts = null; + Thread.sleep( + 180000); // wait sufficiently for the file to be generated. It takes about 3 minutes + // at-least. 
If not present wait additional 3 minutes before failing + for (int i = 0; i < 10; i++) { + Thread.sleep(18000); // wait for total 3 minutes over an interval of 18 seconds + artifacts = + gcsResourceManager.listArtifacts("output/testShardD/", Pattern.compile(".*\\.txt$")); + if (artifacts.size() == 1) { + break; + } + } + assertThat(artifacts).hasSize(1); + assertThatArtifacts(artifacts).hasContent("id\\\":\\\"1"); + assertThatArtifacts(artifacts).hasContent("year_column\\\":\\\"2023"); + assertThatArtifacts(artifacts).hasContent("bigint_column\\\":\\\"12345678910"); + assertThatArtifacts(artifacts).hasContent("binary_column\\\":\\\"SGVsbG8"); + assertThatArtifacts(artifacts).hasContent("blob_column\\\":\\\"abc"); + assertThatArtifacts(artifacts).hasContent("bool_column\\\":false"); + assertThatArtifacts(artifacts).hasContent("char_column\\\":null"); + assertThatArtifacts(artifacts).hasContent("date_column\\\":\\\"2024-05-09"); + assertThatArtifacts(artifacts).hasContent("datetime_column\\\":null"); + assertThatArtifacts(artifacts).hasContent("decimal_column\\\":null"); + assertThatArtifacts(artifacts).hasContent("double_column\\\":42.42"); + assertThatArtifacts(artifacts).hasContent("enum_column\\\":\\\"1"); + assertThatArtifacts(artifacts).hasContent("float_column\\\":4.199999809265137"); + assertThatArtifacts(artifacts).hasContent("longblob_column\\\":null"); + assertThatArtifacts(artifacts).hasContent("longtext_column\\\":null"); + assertThatArtifacts(artifacts).hasContent("mediumblob_column\\\":null"); + + assertThatArtifacts(artifacts).hasContent("mediumint_column\\\":null"); + assertThatArtifacts(artifacts).hasContent("mediumtext_column\\\":null"); + assertThatArtifacts(artifacts).hasContent("smallint_column\\\":\\\"22"); + assertThatArtifacts(artifacts).hasContent("text_column\\\":\\\"aaaaaddd"); + assertThatArtifacts(artifacts).hasContent("time_column\\\":null"); + assertThatArtifacts(artifacts) + .hasContent("timestamp_column\\\":\\\"2024-05-09T05:40:08.005683553Z"); + + assertThatArtifacts(artifacts).hasContent("tinyblob_column\\\":null"); + assertThatArtifacts(artifacts).hasContent("tinyint_column\\\":\\\"1"); + assertThatArtifacts(artifacts).hasContent("tinytext_column\\\":null"); + assertThatArtifacts(artifacts).hasContent("update_ts\\\":null"); + assertThatArtifacts(artifacts).hasContent("varbinary_column\\\":null"); + assertThatArtifacts(artifacts).hasContent("varchar_column\\\":\\\"abc"); + } + + private String getGcsFullPath(String artifactId) { + return ArtifactUtils.getFullGcsPath( + artifactBucketName, getClass().getSimpleName(), gcsResourceManager.runId(), artifactId); + } +} diff --git a/v2/spanner-change-streams-to-sharded-file-sink/src/test/java/com/google/cloud/teleport/v2/templates/changestream/DataChangeRecordTypeConvertorTest.java b/v2/spanner-change-streams-to-sharded-file-sink/src/test/java/com/google/cloud/teleport/v2/templates/changestream/DataChangeRecordTypeConvertorTest.java index e55ae3637d..8c150ec65f 100644 --- a/v2/spanner-change-streams-to-sharded-file-sink/src/test/java/com/google/cloud/teleport/v2/templates/changestream/DataChangeRecordTypeConvertorTest.java +++ b/v2/spanner-change-streams-to-sharded-file-sink/src/test/java/com/google/cloud/teleport/v2/templates/changestream/DataChangeRecordTypeConvertorTest.java @@ -374,4 +374,17 @@ public void cannotConvertBooleanToDate() throws Exception { DataChangeRecordTypeConvertor.toDate(ce, "field1", /* requiredField= */ true), Timestamp.parseTimestamp("2020-12-30T12:12:12Z")); } + + /* + * Tests null 
value for NumericBigDecimal + */ + @Test + public void testNullNumericBigDecimal() throws Exception { + String jsonChangeEvent = "{ \"field1\" : null }"; + JsonNode ce = getJsonNode(jsonChangeEvent); + + assertNull( + DataChangeRecordTypeConvertor.toNumericBigDecimal( + ce, "field1", /* requiredField= */ false)); + } } diff --git a/v2/spanner-change-streams-to-sharded-file-sink/src/test/resources/SpannerChangeStreamToGcsSimpleIT/session.json b/v2/spanner-change-streams-to-sharded-file-sink/src/test/resources/SpannerChangeStreamToGcsSimpleIT/session.json new file mode 100644 index 0000000000..48e11ca1ee --- /dev/null +++ b/v2/spanner-change-streams-to-sharded-file-sink/src/test/resources/SpannerChangeStreamToGcsSimpleIT/session.json @@ -0,0 +1,1262 @@ +{ + "SpSchema": { + "t1": { + "Name": "Singers", + "ColIds": [ + "c3", + "c4", + "c5", + "c6", + "c7", + "c36" + ], + "ShardIdColumn": "c36", + "ColDefs": { + "c3": { + "Name": "SingerId", + "T": { + "Name": "INT64", + "Len": 0, + "IsArray": false + }, + "NotNull": true, + "Comment": "From: SingerId bigint(19)", + "Id": "c3" + }, + "c36": { + "Name": "migration_shard_id", + "T": { + "Name": "STRING", + "Len": 50, + "IsArray": false + }, + "NotNull": false, + "Comment": "", + "Id": "c36" + }, + "c4": { + "Name": "FirstName", + "T": { + "Name": "STRING", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: FirstName text(65535)", + "Id": "c4" + }, + "c5": { + "Name": "LastName", + "T": { + "Name": "STRING", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: LastName text(65535)", + "Id": "c5" + }, + "c6": { + "Name": "shardId", + "T": { + "Name": "STRING", + "Len": 20, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: shardId varchar(20)", + "Id": "c6" + }, + "c7": { + "Name": "update_ts", + "T": { + "Name": "TIMESTAMP", + "Len": 0, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: update_ts timestamp", + "Id": "c7" + } + }, + "PrimaryKeys": [ + { + "ColId": "c3", + "Desc": false, + "Order": 2 + }, + { + "ColId": "c36", + "Desc": false, + "Order": 1 + } + ], + "ForeignKeys": null, + "Indexes": null, + "ParentId": "", + "Comment": "Spanner schema for source table Singers", + "Id": "t1" + }, + "t2": { + "Name": "sample_table", + "ColIds": [ + "c8", + "c9", + "c10", + "c11", + "c12", + "c13", + "c14", + "c15", + "c16", + "c17", + "c18", + "c19", + "c20", + "c21", + "c22", + "c23", + "c24", + "c25", + "c26", + "c27", + "c28", + "c29", + "c30", + "c31", + "c32", + "c33", + "c34", + "c35", + "c37" + ], + "ShardIdColumn": "c37", + "ColDefs": { + "c10": { + "Name": "tinyint_column", + "T": { + "Name": "INT64", + "Len": 0, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: tinyint_column tinyint(3)", + "Id": "c10" + }, + "c11": { + "Name": "text_column", + "T": { + "Name": "STRING", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: text_column text(65535)", + "Id": "c11" + }, + "c12": { + "Name": "date_column", + "T": { + "Name": "DATE", + "Len": 0, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: date_column date", + "Id": "c12" + }, + "c13": { + "Name": "smallint_column", + "T": { + "Name": "INT64", + "Len": 0, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: smallint_column smallint(5)", + "Id": "c13" + }, + "c14": { + "Name": "mediumint_column", + "T": { + "Name": "INT64", + "Len": 0, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: 
mediumint_column mediumint(7)", + "Id": "c14" + }, + "c15": { + "Name": "bigint_column", + "T": { + "Name": "INT64", + "Len": 0, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: bigint_column bigint(19)", + "Id": "c15" + }, + "c16": { + "Name": "float_column", + "T": { + "Name": "FLOAT64", + "Len": 0, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: float_column float(10,2)", + "Id": "c16" + }, + "c17": { + "Name": "double_column", + "T": { + "Name": "FLOAT64", + "Len": 0, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: double_column double(22)", + "Id": "c17" + }, + "c18": { + "Name": "decimal_column", + "T": { + "Name": "NUMERIC", + "Len": 0, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: decimal_column decimal(10,2)", + "Id": "c18" + }, + "c19": { + "Name": "datetime_column", + "T": { + "Name": "TIMESTAMP", + "Len": 0, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: datetime_column datetime", + "Id": "c19" + }, + "c20": { + "Name": "timestamp_column", + "T": { + "Name": "TIMESTAMP", + "Len": 0, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: timestamp_column timestamp", + "Id": "c20" + }, + "c21": { + "Name": "time_column", + "T": { + "Name": "STRING", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: time_column time", + "Id": "c21" + }, + "c22": { + "Name": "year_column", + "T": { + "Name": "STRING", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: year_column year", + "Id": "c22" + }, + "c23": { + "Name": "char_column", + "T": { + "Name": "STRING", + "Len": 10, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: char_column char(10)", + "Id": "c23" + }, + "c24": { + "Name": "tinyblob_column", + "T": { + "Name": "BYTES", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: tinyblob_column tinyblob(255)", + "Id": "c24" + }, + "c25": { + "Name": "tinytext_column", + "T": { + "Name": "STRING", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: tinytext_column tinytext(255)", + "Id": "c25" + }, + "c26": { + "Name": "blob_column", + "T": { + "Name": "BYTES", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: blob_column blob(65535)", + "Id": "c26" + }, + "c27": { + "Name": "mediumblob_column", + "T": { + "Name": "BYTES", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: mediumblob_column mediumblob(16777215)", + "Id": "c27" + }, + "c28": { + "Name": "mediumtext_column", + "T": { + "Name": "STRING", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: mediumtext_column mediumtext(16777215)", + "Id": "c28" + }, + "c29": { + "Name": "longblob_column", + "T": { + "Name": "BYTES", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: longblob_column longblob(4294967295)", + "Id": "c29" + }, + "c30": { + "Name": "longtext_column", + "T": { + "Name": "STRING", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: longtext_column longtext(4294967295)", + "Id": "c30" + }, + "c31": { + "Name": "enum_column", + "T": { + "Name": "STRING", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: enum_column enum(1)", + "Id": "c31" + }, + "c32": { + "Name": "bool_column", + "T": { + 
"Name": "BOOL", + "Len": 0, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: bool_column tinyint(1)", + "Id": "c32" + }, + "c33": { + "Name": "binary_column", + "T": { + "Name": "BYTES", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: binary_column binary(20)", + "Id": "c33" + }, + "c34": { + "Name": "varbinary_column", + "T": { + "Name": "BYTES", + "Len": 9223372036854775807, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: varbinary_column varbinary(20)", + "Id": "c34" + }, + "c35": { + "Name": "update_ts", + "T": { + "Name": "TIMESTAMP", + "Len": 0, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: update_ts timestamp", + "Id": "c35" + }, + "c37": { + "Name": "migration_shard_id", + "T": { + "Name": "STRING", + "Len": 50, + "IsArray": false + }, + "NotNull": false, + "Comment": "", + "Id": "c37" + }, + "c8": { + "Name": "id", + "T": { + "Name": "INT64", + "Len": 0, + "IsArray": false + }, + "NotNull": true, + "Comment": "From: id int(10)", + "Id": "c8" + }, + "c9": { + "Name": "varchar_column", + "T": { + "Name": "STRING", + "Len": 20, + "IsArray": false + }, + "NotNull": false, + "Comment": "From: varchar_column varchar(20)", + "Id": "c9" + } + }, + "PrimaryKeys": [ + { + "ColId": "c8", + "Desc": false, + "Order": 2 + }, + { + "ColId": "c37", + "Desc": false, + "Order": 1 + } + ], + "ForeignKeys": null, + "Indexes": null, + "ParentId": "", + "Comment": "Spanner schema for source table sample_table", + "Id": "t2" + } + }, + "SyntheticPKeys": {}, + "SrcSchema": { + "t1": { + "Name": "Singers", + "Schema": "alltypes", + "ColIds": [ + "c3", + "c4", + "c5", + "c6", + "c7" + ], + "ColDefs": { + "c3": { + "Name": "SingerId", + "Type": { + "Name": "bigint", + "Mods": [ + 19 + ], + "ArrayBounds": null + }, + "NotNull": true, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c3" + }, + "c4": { + "Name": "FirstName", + "Type": { + "Name": "text", + "Mods": [ + 65535 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c4" + }, + "c5": { + "Name": "LastName", + "Type": { + "Name": "text", + "Mods": [ + 65535 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c5" + }, + "c6": { + "Name": "shardId", + "Type": { + "Name": "varchar", + "Mods": [ + 20 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c6" + }, + "c7": { + "Name": "update_ts", + "Type": { + "Name": "timestamp", + "Mods": null, + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c7" + } + }, + "PrimaryKeys": [ + { + "ColId": "c3", + "Desc": false, + "Order": 1 + } + ], + "ForeignKeys": null, + "Indexes": null, + "Id": "t1" + }, + "t2": { + "Name": "sample_table", + "Schema": "alltypes", + "ColIds": [ + "c8", + "c9", + "c10", + "c11", + "c12", + "c13", + "c14", + "c15", + "c16", + "c17", + "c18", + "c19", + "c20", + "c21", + 
"c22", + "c23", + "c24", + "c25", + "c26", + "c27", + "c28", + "c29", + "c30", + "c31", + "c32", + "c33", + "c34", + "c35" + ], + "ColDefs": { + "c10": { + "Name": "tinyint_column", + "Type": { + "Name": "tinyint", + "Mods": [ + 3 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c10" + }, + "c11": { + "Name": "text_column", + "Type": { + "Name": "text", + "Mods": [ + 65535 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c11" + }, + "c12": { + "Name": "date_column", + "Type": { + "Name": "date", + "Mods": null, + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c12" + }, + "c13": { + "Name": "smallint_column", + "Type": { + "Name": "smallint", + "Mods": [ + 5 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c13" + }, + "c14": { + "Name": "mediumint_column", + "Type": { + "Name": "mediumint", + "Mods": [ + 7 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c14" + }, + "c15": { + "Name": "bigint_column", + "Type": { + "Name": "bigint", + "Mods": [ + 19 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c15" + }, + "c16": { + "Name": "float_column", + "Type": { + "Name": "float", + "Mods": [ + 10, + 2 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c16" + }, + "c17": { + "Name": "double_column", + "Type": { + "Name": "double", + "Mods": [ + 22 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c17" + }, + "c18": { + "Name": "decimal_column", + "Type": { + "Name": "decimal", + "Mods": [ + 10, + 2 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c18" + }, + "c19": { + "Name": "datetime_column", + "Type": { + "Name": "datetime", + "Mods": null, + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c19" + }, + "c20": { + "Name": "timestamp_column", + "Type": { + "Name": "timestamp", + "Mods": null, + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c20" + }, + "c21": { + "Name": 
"time_column", + "Type": { + "Name": "time", + "Mods": null, + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c21" + }, + "c22": { + "Name": "year_column", + "Type": { + "Name": "year", + "Mods": null, + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c22" + }, + "c23": { + "Name": "char_column", + "Type": { + "Name": "char", + "Mods": [ + 10 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c23" + }, + "c24": { + "Name": "tinyblob_column", + "Type": { + "Name": "tinyblob", + "Mods": [ + 255 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c24" + }, + "c25": { + "Name": "tinytext_column", + "Type": { + "Name": "tinytext", + "Mods": [ + 255 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c25" + }, + "c26": { + "Name": "blob_column", + "Type": { + "Name": "blob", + "Mods": [ + 65535 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c26" + }, + "c27": { + "Name": "mediumblob_column", + "Type": { + "Name": "mediumblob", + "Mods": [ + 16777215 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c27" + }, + "c28": { + "Name": "mediumtext_column", + "Type": { + "Name": "mediumtext", + "Mods": [ + 16777215 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c28" + }, + "c29": { + "Name": "longblob_column", + "Type": { + "Name": "longblob", + "Mods": [ + 4294967295 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c29" + }, + "c30": { + "Name": "longtext_column", + "Type": { + "Name": "longtext", + "Mods": [ + 4294967295 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c30" + }, + "c31": { + "Name": "enum_column", + "Type": { + "Name": "enum", + "Mods": [ + 1 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c31" + }, + "c32": { + "Name": "bool_column", + "Type": { + "Name": "tinyint", + "Mods": [ + 1 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": 
false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c32" + }, + "c33": { + "Name": "binary_column", + "Type": { + "Name": "binary", + "Mods": [ + 20 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c33" + }, + "c34": { + "Name": "varbinary_column", + "Type": { + "Name": "varbinary", + "Mods": [ + 20 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c34" + }, + "c35": { + "Name": "update_ts", + "Type": { + "Name": "timestamp", + "Mods": null, + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c35" + }, + "c8": { + "Name": "id", + "Type": { + "Name": "int", + "Mods": [ + 10 + ], + "ArrayBounds": null + }, + "NotNull": true, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c8" + }, + "c9": { + "Name": "varchar_column", + "Type": { + "Name": "varchar", + "Mods": [ + 20 + ], + "ArrayBounds": null + }, + "NotNull": false, + "Ignored": { + "Check": false, + "Identity": false, + "Default": false, + "Exclusion": false, + "ForeignKey": false, + "AutoIncrement": false + }, + "Id": "c9" + } + }, + "PrimaryKeys": [ + { + "ColId": "c8", + "Desc": false, + "Order": 1 + } + ], + "ForeignKeys": null, + "Indexes": null, + "Id": "t2" + } + }, + "SchemaIssues": { + "t1": { + "ColumnLevelIssues": { + "c3": [], + "c36": [ + 28 + ], + "c4": [], + "c5": [], + "c6": [], + "c7": [] + }, + "TableLevelIssues": null + }, + "t2": { + "ColumnLevelIssues": { + "c10": [ + 13 + ], + "c13": [ + 13 + ], + "c14": [ + 13 + ], + "c16": [ + 13 + ], + "c19": [ + 12 + ], + "c21": [ + 14 + ], + "c22": [ + 14 + ], + "c37": [ + 28 + ], + "c8": [ + 13 + ] + }, + "TableLevelIssues": null + } + }, + "Location": {}, + "TimezoneOffset": "+00:00", + "SpDialect": "google_standard_sql", + "UniquePKey": {}, + "Rules": [ + { + "Id": "r38", + "Name": "r38", + "Type": "add_shard_id_primary_key", + "ObjectType": "", + "AssociatedObjects": "All Tables", + "Enabled": true, + "Data": { + "AddedAtTheStart": true + }, + "AddedOn": { + "TimeOffset": null + } + } + ], + "IsSharded": true + } \ No newline at end of file diff --git a/v2/spanner-change-streams-to-sharded-file-sink/src/test/resources/SpannerChangeStreamToGcsSimpleIT/spanner-schema-simple.sql b/v2/spanner-change-streams-to-sharded-file-sink/src/test/resources/SpannerChangeStreamToGcsSimpleIT/spanner-schema-simple.sql new file mode 100644 index 0000000000..0be1967989 --- /dev/null +++ b/v2/spanner-change-streams-to-sharded-file-sink/src/test/resources/SpannerChangeStreamToGcsSimpleIT/spanner-schema-simple.sql @@ -0,0 +1,49 @@ +CREATE TABLE Singers ( + SingerId INT64 NOT NULL, + FirstName STRING(MAX), + LastName STRING(MAX), + shardId STRING(20), + update_ts TIMESTAMP, + migration_shard_id STRING(50), +) PRIMARY KEY(SingerId, migration_shard_id); + + +CREATE TABLE sample_table ( + id INT64 NOT NULL, + varchar_column STRING(20), + tinyint_column INT64, + text_column STRING(MAX), + date_column DATE, + smallint_column INT64, + mediumint_column 
INT64, + bigint_column INT64, + float_column FLOAT64, + double_column FLOAT64, + decimal_column NUMERIC, + datetime_column TIMESTAMP, + timestamp_column TIMESTAMP, + time_column STRING(MAX), + year_column STRING(MAX), + char_column STRING(10), + tinyblob_column BYTES(MAX), + tinytext_column STRING(MAX), + blob_column BYTES(MAX), + mediumblob_column BYTES(MAX), + mediumtext_column STRING(MAX), + longblob_column BYTES(MAX), + longtext_column STRING(MAX), + enum_column STRING(MAX), + bool_column BOOL, + binary_column BYTES(MAX), + varbinary_column BYTES(MAX), + update_ts TIMESTAMP, + migration_shard_id STRING(50), +) PRIMARY KEY(id, migration_shard_id); + + + +CREATE CHANGE STREAM allstream + FOR ALL OPTIONS ( + value_capture_type = 'NEW_ROW', + retention_period = '7d' +); \ No newline at end of file From 73cc74c3140a3f8fa280143a9455e498981f44cd Mon Sep 17 00:00:00 2001 From: Danny McCormick Date: Tue, 14 May 2024 10:45:47 -0400 Subject: [PATCH 36/70] Add guidance on deprecating parameters --- contributor-docs/code-contributions.md | 27 ++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/contributor-docs/code-contributions.md b/contributor-docs/code-contributions.md index d22e6b6596..dbcd309025 100644 --- a/contributor-docs/code-contributions.md +++ b/contributor-docs/code-contributions.md @@ -323,6 +323,33 @@ BulkInsertMethodOptions getBulkInsertMethod(); Note: `order` is relevant for templates that can be used from the UI, and specify the relative order of parameters. +#### Template Parameter Compatibility/Deprecation + +Dataflow Templates attempt to maintain full backwards compatibility with previous versions of the template, such that templates launched by automation or a schedule will continue to function indefinitely. If you need to deprecate a TemplateParameter, you can do so by adding the `@Deprecated` annotation to the template, for example: + +```java +@TemplateParameter.Text( + order = 21, + optional = true, + description = "Deprecated, use XYZ instead", + helpText = "This parameter has been deprecated, please use XYZ parameter instead") +@Deprecated +BulkInsertMethodOptions getBulkInsertMethod(); +``` + +You can optionally add a `hiddenUi` attribute to hide it in the UI. 
This will still keep it available via gcloud/REST API calls: + +```java +@TemplateParameter.Text( + order = 21, + optional = true, + description = "Deprecated, use XYZ instead", + helpText = "This parameter has been deprecated, please use XYZ parameter instead", + hiddenUi = true) +@Deprecated +BulkInsertMethodOptions getBulkInsertMethod(); +``` + #### @TemplateIntegrationTest Annotation This annotation should be used by classes that are used for integration tests of From 60d9dd742ffa88427c92d4ecad3cf5a347438996 Mon Sep 17 00:00:00 2001 From: Danny McCormick Date: Tue, 14 May 2024 16:51:03 -0400 Subject: [PATCH 37/70] Add more guidance on adding new templates --- .../add-flex-template.md | 2 +- .../add-load-test.md | 2 +- contributor-docs/code-contributions.md | 24 ++++++++++++++++++ contributor-docs/maintainers-guide.md | 5 ++-- tutorials/README.md | 25 ------------------- 5 files changed, 29 insertions(+), 29 deletions(-) rename tutorials/flex-template.md => contributor-docs/add-flex-template.md (99%) rename tutorials/load-test.md => contributor-docs/add-load-test.md (99%) delete mode 100644 tutorials/README.md diff --git a/tutorials/flex-template.md b/contributor-docs/add-flex-template.md similarity index 99% rename from tutorials/flex-template.md rename to contributor-docs/add-flex-template.md index 8646ca5ef0..ab77d1f8e3 100644 --- a/tutorials/flex-template.md +++ b/contributor-docs/add-flex-template.md @@ -1,4 +1,4 @@ -# Flex Template Tutorial +# Adding a Flex Template ## Overview diff --git a/tutorials/load-test.md b/contributor-docs/add-load-test.md similarity index 99% rename from tutorials/load-test.md rename to contributor-docs/add-load-test.md index 6b04e7f275..a1dd508806 100644 --- a/tutorials/load-test.md +++ b/contributor-docs/add-load-test.md @@ -1,4 +1,4 @@ -# Load Test Tutorial +# Adding a Load Test ## Overview diff --git a/contributor-docs/code-contributions.md b/contributor-docs/code-contributions.md index dbcd309025..b50aea0849 100644 --- a/contributor-docs/code-contributions.md +++ b/contributor-docs/code-contributions.md @@ -243,6 +243,30 @@ The parameter `-Dtest=` can be given to test a single class (e.g., `-Dtest=Pubsu The same happens when the test is executed from an IDE, just make sure to add the parameters `-Dproject=`, `-DartifactBucket=` and `-Dregion=` as program or VM arguments. +### Running Load Tests + +For information on adding and running load tests, see [Adding a Load Test](./add-load-test.md). + +### Adding New Templates + +If you are interested in introducing a new template, please file an issue using the [Google Issue Tracker](https://issuetracker.google.com/issues/new?component=187168&template=0) before doing so. You need approval before moving forward. Any new templates must be flex templates in the v2 directory. + +Once you have approval, follow these steps from the root of the repo to get started on your template. + +First, install the [maven plugin](#templates-plugin), then create a subdirectory in the repository: `v2/`. `` should follow the pattern `-to-`. From there, you can follow the steps in [Adding a Flex Template](./add-flex-template.md) to develop your template. + +All new templates must comply with the following guidance: + +- [ ] Template addition has been approved by the core Dataflow Templates team. +- [ ] The template must be a Flex Template located in the `v2/