From 7980113f1c02169079bf1a99b027ad4d1454e2f7 Mon Sep 17 00:00:00 2001 From: Ryan Kuo Date: Thu, 6 Jun 2024 14:33:04 -0400 Subject: [PATCH] move MOLT docs into molt directory; add redirects --- src/current/_data/redirects.yml | 17 +- .../_includes/v23.1/sidebar-data/migrate.json | 4 +- .../_includes/v23.2/sidebar-data/migrate.json | 6 +- .../_includes/v24.1/sidebar-data/migrate.json | 6 +- .../{v24.1 => molt}/live-migration-service.md | 44 +- src/current/{v24.1 => molt}/molt-fetch.md | 122 +-- src/current/{v24.1 => molt}/molt-verify.md | 30 +- src/current/v23.1/live-migration-service.md | 676 ------------- src/current/v23.1/migrate-from-mysql.md | 10 +- src/current/v23.1/migrate-from-postgres.md | 6 +- src/current/v23.1/migration-overview.md | 12 +- src/current/v23.1/molt-verify.md | 110 --- src/current/v23.2/live-migration-service.md | 892 ------------------ src/current/v23.2/migrate-from-mysql.md | 10 +- src/current/v23.2/migrate-from-postgres.md | 6 +- src/current/v23.2/migration-overview.md | 29 +- src/current/v23.2/molt-fetch.md | 750 --------------- src/current/v23.2/molt-verify.md | 135 --- src/current/v24.1/migrate-from-mysql.md | 10 +- src/current/v24.1/migrate-from-postgres.md | 6 +- src/current/v24.1/migration-overview.md | 29 +- 21 files changed, 182 insertions(+), 2728 deletions(-) rename src/current/{v24.1 => molt}/live-migration-service.md (93%) rename src/current/{v24.1 => molt}/molt-fetch.md (87%) rename src/current/{v24.1 => molt}/molt-verify.md (76%) delete mode 100644 src/current/v23.1/live-migration-service.md delete mode 100644 src/current/v23.1/molt-verify.md delete mode 100644 src/current/v23.2/live-migration-service.md delete mode 100644 src/current/v23.2/molt-fetch.md delete mode 100644 src/current/v23.2/molt-verify.md diff --git a/src/current/_data/redirects.yml b/src/current/_data/redirects.yml index e210ae50b86..3e71eeede2d 100644 --- a/src/current/_data/redirects.yml +++ b/src/current/_data/redirects.yml @@ -77,7 +77,22 @@ - 
destination: advisories/index.md sources: ['advisories/advisories.md'] -# Pages undergoing maintenance +- destination: molt/molt-fetch.md + sources: + - v23.2/molt-fetch.md + - v24.1/molt-fetch.md + +- destination: molt/molt-verify.md + sources: + - v23.1/molt-verify.md + - v23.2/molt-verify.md + - v24.1/molt-verify.md + +- destination: molt/live-migration-service.md + sources: + - v23.1/live-migration-service.md + - v23.2/live-migration-service.md + - v24.1/live-migration-service.md # Renamed pages diff --git a/src/current/_includes/v23.1/sidebar-data/migrate.json b/src/current/_includes/v23.1/sidebar-data/migrate.json index 92a72d90371..071a8e4acc3 100644 --- a/src/current/_includes/v23.1/sidebar-data/migrate.json +++ b/src/current/_includes/v23.1/sidebar-data/migrate.json @@ -20,13 +20,13 @@ { "title": "Verify", "urls": [ - "/${VERSION}/molt-verify.html" + "/molt/molt-verify.html" ] }, { "title": "Live Migration Service", "urls": [ - "/${VERSION}/live-migration-service.html" + "/molt/live-migration-service.html" ] } ] diff --git a/src/current/_includes/v23.2/sidebar-data/migrate.json b/src/current/_includes/v23.2/sidebar-data/migrate.json index 5b6338a628b..e673eb97005 100644 --- a/src/current/_includes/v23.2/sidebar-data/migrate.json +++ b/src/current/_includes/v23.2/sidebar-data/migrate.json @@ -20,19 +20,19 @@ { "title": "Fetch", "urls": [ - "/${VERSION}/molt-fetch.html" + "/molt/molt-fetch.html" ] }, { "title": "Verify", "urls": [ - "/${VERSION}/molt-verify.html" + "/molt/molt-verify.html" ] }, { "title": "Live Migration Service", "urls": [ - "/${VERSION}/live-migration-service.html" + "/molt/live-migration-service.html" ] } ] diff --git a/src/current/_includes/v24.1/sidebar-data/migrate.json b/src/current/_includes/v24.1/sidebar-data/migrate.json index 5b6338a628b..e673eb97005 100644 --- a/src/current/_includes/v24.1/sidebar-data/migrate.json +++ b/src/current/_includes/v24.1/sidebar-data/migrate.json @@ -20,19 +20,19 @@ { "title": "Fetch", "urls": [ - 
"/${VERSION}/molt-fetch.html" + "/molt/molt-fetch.html" ] }, { "title": "Verify", "urls": [ - "/${VERSION}/molt-verify.html" + "/molt/molt-verify.html" ] }, { "title": "Live Migration Service", "urls": [ - "/${VERSION}/live-migration-service.html" + "/molt/live-migration-service.html" ] } ] diff --git a/src/current/v24.1/live-migration-service.md b/src/current/molt/live-migration-service.md similarity index 93% rename from src/current/v24.1/live-migration-service.md rename to src/current/molt/live-migration-service.md index 8bd9808e340..c44665f432e 100644 --- a/src/current/v24.1/live-migration-service.md +++ b/src/current/molt/live-migration-service.md @@ -9,7 +9,7 @@ docs_area: migrate {% include feature-phases/preview.md %} {{site.data.alerts.end}} -MOLT LMS (Live Migration Service) is used during a [live migration]({% link {{ page.version.version }}/migration-overview.md %}#minimal-downtime) to CockroachDB. +MOLT LMS (Live Migration Service) is used during a [live migration]({% link {{site.current_cloud_version}}/migration-overview.md %}#minimal-downtime) to CockroachDB. The LMS is a self-hosted, horizontally scalable proxy that routes traffic between an application, a source database, and a target CockroachDB database. You use the LMS to control which database, as the "source of truth", is serving reads and writes to an application. You can optionally configure the LMS to [shadow production traffic](#shadowing-modes) from the source database and validate the query results on CockroachDB. When you have sufficiently tested your application and are confident with its consistency and performance on CockroachDB, you use the LMS to [perform the cutover](#perform-a-cutover) to CockroachDB. @@ -18,7 +18,7 @@ MOLT LMS is self-hosted on [Kubernetes](https://kubernetes.io/) and [configured - A number of proxy [instances](#lms-instances) (running in separate Kubernetes pods) across which application traffic is distributed and routed to the source and target databases. 
- An "orchestrator" service (running in a single Kubernetes pod) that coordinates the proxy instances and sends the cutover commands. -This page describes how to [install](#installation), [configure](#configuration), [secure](#security), and [use the LMS](#molt-lms-cli) to perform a live migration. {% comment %}For more information, see [Migration Strategy: Live Migration]({% link {{ page.version.version }}/migration-strategy-live-migration.md %}).{% endcomment %} +This page describes how to [install](#installation), [configure](#configuration), [secure](#security), and [use the LMS](#molt-lms-cli) to perform a live migration. {% comment %}For more information, see [Migration Strategy: Live Migration]({% link {{site.current_cloud_version}}/migration-strategy-live-migration.md %}).{% endcomment %} For a demo of the Live Migration Service in action, watch the following video: @@ -37,8 +37,8 @@ For a demo of the Live Migration Service in action, watch the following video: #### Supported database technologies -- [PostgreSQL]({% link {{ page.version.version }}/migrate-from-postgres.md %}) (source) -- [MySQL]({% link {{ page.version.version }}/migrate-from-mysql.md %}) (source) +- [PostgreSQL]({% link {{site.current_cloud_version}}/migrate-from-postgres.md %}) (source) +- [MySQL]({% link {{site.current_cloud_version}}/migrate-from-mysql.md %}) (source) - CockroachDB (source and target) ## Installation @@ -261,7 +261,7 @@ spec: The connection strings are specified with the following keys inside `config.json`: - `INIT_SOURCE`: External connection string for the source database, including the paths to your client certificate and keys. -- `INIT_TARGET`: External [connection string for the CockroachDB database]({% link {{ page.version.version }}/connection-parameters.md %}#connect-using-a-url), including the paths to your client certificate and keys. 
+- `INIT_TARGET`: External [connection string for the CockroachDB database]({% link {{site.current_cloud_version}}/connection-parameters.md %}#connect-using-a-url), including the paths to your client certificate and keys. The remote secret `lms-secret` will contain the full connection strings and paths, such that the `config.json` keys resolve to: @@ -326,7 +326,7 @@ The connection strings are specified with the following keys inside `config.json If you named the release `lms` during [installation](#installation), exclude `{releasename}-` from the LMS connection string. {{site.data.alerts.end}} -- `CRDB_URL`: External [connection string for the CockroachDB database]({% link {{ page.version.version }}/connection-parameters.md %}#connect-using-a-url), including the paths to your client certificate and keys. +- `CRDB_URL`: External [connection string for the CockroachDB database]({% link {{site.current_cloud_version}}/connection-parameters.md %}#connect-using-a-url), including the paths to your client certificate and keys. The remote secret `orch-secret` will contain the full connection strings, such that the `config.json` keys resolve to: @@ -644,16 +644,16 @@ You can use this mode to perform an [immediate cutover](#immediate-cutover). ### Consistent cutover -A consistent cutover maintains data consistency with [minimal downtime]({% link {{ page.version.version }}/migration-overview.md %}#minimal-downtime). The goal of consistent cutover is to stop application traffic long enough for replication to catch up and ensure that the cutover achieves consistency across the two databases. +A consistent cutover maintains data consistency with [minimal downtime]({% link {{site.current_cloud_version}}/migration-overview.md %}#minimal-downtime). The goal of consistent cutover is to stop application traffic long enough for replication to catch up and ensure that the cutover achieves consistency across the two databases. 
When using the LMS, consistent cutover is handled using the [`molt-lms-cli`](#molt-lms-cli) commands `cutover consistent begin` and `cutover consistent commit`, during which application requests are queued and will be responded to after cutover. This delay in response time is related to the maximum duration of any transactions and queries that need to complete, and the time it takes for replication to catch up from the source to the target database. {% comment %} -For more information about the consistent cutover approach, see [Migration Strategy: Live Migration]({% link {{ page.version.version }}/migration-strategy-live-migration.md %}). +For more information about the consistent cutover approach, see [Migration Strategy: Live Migration]({% link {{site.current_cloud_version}}/migration-strategy-live-migration.md %}). {% endcomment %} {{site.data.alerts.callout_info}} -These steps assume you have already followed the overall steps to [prepare for migration]({% link {{ page.version.version }}/migration-overview.md %}#prepare-for-migration). In particular, [update your schema and application queries]({% link {{ page.version.version }}/migration-overview.md %}#update-the-schema-and-queries) to work with CockroachDB. +These steps assume you have already followed the overall steps to [prepare for migration]({% link {{site.current_cloud_version}}/migration-overview.md %}#prepare-for-migration). In particular, [update your schema and application queries]({% link {{site.current_cloud_version}}/migration-overview.md %}#update-the-schema-and-queries) to work with CockroachDB. {{site.data.alerts.end}} To perform a consistent cutover with the LMS: @@ -672,7 +672,7 @@ To perform a consistent cutover with the LMS: 1. Send application requests to the LMS, which routes the traffic to the source database. The source database is designated the source of truth. -1. 
Use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to validate that the replicated data on CockroachDB is consistent with the source of truth. +1. Use [MOLT Verify]({% link molt/molt-verify.md %}) to validate that the replicated data on CockroachDB is consistent with the source of truth. 1. Begin the consistent cutover. **Requests are now queued in the LMS**, including queries from existing connections and new connection requests to the LMS: @@ -720,7 +720,7 @@ To perform a consistent cutover with the LMS: To verify that CockroachDB is now the source of truth, you can run `molt-lms-cli status`. -1. Again, use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to validate that the data on the source database and CockroachDB are consistent. +1. Again, use [MOLT Verify]({% link molt/molt-verify.md %}) to validate that the data on the source database and CockroachDB are consistent. If any problems arise during a consistent cutover: @@ -740,14 +740,14 @@ If any problems arise during a consistent cutover: {% comment %} ### Immediate cutover -An immediate cutover can potentially [reduce downtime to zero]({% link {{ page.version.version }}/migration-overview.md %}#minimal-downtime), at the likely risk of introducing data inconsistencies between the source and target databases. The LMS is configured to dual write to the source and target databases, while the [`molt-lms-cli`](#molt-lms-cli) command `cutover immediate` initiates cutover. +An immediate cutover can potentially [reduce downtime to zero]({% link {{site.current_cloud_version}}/migration-overview.md %}#minimal-downtime), at the likely risk of introducing data inconsistencies between the source and target databases. The LMS is configured to dual write to the source and target databases, while the [`molt-lms-cli`](#molt-lms-cli) command `cutover immediate` initiates cutover. 
-For more information about the immediate cutover approach, see [Migration Strategy: Live Migration]({% link {{ page.version.version }}/migration-strategy-live-migration.md %}). +For more information about the immediate cutover approach, see [Migration Strategy: Live Migration]({% link {{site.current_cloud_version}}/migration-strategy-live-migration.md %}). To perform an immediate cutover with the LMS: {{site.data.alerts.callout_info}} -These steps assume you have already followed the overall steps to [prepare for migration]({% link {{ page.version.version }}/migration-overview.md %}#prepare-for-migration). In particular, [update your schema and application queries]({% link {{ page.version.version }}/migration-overview.md %}#update-the-schema-and-queries) to work with CockroachDB. +These steps assume you have already followed the overall steps to [prepare for migration]({% link {{site.current_cloud_version}}/migration-overview.md %}#prepare-for-migration). In particular, [update your schema and application queries]({% link {{site.current_cloud_version}}/migration-overview.md %}#update-the-schema-and-queries) to work with CockroachDB. {{site.data.alerts.end}} 1. [Configure the LMS](#configuration) with your deployment details, and follow our [security recommendations](#security). @@ -756,9 +756,9 @@ These steps assume you have already followed the overall steps to [prepare for m 1. Send application requests to the LMS, which routes the traffic to the source database and to CockroachDB. The source database is designated the source of truth. -1. Use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to validate that the replicated data on CockroachDB is consistent with the source of truth. +1. Use [MOLT Verify]({% link molt/molt-verify.md %}) to validate that the replicated data on CockroachDB is consistent with the source of truth. - To ensure data integrity, shadowing must be enabled for a sufficient duration with a low error rate. 
All LMS instances should have been continuously shadowing your workload for the past **seven days** at minimum, with only transient inconsistencies caused by events such as [transaction retry errors]({% link {{ page.version.version }}/transaction-retry-error-reference.md %}). The longer shadowing has been enabled, the better this allows you to evaluate consistency. + To ensure data integrity, shadowing must be enabled for a sufficient duration with a low error rate. All LMS instances should have been continuously shadowing your workload for the past **seven days** at minimum, with only transient inconsistencies caused by events such as [transaction retry errors]({% link {{site.current_cloud_version}}/transaction-retry-error-reference.md %}). The longer shadowing has been enabled, the better this allows you to evaluate consistency. 1. Once nearly all data from the source database is replicated to CockroachDB (for example, with a <1 second delay or <1000 rows), initiate the cutover: @@ -769,7 +769,7 @@ These steps assume you have already followed the overall steps to [prepare for m This command tells the LMS to switch the source of truth to CockroachDB. Application traffic is immediately directed to CockroachDB. -1. Any writes that were made during the cutover will have been missed on CockroachDB. Use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to identify the inconsistencies. These will need to be manually reconciled. +1. Any writes that were made during the cutover will have been missed on CockroachDB. Use [MOLT Verify]({% link molt/molt-verify.md %}) to identify the inconsistencies. These will need to be manually reconciled. {% endcomment %} ### Monitor cutover @@ -884,9 +884,9 @@ Individual LMS Proxy Status. 
## See also -- [Migration Overview]({% link {{ page.version.version }}/migration-overview.md %}) -{% comment %}- [Migration Strategy: Live Migration]({% link {{ page.version.version }}/migration-strategy-live-migration.md %}){% endcomment %} +- [Migration Overview]({% link {{site.current_cloud_version}}/migration-overview.md %}) +{% comment %}- [Migration Strategy: Live Migration]({% link {{site.current_cloud_version}}/migration-strategy-live-migration.md %}){% endcomment %} - [Use the Schema Conversion Tool](https://www.cockroachlabs.com/docs/cockroachcloud/migrations-page) -- [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) -- [Migrate from PostgreSQL]({% link {{ page.version.version }}/migrate-from-postgres.md %}) -- [Migrate from MySQL]({% link {{ page.version.version }}/migrate-from-mysql.md %}) +- [MOLT Verify]({% link molt/molt-verify.md %}) +- [Migrate from PostgreSQL]({% link {{site.current_cloud_version}}/migrate-from-postgres.md %}) +- [Migrate from MySQL]({% link {{site.current_cloud_version}}/migrate-from-mysql.md %}) diff --git a/src/current/v24.1/molt-fetch.md b/src/current/molt/molt-fetch.md similarity index 87% rename from src/current/v24.1/molt-fetch.md rename to src/current/molt/molt-fetch.md index 1438b0b30c6..227f4a061c7 100644 --- a/src/current/v24.1/molt-fetch.md +++ b/src/current/molt/molt-fetch.md @@ -5,16 +5,16 @@ toc: true docs_area: migrate --- -MOLT Fetch moves data from a source database into CockroachDB as part of a [database migration]({% link {{ page.version.version }}/migration-overview.md %}). +MOLT Fetch moves data from a source database into CockroachDB as part of a [database migration]({% link {{site.current_cloud_version}}/migration-overview.md %}). 
-MOLT Fetch uses [`IMPORT INTO`]({% link {{ page.version.version }}/import-into.md %}) or [`COPY FROM`]({% link {{ page.version.version }}/copy-from.md %}) to move the source data to cloud storage (Google Cloud Storage or Amazon S3), a local file server, or local memory. Once the data is exported, MOLT Fetch loads the data onto a target CockroachDB database. For details, see [Usage](#usage). +MOLT Fetch uses [`IMPORT INTO`]({% link {{site.current_cloud_version}}/import-into.md %}) or [`COPY FROM`]({% link {{site.current_cloud_version}}/copy-from.md %}) to move the source data to cloud storage (Google Cloud Storage or Amazon S3), a local file server, or local memory. Once the data is exported, MOLT Fetch loads the data onto a target CockroachDB database. For details, see [Usage](#usage). ## Supported databases The following source databases are currently supported: -- [PostgreSQL]({% link {{ page.version.version }}/migrate-from-postgres.md %}) -- [MySQL]({% link {{ page.version.version }}/migrate-from-mysql.md %}) +- [PostgreSQL]({% link {{site.current_cloud_version}}/migrate-from-postgres.md %}) +- [MySQL]({% link {{site.current_cloud_version}}/migrate-from-mysql.md %}) - CockroachDB ## Installation @@ -41,7 +41,7 @@ Complete the following items before using MOLT Fetch: - Ensure that the source and target schemas are identical, unless you enable automatic schema creation with the [`'drop-on-target-and-recreate'`](#target-table-handling) option. If you are creating the target schema manually, review the behaviors in [Mismatch handling](#mismatch-handling). 
-- Ensure that the SQL user running MOLT Fetch has [`SELECT` privileges]({% link {{ page.version.version }}/grant.md %}#supported-privileges) on the source and target CockroachDB databases, along with the required privileges to run [`IMPORT INTO`]({% link {{ page.version.version }}/import-into.md %}#required-privileges) or [`COPY FROM`]({% link {{ page.version.version }}/copy-from.md %}#required-privileges) (depending on the [fetch mode](#fetch-mode)) on CockroachDB, as described on their respective pages. +- Ensure that the SQL user running MOLT Fetch has [`SELECT` privileges]({% link {{site.current_cloud_version}}/grant.md %}#supported-privileges) on the source and target CockroachDB databases, along with the required privileges to run [`IMPORT INTO`]({% link {{site.current_cloud_version}}/import-into.md %}#required-privileges) or [`COPY FROM`]({% link {{site.current_cloud_version}}/copy-from.md %}#required-privileges) (depending on the [fetch mode](#fetch-mode)) on CockroachDB, as described on their respective pages. - If you plan to use continuous replication (using either [`--ongoing-replication`](#replication) or the [CDC cursor](#cdc-cursor)): @@ -49,7 +49,7 @@ Complete the following items before using MOLT Fetch: - If you are migrating from MySQL, enable [GTID](https://dev.mysql.com/doc/refman/8.0/en/replication-options-gtids.html) consistency. In `mysql.cnf`, in the SQL shell, or as flags in the `mysql` start command, set `gtid-mode` and `enforce-gtid-consistency` to `ON` and set `binlog_row_metadata` to `full`. -- Percent-encode the connection strings for the source database and [CockroachDB]({% link {{ page.version.version }}/connect-to-the-database.md %}). This ensures that the MOLT tools can parse special characters in your password. +- Percent-encode the connection strings for the source database and [CockroachDB]({% link {{site.current_cloud_version}}/connect-to-the-database.md %}). 
This ensures that the MOLT tools can parse special characters in your password. - Given a password `a$52&`, pass it to the `molt escape-password` command with single quotes: @@ -113,7 +113,7 @@ Complete the following items before using MOLT Fetch: - If a table in the source database is much larger than the other tables, [filter and export the largest table](#schema-and-table-selection) in its own `molt fetch` task. Repeat this for each of the largest tables. Then export the remaining tables in another task. -- When using [`IMPORT INTO` mode](#fetch-mode) to load tables into CockroachDB, if the fetch process terminates before the import job completes, the hanging import job on the target database will keep the table offline. To make this table accessible again, [manually resume or cancel the job]({% link {{ page.version.version }}/import-into.md %}#view-and-control-import-jobs). Then resume `molt fetch` using [continuation](#fetch-continuation), or restart the process from the beginning. +- When using [`IMPORT INTO` mode](#fetch-mode) to load tables into CockroachDB, if the fetch process terminates before the import job completes, the hanging import job on the target database will keep the table offline. To make this table accessible again, [manually resume or cancel the job]({% link {{site.current_cloud_version}}/import-into.md %}#view-and-control-import-jobs). Then resume `molt fetch` using [continuation](#fetch-continuation), or restart the process from the beginning. ## Security recommendations @@ -121,7 +121,7 @@ Cockroach Labs **strongly** recommends the following: ### Secure connections -- Use secure connections to the source and [target CockroachDB database]({% link {{ page.version.version }}/connection-parameters.md %}#additional-connection-parameters) whenever possible. 
+- Use secure connections to the source and [target CockroachDB database]({% link {{site.current_cloud_version}}/connection-parameters.md %}#additional-connection-parameters) whenever possible. - By default, insecure connections (i.e., `sslmode=disable` on PostgreSQL; `sslmode` not set on MySQL) are disallowed. When using an insecure connection, `molt fetch` returns an error. To override this check, you can enable the `--allow-tls-mode-disable` flag. Do this **only** for testing, or if a secure SSL/TLS connection to the source or target database is not possible. ### Connection strings @@ -246,7 +246,7 @@ MySQL: --source 'mysql://{username}:{password}@{protocol}({host}:{port})/{database}' ~~~ -`--target` specifies the [CockroachDB connection string]({% link {{ page.version.version }}/connection-parameters.md %}#connect-using-a-url): +`--target` specifies the [CockroachDB connection string]({% link {{site.current_cloud_version}}/connection-parameters.md %}#connect-using-a-url): {% include_cached copy-clipboard.html %} ~~~ @@ -255,16 +255,16 @@ MySQL: ### Fetch mode -MOLT Fetch can use either [`IMPORT INTO`]({% link {{ page.version.version }}/import-into.md %}) or [`COPY FROM`]({% link {{ page.version.version }}/copy-from.md %}) to load data into CockroachDB. +MOLT Fetch can use either [`IMPORT INTO`]({% link {{site.current_cloud_version}}/import-into.md %}) or [`COPY FROM`]({% link {{site.current_cloud_version}}/copy-from.md %}) to load data into CockroachDB. By default, MOLT Fetch uses `IMPORT INTO`: -- `IMPORT INTO` mode achieves the highest throughput, but [requires taking the tables **offline**]({% link {{ page.version.version }}/import-into.md %}#considerations) to achieve its import speed. Tables are taken back online once an [import job]({% link {{ page.version.version }}/import-into.md %}#view-and-control-import-jobs) completes successfully. See [Best practices](#best-practices). 
+- `IMPORT INTO` mode achieves the highest throughput, but [requires taking the tables **offline**]({% link {{site.current_cloud_version}}/import-into.md %}#considerations) to achieve its import speed. Tables are taken back online once an [import job]({% link {{site.current_cloud_version}}/import-into.md %}#view-and-control-import-jobs) completes successfully. See [Best practices](#best-practices). - `IMPORT INTO` mode supports compression using the `--compression` flag, which reduces the amount of storage used. `--use-copy` configures MOLT Fetch to use `COPY FROM`: -- `COPY FROM` mode enables your tables to remain online and accessible. However, it is slower than using [`IMPORT INTO`]({% link {{ page.version.version }}/import-into.md %}). +- `COPY FROM` mode enables your tables to remain online and accessible. However, it is slower than using [`IMPORT INTO`]({% link {{site.current_cloud_version}}/import-into.md %}). - `COPY FROM` mode does not support compression. {{site.data.alerts.callout_info}} @@ -301,7 +301,7 @@ Cloud storage can be used with either the [`IMPORT INTO` or `COPY FROM` modes](# #### Local file server -`--local-path` specifies that MOLT Fetch should write intermediate files to a path within a [local file server]({% link {{ page.version.version }}/use-a-local-file-server.md %}). `local-path-listen-addr` specifies the address of the local file server. For example: +`--local-path` specifies that MOLT Fetch should write intermediate files to a path within a [local file server]({% link {{site.current_cloud_version}}/use-a-local-file-server.md %}). `local-path-listen-addr` specifies the address of the local file server. 
For example: {% include_cached copy-clipboard.html %} ~~~ @@ -368,7 +368,7 @@ To load the data without changing the existing data in the tables, use `'none'`: --table-handling 'none' ~~~ -To [truncate]({% link {{ page.version.version }}/truncate.md %}) tables before loading the data, use `'truncate-if-exists'`: +To [truncate]({% link {{site.current_cloud_version}}/truncate.md %}) tables before loading the data, use `'truncate-if-exists'`: {% include_cached copy-clipboard.html %} ~~~ @@ -396,7 +396,7 @@ This does not apply when [`'drop-on-target-and-recreate'`](#target-table-handlin - A source table is missing a primary key. - A source and table primary key have mismatching types. -- A [`STRING`]({% link {{ page.version.version }}/string.md %}) primary key has a different [collation]({% link {{ page.version.version }}/collate.md %}) on the source and target. +- A [`STRING`]({% link {{site.current_cloud_version}}/string.md %}) primary key has a different [collation]({% link {{site.current_cloud_version}}/collate.md %}) on the source and target. - A source and target column have mismatching types that are not [allowable mappings](#type-mapping). - A target table is missing a column that is in the corresponding source table. - A source column is nullable, but the corresponding target column is not nullable (i.e., the constraint is more strict on the target). @@ -405,54 +405,54 @@ This does not apply when [`'drop-on-target-and-recreate'`](#target-table-handlin - A target table has a column that is not in the corresponding source table. - A source column has a `NOT NULL` constraint, and the corresponding target column is nullable (i.e., the constraint is less strict on the target). 
-- A [`DEFAULT`]({% link {{ page.version.version }}/default-value.md %}), [`CHECK`]({% link {{ page.version.version }}/check.md %}), [`FOREIGN KEY`]({% link {{ page.version.version }}/foreign-key.md %}), or [`UNIQUE`]({% link {{ page.version.version }}/unique.md %}) constraint is specified on a target column and not on the source column. +- A [`DEFAULT`]({% link {{site.current_cloud_version}}/default-value.md %}), [`CHECK`]({% link {{site.current_cloud_version}}/check.md %}), [`FOREIGN KEY`]({% link {{site.current_cloud_version}}/foreign-key.md %}), or [`UNIQUE`]({% link {{site.current_cloud_version}}/unique.md %}) constraint is specified on a target column and not on the source column. #### Type mapping If [`'drop-on-target-and-recreate'`](#target-table-handling) is set, MOLT Fetch automatically creates a CockroachDB schema that is compatible with the source data. The column types are determined as follows: -- PostgreSQL types are mapped to existing CockroachDB [types]({% link {{ page.version.version }}/data-types.md %}) that have the same [`OID`]({% link {{ page.version.version }}/oid.md %}). +- PostgreSQL types are mapped to existing CockroachDB [types]({% link {{site.current_cloud_version}}/data-types.md %}) that have the same [`OID`]({% link {{site.current_cloud_version}}/oid.md %}). 
- The following MySQL types are mapped to corresponding CockroachDB types: | MySQL type | CockroachDB type | |-----------------------------------------------------|----------------------------------------------------------------------------------------------------------------| - | `CHAR`, `CHARACTER`, `VARCHAR`, `NCHAR`, `NVARCHAR` | [`VARCHAR`]({% link {{ page.version.version }}/string.md %}) | - | `TINYTEXT`, `TEXT`, `MEDIUMTEXT`, `LONGTEXT` | [`STRING`]({% link {{ page.version.version }}/string.md %}) | - | `GEOMETRY` | [`GEOMETRY`]({% link {{ page.version.version }}/architecture/glossary.md %}#geometry) | - | `LINESTRING` | [`LINESTRING`]({% link {{ page.version.version }}/linestring.md %}) | - | `POINT` | [`POINT`]({% link {{ page.version.version }}/point.md %}) | - | `POLYGON` | [`POLYGON`]({% link {{ page.version.version }}/polygon.md %}) | - | `MULTIPOINT` | [`MULTIPOINT`]({% link {{ page.version.version }}/multipoint.md %}) | - | `MULTILINESTRING` | [`MULTILINESTRING`]({% link {{ page.version.version }}/multilinestring.md %}) | - | `MULTIPOLYGON` | [`MULTIPOLYGON`]({% link {{ page.version.version }}/multipolygon.md %}) | - | `GEOMETRYCOLLECTION`, `GEOMCOLLECTION` | [`GEOMETRYCOLLECTION`]({% link {{ page.version.version }}/geometrycollection.md %}) | - | `JSON` | [`JSONB`]({% link {{ page.version.version }}/jsonb.md %}) | - | `TINYINT`, `INT1` | [`INT2`]({% link {{ page.version.version }}/int.md %}) | - | `BLOB` | [`BYTES`]({% link {{ page.version.version }}/bytes.md %}) | - | `SMALLINT`, `INT2` | [`INT2`]({% link {{ page.version.version }}/int.md %}) | - | `MEDIUMINT`, `INT`, `INTEGER`, `INT4` | [`INT4`]({% link {{ page.version.version }}/int.md %}) | - | `BIGINT`, `INT8` | [`INT`]({% link {{ page.version.version }}/int.md %}) | - | `FLOAT` | [`FLOAT4`]({% link {{ page.version.version }}/float.md %}) | - | `DOUBLE` | [`FLOAT`]({% link {{ page.version.version }}/float.md %}) | - | `DECIMAL`, `NUMERIC`, `REAL` | [`DECIMAL`]({% link {{ page.version.version 
}}/decimal.md %}) (Negative scale values are autocorrected to `0`) | - | `BINARY`, `VARBINARY` | [`BYTES`]({% link {{ page.version.version }}/bytes.md %}) | - | `DATETIME` | [`TIMESTAMP`]({% link {{ page.version.version }}/timestamp.md %}) | - | `TIMESTAMP` | [`TIMESTAMPTZ`]({% link {{ page.version.version }}/timestamp.md %}) | - | `TIME` | [`TIME`]({% link {{ page.version.version }}/time.md %}) | - | `BIT` | [`VARBIT`]({% link {{ page.version.version }}/bit.md %}) | - | `DATE` | [`DATE`]({% link {{ page.version.version }}/date.md %}) | - | `TINYBLOB`, `MEDIUMBLOB`, `LONGBLOB` | [`BYTES`]({% link {{ page.version.version }}/bytes.md %}) | - | `BOOL`, `BOOLEAN` | [`BOOL`]({% link {{ page.version.version }}/bool.md %}) | - | `ENUM` | [`ANY_ENUM`]({% link {{ page.version.version }}/enum.md %}) | - -- To override the default mappings for automatic schema creation, you can map source to target CockroachDB types explicitly. These are specified using a JSON file and `--type-map-file`. The allowable custom mappings are valid CockroachDB aliases, casts, and the following mappings specific to MOLT Fetch and [Verify]({% link {{ page.version.version }}/molt-verify.md %}): - - - [`TIMESTAMP`]({% link {{ page.version.version }}/timestamp.md %}) <> [`TIMESTAMPTZ`]({% link {{ page.version.version }}/timestamp.md %}) - - [`VARCHAR`]({% link {{ page.version.version }}/string.md %}) <> [`UUID`]({% link {{ page.version.version }}/uuid.md %}) - - [`BOOL`]({% link {{ page.version.version }}/bool.md %}) <> [`INT2`]({% link {{ page.version.version }}/int.md %}) - - [`VARBIT`]({% link {{ page.version.version }}/bit.md %}) <> [`TEXT`]({% link {{ page.version.version }}/string.md %}) - - [`JSONB`]({% link {{ page.version.version }}/jsonb.md %}) <> [`TEXT`]({% link {{ page.version.version }}/string.md %}) - - [`INET`]({% link {{ page.version.version }}/inet.md %}) <> [`TEXT`]({% link {{ page.version.version }}/string.md %}) + | `CHAR`, `CHARACTER`, `VARCHAR`, `NCHAR`, `NVARCHAR` | 
[`VARCHAR`]({% link {{site.current_cloud_version}}/string.md %}) | + | `TINYTEXT`, `TEXT`, `MEDIUMTEXT`, `LONGTEXT` | [`STRING`]({% link {{site.current_cloud_version}}/string.md %}) | + | `GEOMETRY` | [`GEOMETRY`]({% link {{site.current_cloud_version}}/architecture/glossary.md %}#geometry) | + | `LINESTRING` | [`LINESTRING`]({% link {{site.current_cloud_version}}/linestring.md %}) | + | `POINT` | [`POINT`]({% link {{site.current_cloud_version}}/point.md %}) | + | `POLYGON` | [`POLYGON`]({% link {{site.current_cloud_version}}/polygon.md %}) | + | `MULTIPOINT` | [`MULTIPOINT`]({% link {{site.current_cloud_version}}/multipoint.md %}) | + | `MULTILINESTRING` | [`MULTILINESTRING`]({% link {{site.current_cloud_version}}/multilinestring.md %}) | + | `MULTIPOLYGON` | [`MULTIPOLYGON`]({% link {{site.current_cloud_version}}/multipolygon.md %}) | + | `GEOMETRYCOLLECTION`, `GEOMCOLLECTION` | [`GEOMETRYCOLLECTION`]({% link {{site.current_cloud_version}}/geometrycollection.md %}) | + | `JSON` | [`JSONB`]({% link {{site.current_cloud_version}}/jsonb.md %}) | + | `TINYINT`, `INT1` | [`INT2`]({% link {{site.current_cloud_version}}/int.md %}) | + | `BLOB` | [`BYTES`]({% link {{site.current_cloud_version}}/bytes.md %}) | + | `SMALLINT`, `INT2` | [`INT2`]({% link {{site.current_cloud_version}}/int.md %}) | + | `MEDIUMINT`, `INT`, `INTEGER`, `INT4` | [`INT4`]({% link {{site.current_cloud_version}}/int.md %}) | + | `BIGINT`, `INT8` | [`INT`]({% link {{site.current_cloud_version}}/int.md %}) | + | `FLOAT` | [`FLOAT4`]({% link {{site.current_cloud_version}}/float.md %}) | + | `DOUBLE` | [`FLOAT`]({% link {{site.current_cloud_version}}/float.md %}) | + | `DECIMAL`, `NUMERIC`, `REAL` | [`DECIMAL`]({% link {{site.current_cloud_version}}/decimal.md %}) (Negative scale values are autocorrected to `0`) | + | `BINARY`, `VARBINARY` | [`BYTES`]({% link {{site.current_cloud_version}}/bytes.md %}) | + | `DATETIME` | [`TIMESTAMP`]({% link {{site.current_cloud_version}}/timestamp.md %}) | + | 
`TIMESTAMP` | [`TIMESTAMPTZ`]({% link {{site.current_cloud_version}}/timestamp.md %}) | + | `TIME` | [`TIME`]({% link {{site.current_cloud_version}}/time.md %}) | + | `BIT` | [`VARBIT`]({% link {{site.current_cloud_version}}/bit.md %}) | + | `DATE` | [`DATE`]({% link {{site.current_cloud_version}}/date.md %}) | + | `TINYBLOB`, `MEDIUMBLOB`, `LONGBLOB` | [`BYTES`]({% link {{site.current_cloud_version}}/bytes.md %}) | + | `BOOL`, `BOOLEAN` | [`BOOL`]({% link {{site.current_cloud_version}}/bool.md %}) | + | `ENUM` | [`ANY_ENUM`]({% link {{site.current_cloud_version}}/enum.md %}) | + +- To override the default mappings for automatic schema creation, you can map source to target CockroachDB types explicitly. These are specified using a JSON file and `--type-map-file`. The allowable custom mappings are valid CockroachDB aliases, casts, and the following mappings specific to MOLT Fetch and [Verify]({% link molt/molt-verify.md %}): + + - [`TIMESTAMP`]({% link {{site.current_cloud_version}}/timestamp.md %}) <> [`TIMESTAMPTZ`]({% link {{site.current_cloud_version}}/timestamp.md %}) + - [`VARCHAR`]({% link {{site.current_cloud_version}}/string.md %}) <> [`UUID`]({% link {{site.current_cloud_version}}/uuid.md %}) + - [`BOOL`]({% link {{site.current_cloud_version}}/bool.md %}) <> [`INT2`]({% link {{site.current_cloud_version}}/int.md %}) + - [`VARBIT`]({% link {{site.current_cloud_version}}/bit.md %}) <> [`TEXT`]({% link {{site.current_cloud_version}}/string.md %}) + - [`JSONB`]({% link {{site.current_cloud_version}}/jsonb.md %}) <> [`TEXT`]({% link {{site.current_cloud_version}}/string.md %}) + - [`INET`]({% link {{site.current_cloud_version}}/inet.md %}) <> [`TEXT`]({% link {{site.current_cloud_version}}/string.md %}) `--type-map-file` specifies the path to the JSON file containing the explicit type mappings. For example: @@ -537,7 +537,7 @@ Continuation is not possible when using [direct copy mode](#direct-copy). 
#### List active continuation tokens -To view all active continuation tokens, issue a `molt fetch tokens list` command along with `--conn-string`, which specifies the [connection string]({% link {{ page.version.version }}/connection-parameters.md %}#connect-using-a-url) for the target CockroachDB database. For example: +To view all active continuation tokens, issue a `molt fetch tokens list` command along with `--conn-string`, which specifies the [connection string]({% link {{site.current_cloud_version}}/connection-parameters.md %}#connect-using-a-url) for the target CockroachDB database. For example: {% include_cached copy-clipboard.html %} ~~~ shell @@ -600,7 +600,7 @@ You can use the `cdc_cursor` value with an external change data capture (CDC) to The following examples demonstrate how to issue `molt fetch` commands to load data into CockroachDB. These examples assume that [secure connections](#secure-connections) to the source and target database are used. {{site.data.alerts.callout_success}} -After successfully running MOLT Fetch, you can run [`molt verify`]({% link {{ page.version.version }}/molt-verify.md %}) to confirm that replication worked successfully without missing or mismatched rows. +After successfully running MOLT Fetch, you can run [`molt verify`]({% link molt/molt-verify.md %}) to confirm that replication worked successfully without missing or mismatched rows. 
{{site.data.alerts.end}} ### Load PostgreSQL data via S3 with ongoing replication @@ -743,8 +743,8 @@ molt fetch \ ## See also -- [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) -- [Migration Overview]({% link {{ page.version.version }}/migration-overview.md %}) -- [Migrate from PostgreSQL]({% link {{ page.version.version }}/migrate-from-postgres.md %}) -- [Migrate from MySQL]({% link {{ page.version.version }}/migrate-from-mysql.md %}) -- [Migrate from CSV]({% link {{ page.version.version }}/migrate-from-csv.md %}) +- [MOLT Verify]({% link molt/molt-verify.md %}) +- [Migration Overview]({% link {{site.current_cloud_version}}/migration-overview.md %}) +- [Migrate from PostgreSQL]({% link {{site.current_cloud_version}}/migrate-from-postgres.md %}) +- [Migrate from MySQL]({% link {{site.current_cloud_version}}/migrate-from-mysql.md %}) +- [Migrate from CSV]({% link {{site.current_cloud_version}}/migrate-from-csv.md %}) diff --git a/src/current/v24.1/molt-verify.md b/src/current/molt/molt-verify.md similarity index 76% rename from src/current/v24.1/molt-verify.md rename to src/current/molt/molt-verify.md index 8a56403f29c..76a0fdac110 100644 --- a/src/current/v24.1/molt-verify.md +++ b/src/current/molt/molt-verify.md @@ -9,7 +9,7 @@ docs_area: migrate {% include feature-phases/preview.md %} {{site.data.alerts.end}} -MOLT Verify checks for data discrepancies between a source database and CockroachDB during a [database migration]({% link {{ page.version.version }}/migration-overview.md %}). +MOLT Verify checks for data discrepancies between a source database and CockroachDB during a [database migration]({% link {{site.current_cloud_version}}/migration-overview.md %}). 
The tool performs the following verifications to ensure data integrity during a migration: @@ -25,8 +25,8 @@ For a demo of MOLT Verify, watch the following video: The following databases are currently supported: -- [PostgreSQL]({% link {{ page.version.version }}/migrate-from-postgres.md %}) -- [MySQL]({% link {{ page.version.version }}/migrate-from-mysql.md %}) +- [PostgreSQL]({% link {{site.current_cloud_version}}/migrate-from-postgres.md %}) +- [MySQL]({% link {{site.current_cloud_version}}/migrate-from-mysql.md %}) - CockroachDB ## Install and run MOLT Verify @@ -45,9 +45,9 @@ For previous binaries, see the [MOLT version manifest](https://molt.cockroachdb. Complete the following items before using MOLT Verify: -- The SQL user running MOLT Verify must have the [`SELECT` privilege]({% link {{ page.version.version }}/grant.md %}#supported-privileges) on both the source and target CockroachDB tables. +- The SQL user running MOLT Verify must have the [`SELECT` privilege]({% link {{site.current_cloud_version}}/grant.md %}#supported-privileges) on both the source and target CockroachDB tables. -- Percent-encode the connection strings for the source database and [CockroachDB]({% link {{ page.version.version }}/connect-to-the-database.md %}). This ensures that the MOLT tools can parse special characters in your password. +- Percent-encode the connection strings for the source database and [CockroachDB]({% link {{site.current_cloud_version}}/connect-to-the-database.md %}). This ensures that the MOLT tools can parse special characters in your password. - Given a password `a$52&`, pass it to the `molt escape-password` command with single quotes: @@ -73,7 +73,7 @@ Flag | Description ----------|------------ `--source` | (Required) Connection string for the source database. `--target` | (Required) Connection string for the target database. -`--concurrency` | Number of threads to process at a time when reading the tables.
**Default:** 16
For faster verification, set this flag to a higher value. {% comment %}
Note: Table splitting by shard only works for [`INT`]({% link {{ page.version.version }}/int.md %}), [`UUID`]({% link {{ page.version.version }}/uuid.md %}), and [`FLOAT`]({% link {{ page.version.version }}/float.md %}) data types.{% endcomment %} +`--concurrency` | Number of threads to process at a time when reading the tables.
**Default:** 16
For faster verification, set this flag to a higher value. {% comment %}
Note: Table splitting by shard only works for [`INT`]({% link {{site.current_cloud_version}}/int.md %}), [`UUID`]({% link {{site.current_cloud_version}}/uuid.md %}), and [`FLOAT`]({% link {{site.current_cloud_version}}/float.md %}) data types.{% endcomment %} `--row-batch-size` | Number of rows to get from a table at a time.
**Default:** 20000 `--table-filter` | Verify tables that match a specified [regular expression](https://wikipedia.org/wiki/Regular_expression). `--schema-filter` | Verify schemas that match a specified [regular expression](https://wikipedia.org/wiki/Regular_expression). @@ -110,19 +110,19 @@ When verification completes, the output displays a summary message like the foll {"level":"info","type":"summary","table_schema":"public","table_name":"common_table","num_truth_rows":6,"num_success":3,"num_conditional_success":0,"num_missing":2,"num_mismatch":1,"num_extraneous":2,"num_live_retry":0,"num_column_mismatch":0,"message":"finished row verification on public.common_table (shard 1/1)"} ~~~ -- `num_missing` is the number of rows that are missing on the target database. You can [add any missing data]({% link {{ page.version.version }}/insert.md %}) to the target database and run `molt verify` again. +- `num_missing` is the number of rows that are missing on the target database. You can [add any missing data]({% link {{site.current_cloud_version}}/insert.md %}) to the target database and run `molt verify` again. - `num_mismatch` is the number of rows with mismatched values on the target database. - `num_extraneous` is the number of extraneous tables on the target database. -- `num_column_mismatch` is the number of columns with mismatched types on the target database, preventing `molt verify` from comparing the column's rows. For example, if your source table uses an auto-incrementing ID, MOLT Verify will identify a mismatch with CockroachDB's [`UUID`]({% link {{ page.version.version }}/uuid.md %}) type. In such cases, you might fix the mismatch by [creating a composite type]({% link {{ page.version.version }}/create-type.md %}#create-a-composite-data-type) on CockroachDB that uses the auto-incrementing ID. +- `num_column_mismatch` is the number of columns with mismatched types on the target database, preventing `molt verify` from comparing the column's rows. 
For example, if your source table uses an auto-incrementing ID, MOLT Verify will identify a mismatch with CockroachDB's [`UUID`]({% link {{site.current_cloud_version}}/uuid.md %}) type. In such cases, you might fix the mismatch by [creating a composite type]({% link {{site.current_cloud_version}}/create-type.md %}#create-a-composite-data-type) on CockroachDB that uses the auto-incrementing ID. - `num_success` is the number of rows that matched. - `num_conditional_success` is the number of rows that matched while having a column mismatch due to a type difference. This value indicates that all other columns that could be compared have matched successfully. You should manually review the warnings and errors in the output to determine whether the column mismatches can be ignored. ## Known limitations - MOLT Verify compares 20,000 rows at a time by default, and row values can change between batches, potentially resulting in temporary inconsistencies in data. If `--live` mode is enabled, MOLT Verify retries verification on these rows. To configure the row batch size, use the `--row-batch-size` [flag](#flags). - MOLT Verify checks for collation mismatches on [primary key]({% link {{ page.version.version }}/primary-key.md %}) columns. This may cause validation to fail when a [`STRING`]({% link {{ page.version.version }}/string.md %}) is used as a primary key and the source and target databases are using different [collations]({% link {{ page.version.version }}/collate.md %}). +- MOLT Verify checks for collation mismatches on [primary key]({% link {{site.current_cloud_version}}/primary-key.md %}) columns. This may cause validation to fail when a [`STRING`]({% link {{site.current_cloud_version}}/string.md %}) is used as a primary key and the source and target databases are using different [collations]({% link {{site.current_cloud_version}}/collate.md %}). - MOLT Verify might give an error in case of schema changes on either the source or target database. 
-- [Geospatial types]({% link {{ page.version.version }}/spatial-data-overview.md %}#spatial-objects) cannot yet be compared. +- [Geospatial types]({% link {{site.current_cloud_version}}/spatial-data-overview.md %}#spatial-objects) cannot yet be compared. The following limitations are specific to MySQL: @@ -131,8 +131,8 @@ The following limitations are specific to MySQL: ## See also -- [MOLT Fetch]({% link {{ page.version.version }}/molt-fetch.md %}) -- [Migration Overview]({% link {{ page.version.version }}/migration-overview.md %}) -- [Migrate from PostgreSQL]({% link {{ page.version.version }}/migrate-from-postgres.md %}) -- [Migrate from MySQL]({% link {{ page.version.version }}/migrate-from-mysql.md %}) -- [Migrate from CSV]({% link {{ page.version.version }}/migrate-from-csv.md %}) +- [MOLT Fetch]({% link molt/molt-fetch.md %}) +- [Migration Overview]({% link {{site.current_cloud_version}}/migration-overview.md %}) +- [Migrate from PostgreSQL]({% link {{site.current_cloud_version}}/migrate-from-postgres.md %}) +- [Migrate from MySQL]({% link {{site.current_cloud_version}}/migrate-from-mysql.md %}) +- [Migrate from CSV]({% link {{site.current_cloud_version}}/migrate-from-csv.md %}) diff --git a/src/current/v23.1/live-migration-service.md b/src/current/v23.1/live-migration-service.md deleted file mode 100644 index dec3c51062d..00000000000 --- a/src/current/v23.1/live-migration-service.md +++ /dev/null @@ -1,676 +0,0 @@ ---- -title: Use the Live Migration Service -summary: Learn how to use the Live Migration Service to shadow application traffic, perform cutover, and migrate a database to CockroachDB. -toc: true -docs_area: migrate ---- - -{{site.data.alerts.callout_info}} -{% include feature-phases/preview.md %} -{{site.data.alerts.end}} - -MOLT LMS (Live Migration Service) is used during a [live migration]({% link {{ page.version.version }}/migration-overview.md %}#minimal-downtime) to CockroachDB. 
- -The LMS is a self-hosted, horizontally scalable proxy that routes traffic between an application, a source database, and a target CockroachDB database. You use the LMS to control which database, as the "source of truth", is serving reads and writes to an application. You can optionally configure the LMS to [shadow production traffic](#shadowing-modes) from the source database and validate the query results on CockroachDB. When you have sufficiently tested your application and are confident with its consistency and performance on CockroachDB, you use the LMS to [perform the cutover](#perform-a-cutover) to CockroachDB. - -MOLT LMS is self-hosted on [Kubernetes](https://kubernetes.io/) and [configured using Helm](#configuration). At a high level, the LMS consists of the following: - -- A number of proxy [instances](#lms-instances) (running in separate Kubernetes pods) across which application traffic is distributed and routed to the source and target databases. -- An "orchestrator" service (running in a single Kubernetes pod) that coordinates the proxy instances and sends the cutover commands. - -This page describes how to [install](#installation), [configure](#configuration), [secure](#security), and [use the LMS](#molt-lms-cli) to perform a live migration. {% comment %}For more information, see [Migration Strategy: Live Migration]({% link {{ page.version.version }}/migration-strategy-live-migration.md %}).{% endcomment %} - -## Terminology - -- A *live migration* keeps two production databases online (a source and a target database) and uses either replication or dual writing to keep data identical between them until a final cutover. -- The *source of truth* is the database that serves reads and writes to the application during a live migration. A cutover switches the source of truth. -- *Shadowing* is the execution of source SQL statements on the target database in parallel. The LMS supports multiple [shadowing modes](#shadowing-modes). 
- -## Requirements - -- [Kubernetes](https://kubernetes.io/) cluster -- [Helm](https://helm.sh/docs/intro/install/) package manager for Kubernetes - -#### Supported database technologies - -- [PostgreSQL]({% link {{ page.version.version }}/migrate-from-postgres.md %}) (source) -- [MySQL]({% link {{ page.version.version }}/migrate-from-mysql.md %}) (source) -- CockroachDB (source and target) - -## Installation - -1. Add the Helm chart repository at `https://molt.cockroachdb.com/lms/charts/` with [`helm repo add`](https://helm.sh/docs/helm/helm_repo_add/). Then install the chart with [`helm install`](https://helm.sh/docs/helm/helm_install/). For example: - - {% include_cached copy-clipboard.html %} - ~~~ shell - helm repo add lms https://molt.cockroachdb.com/lms/charts/ - helm install lms lms/lms - ~~~ - -1. Port-forward from your local machine to the orchestrator, using the release name that you specified with `helm install`. The orchestrator port is configurable and is [`4200` by default](#service-type). - - {% include_cached copy-clipboard.html %} - ~~~ shell - kubectl port-forward svc/{releasename}-lms-orchestrator 4200:4200 - ~~~ - - {{site.data.alerts.callout_success}} - If you named the release `lms`, exclude `{releasename}-` from the command. - {{site.data.alerts.end}} - -1. To set up the LMS resources, [install `molt-lms-cli`](#molt-lms-cli) and run the following command, specifying the orchestrator URL: - - {% include_cached copy-clipboard.html %} - ~~~ shell - molt-lms-cli initialize --orchestrator-url localhost:4200 - ~~~ - -1. The LMS proxy instances and orchestrator are initialized as Kubernetes pods: - - {% include_cached copy-clipboard.html %} - ~~~ shell - kubectl get pods - ~~~ - - ~~~ - NAME READY STATUS RESTARTS AGE - lms-orchestrator-86779b87f7-qrk9q 1/1 Running 0 52s - lms-576bffdd8c-pmh6g 1/1 Running 0 52s - lms-576bffdd8c-pbdvl 1/1 Running 0 52s - lms-576bffdd8c-s7kx4 1/1 Running 0 52s - ... 
- ~~~ - - You will see `lms` pods that match the configured [number of LMS instances](#lms-instances), along with one `lms-orchestrator` pod. - - The pod names are prefixed with the release name you specified when running `helm install`, unless you named the release `lms`. - -## Configuration - -To configure the LMS, override the [Helm chart values](https://github.com/cockroachdb/molt-helm-charts/blob/main/lms/values.yaml). This involves a rolling restart of your pods. For information on setting Helm chart values, see the [Helm documentation](https://helm.sh/docs/helm/helm_upgrade/). - -This section describes the most important and commonly used values. For details on all configurable values, refer to the [`values.yaml`](https://github.com/cockroachdb/molt-helm-charts/blob/main/lms/values.yaml) file. - -#### Source dialect - -~~~ yaml -lms: - sourceDialect: "" -... -orchestrator: - sourceDialect: "" -~~~ - -You **must** provide a string value for `sourceDialect`, which specifies the dialect of your source database. Supported dialects are: - -- `postgres`: PostgreSQL -- `mysql`: MySQL -- `cockroach`: CockroachDB - -#### Shadowing - -~~~ yaml -lms: - shadowMode: none -~~~ - -`lms.shadowMode` specifies the shadowing behavior used by the LMS. This should depend on your specific migration requirements. For details, see [Shadowing modes](#shadowing-modes). - -#### LMS instances - -~~~ yaml -lms: - replicaCount: 3 -~~~ - -`lms.replicaCount` determines the number of LMS instances created as `lms` pods on the Kubernetes cluster, across which application traffic is distributed. This defaults to `3`. - -#### Connection strings - -The following connection strings are specific to your configuration: - -- External connection string for the source database. -- External connection string for the target CockroachDB database. -- Internal connection string for the LMS. - -You should specify these in external Kubernetes secrets. 
For details, see [Manage external secret](#manage-external-secrets). - -{{site.data.alerts.callout_danger}} -Storing sensitive keys in external secrets is **strongly** recommended. -{{site.data.alerts.end}} - -#### Service type - -~~~ yaml -lms: - service: - type: ClusterIP - port: 9043 - metricsPort: 9044 -... -orchestrator: - service: - type: ClusterIP - port: 4200 - metricsPort: 4201 -~~~ - -`service` specifies the [Kubernetes service type](https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types) and ports for the LMS instances and orchestrator. - -#### Prometheus Operator - -~~~ yaml -serviceMonitor: - enabled: false - labels: {} - annotations: {} - interval: 30s - namespaced: false -~~~ - -`serviceMonitor` is a custom resource used with the [Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator/tree/main) for monitoring Kubernetes. For more information, see the [Prometheus Operator documentation](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/user-guides/getting-started.md). - -## Security - -{{site.data.alerts.callout_danger}} -Cockroach Labs **strongly** recommends the following: - -- Manage your LMS and orchestrator configurations in [external Kubernetes secrets](#manage-external-secrets). -- To establish secure connections between the LMS pods and with your client, generate and set up TLS certificates for the [source database and CockroachDB](#configure-an-lms-secret), [LMS](#configure-the-lms-certificates), and [orchestrator](#configure-the-orchestrator-and-client-certificates). 
-{{site.data.alerts.end}} - -#### Manage external secrets - -Cockroach Labs recommends using [External Secrets Operator](https://external-secrets.io/latest/) to [create and manage Kubernetes secrets](https://external-secrets.io/latest/introduction/getting-started/#create-your-first-externalsecret) that contain: - -- [Your LMS configuration](#configure-an-lms-secret), which includes the source and target database connection strings. -- [Your orchestrator configuration](#configure-an-orchestrator-secret), which includes the LMS and target database connection strings. -- Your [LMS](#configure-the-lms-certificates) and [orchestrator](#configure-the-orchestrator-and-client-certificates) certificates, which you should have generated separately. - -For information on Kubernetes secrets, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/configuration/secret/). - -#### Configure an LMS secret - -Create an external secret that specifies the connection strings for the source and target CockroachDB database. 
- -For example, the following `ExternalSecret` called `lms-config` uses AWS Secrets Manager as the [`SecretStore`](https://external-secrets.io/latest/introduction/getting-started/#create-your-first-secretstore), and references a remote [AWS secret](https://docs.aws.amazon.com/secretsmanager/latest/userguide/create_secret.html) called `lms-secret`: - -~~~ yaml -apiVersion: external-secrets.io/v1beta1 -kind: ExternalSecret -metadata: - name: lms-config -spec: - refreshInterval: 1h - secretStoreRef: - name: aws-secret-store - kind: SecretStore - target: - name: lms-config - creationPolicy: Owner - template: - engineVersion: v2 - data: - config.json: | - { - "INIT_SOURCE": "{% raw %}{{ .source }}{% endraw %}", - "INIT_TARGET": "{% raw %}{{ .target }}{% endraw %}" - } - data: - - secretKey: source - remoteRef: - key: lms-secret - property: INIT_SOURCE - - secretKey: target - remoteRef: - key: lms-secret - property: INIT_TARGET -~~~ - -The connection strings are specified with the following keys inside `config.json`: - -- `INIT_SOURCE`: External connection string for the source database, including the paths to your client certificate and keys. -- `INIT_TARGET`: External [connection string for the CockroachDB database]({% link {{ page.version.version }}/connection-parameters.md %}#connect-using-a-url), including the paths to your client certificate and keys. 
- -The remote secret `lms-secret` will contain the full connection strings and paths, such that the `config.json` keys resolve to: - -~~~ json -"INIT_SOURCE": "mysql://{username}:{password}@{host}:{port}/{database}?sslmode=verify-full?sslrootcert=path/to/mysql.ca&sslcert=path/to/mysql.crt&sslkey=path/to/mysql.key", -"INIT_TARGET": "postgresql://{username}:{password}@{host}:{port}/{database}?sslmode=verify-full?sslrootcert=path/to/ca.crt&sslcert=path/to/client.username.crt&sslkey=path/to/client.username.key" -~~~ - -In the [Helm configuration](#configuration), `lms.configSecretName` must specify the external secret `name`: - -~~~ yaml -lms: - configSecretName: "lms-config" -~~~ - -#### Configure an orchestrator secret - -Create an external secret that specifies the connection strings for the LMS and target CockroachDB database. - -For example, the following `ExternalSecret` called `orch-config` uses AWS Secrets Manager as the [`SecretStore`](https://external-secrets.io/latest/introduction/getting-started/#create-your-first-secretstore), and references a remote [AWS secret](https://docs.aws.amazon.com/secretsmanager/latest/userguide/create_secret.html) called `orch-secret`: - -~~~ yaml -apiVersion: external-secrets.io/v1beta1 -kind: ExternalSecret -metadata: - name: orch-config -spec: - refreshInterval: 1h - secretStoreRef: - name: aws-secret-store - kind: SecretStore - target: - name: orch-config - creationPolicy: Owner - template: - engineVersion: v2 - data: - config.json: | - { - "LMS_URL": "{% raw %}{{ .lmsUrl }}{% endraw %}", - "CRDB_URL": "{% raw %}{{ .crdbUrl }}{% endraw %}" - } - data: - - secretKey: lmsUrl - remoteRef: - key: orch-secret - property: LMS_URL - - secretKey: crdbUrl - remoteRef: - key: orch-secret - property: CRDB_URL -~~~ - -The connection strings are specified with the following keys inside `config.json`: - -- `LMS_URL`: Internal connection string for the LMS, specifying the username and password of the source database. 
The format depends on your source dialect: - - - MySQL: `{username}:{password}@({releasename}-lms.{namespace}.svc.cluster.local:{port})/{database}` - - PostgreSQL: `postgresql://{username}:{password}@{releasename}-lms.{namespace}.svc.cluster.local:{port}/{database}` - - {{site.data.alerts.callout_success}} - If you named the release `lms` during [installation](#installation), exclude `{releasename}-` from the LMS connection string. - {{site.data.alerts.end}} - -- `CRDB_URL`: External [connection string for the CockroachDB database]({% link {{ page.version.version }}/connection-parameters.md %}#connect-using-a-url), including the paths to your client certificate and keys. - -The remote secret `orch-secret` will contain the full connection strings, such that the `config.json` keys resolve to: - -~~~ json -"LMS_URL": "{username}:{password}@({releasename}-molt-lms.{namespace}.svc.cluster.local:{port})/{database}", -"CRDB_URL": "postgresql://{username}:{password}@{host}:{port}/{database}?sslmode=verify-full?sslrootcert=path/to/ca.crt&sslcert=path/to/client.username.crt&sslkey=path/to/client.username.key" -~~~ - -In the [Helm configuration](#configuration), `orchestrator.configSecretName` must specify the external secret `name`: - -~~~ yaml -orchestrator: - configSecretName: "orch-config" -~~~ - -#### Configure the LMS certificates - -Create an external secret that specifies the LMS certificate, key, and (optional) CA certificate. 
- -For example, the following `ExternalSecret` called `lms-tls` uses AWS Secrets Manager as the [`SecretStore`](https://external-secrets.io/latest/introduction/getting-started/#create-your-first-secretstore), and references a remote [AWS secret](https://docs.aws.amazon.com/secretsmanager/latest/userguide/create_secret.html) called `lms-certs`: - -~~~ yaml -apiVersion: external-secrets.io/v1beta1 -kind: ExternalSecret -metadata: - name: lms-tls -spec: - refreshInterval: 1h - secretStoreRef: - name: aws-secret-store - kind: SecretStore - target: - name: lms-tls - creationPolicy: Owner - template: - engineVersion: v2 - data: - lms-ca.crt: '{% raw %}{{ .caCert }}{% endraw %}' - lms-tls.crt: '{% raw %}{{ .serverCert }}{% endraw %}' - lms-tls.key: '{% raw %}{{ .serverKey }}{% endraw %}' - data: - - secretKey: caCert - remoteRef: - key: lms-certs - property: caCert - - secretKey: serverCert - remoteRef: - key: lms-certs - property: serverCert - - secretKey: serverKey - remoteRef: - key: lms-certs - property: serverKey -~~~ - -In the preceding example, each `.crt` and `.key` filename is associated with its corresponding value in the remote secret `lms-certs`. - -In the [Helm configuration](#configuration), `lms.sslVolumes` and `lms.sslVolumeMounts` must specify [volumes](https://kubernetes.io/docs/concepts/storage/volumes/#secret) and mount paths that contain the server-side certificates. The path to each file is specified as an environment variable in `lms.env`. Cockroach Labs recommends mounting certificates to `/app/certs`. 
- -~~~ yaml -lms: - sslVolumes: - - name: lms-tls - secret: - secretName: lms-tls - sslVolumeMounts: - - mountPath: "/app/certs" - name: lms-tls - readOnly: true - env: - - name: LMS_SSL_CA - value: /app/certs/lms-ca.crt - - name: LMS_SSL_CERT - value: /app/certs/lms-tls.crt - - name: LMS_SSL_KEY - value: /app/certs/lms-tls.key -~~~ - -#### Configure the orchestrator and client certificates - -Create an external secret that specifies the orchestrator certificate, key, and (optional) CA certificate. - -For example, the following `ExternalSecret` called `orch-tls` uses AWS Secrets Manager as the [`SecretStore`](https://external-secrets.io/latest/introduction/getting-started/#create-your-first-secretstore), and references a remote [AWS secret](https://docs.aws.amazon.com/secretsmanager/latest/userguide/create_secret.html) called `orch-certs`: - -~~~ yaml -apiVersion: external-secrets.io/v1beta1 -kind: ExternalSecret -metadata: - name: orch-tls -spec: - refreshInterval: 1h - secretStoreRef: - name: aws-secret-store - kind: SecretStore - target: - name: orch-tls - creationPolicy: Owner - template: - engineVersion: v2 - data: - orch-ca.crt: '{% raw %}{{ .caCert }}{% endraw %}' - orch-tls.crt: '{% raw %}{{ .serverCert }}{% endraw %}' - orch-tls.key: '{% raw %}{{ .serverKey }}{% endraw %}' - data: - - secretKey: caCert - remoteRef: - key: orch-certs - property: caCert - - secretKey: serverCert - remoteRef: - key: orch-certs - property: serverCert - - secretKey: serverKey - remoteRef: - key: orch-certs - property: serverKey -~~~ - -In the preceding example, each `.crt` and `.key` filename is associated with its corresponding value in the remote secret `orch-certs`. - -In the [Helm configuration](#configuration), `orchestrator.sslVolumes` and `orchestrator.sslVolumeMounts` must specify [volumes](https://kubernetes.io/docs/concepts/storage/volumes/#secret) and mount paths that contain the server-side certificates. 
The path to each file is specified as an environment variable in `orchestrator.env`. Cockroach Labs recommends mounting certificates to `/app/certs`. - -~~~ yaml -orchestrator: - sslVolumes: - - name: orch-tls - secret: - secretName: orch-tls - sslVolumeMounts: - - mountPath: "/app/certs" - name: orch-tls - readOnly: true - env: - - name: ORCH_CA_TLS_CERT - value: /app/certs/orch-ca.crt - - name: ORCH_TLS_CERT - value: /app/certs/orch-tls.crt - - name: ORCH_TLS_KEY - value: /app/certs/orch-tls.key -~~~ - -You will also need to create and specify a CLI client certificate, key, and (optional) CA certificate. It's easiest to specify these as environment variables in the shell that is running `molt-lms-cli`: - -{% include_cached copy-clipboard.html %} -~~~ shell -export CLI_TLS_CA_CERT="{path-to-cli-ca-cert}" -~~~ - -{% include_cached copy-clipboard.html %} -~~~ shell -export CLI_TLS_CLIENT_CERT="{path-to-cli-client-cert}" -~~~ - -{% include_cached copy-clipboard.html %} -~~~ shell -export CLI_TLS_CLIENT_KEY="{path-to-cli-client-key}" -~~~ - -## `molt-lms-cli` - -The `molt-lms-cli` command-line interface is used to inspect the LMS instances and [perform cutover](#perform-a-cutover). - -To install `molt-lms-cli`, download the binary that matches your system. 
To download the latest binary: - -| Operating System | AMD 64-bit | ARM 64-bit | -|------------------|----------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------| -| Windows | [Download](https://molt.cockroachdb.com/lms/cli/molt-lms-cli-latest.windows-amd64.tgz) | [Download](https://molt.cockroachdb.com/lms/cli/molt-lms-cli-latest.windows-arm64.tgz) | -| Linux | [Download](https://molt.cockroachdb.com/lms/cli/molt-lms-cli-latest.linux-amd64.tgz) | [Download](https://molt.cockroachdb.com/lms/cli/molt-lms-cli-latest.linux-arm64.tgz) | -| Mac | [Download](https://molt.cockroachdb.com/lms/cli/molt-lms-cli-latest.darwin-amd64.tgz) | [Download](https://molt.cockroachdb.com/lms/cli/molt-lms-cli-latest.darwin-arm64.tgz) | - -{{site.data.alerts.callout_success}} -For previous binaries, see the [MOLT version manifest](https://molt.cockroachdb.com/lms/cli/versions.html). -{{site.data.alerts.end}} - -### Commands - -| Command | Usage | -|----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `initialize` | Set up the required objects for running the LMS. You must run this before using the LMS. | -| `connections list` | List all client connections to the LMS and their most recent queries. | -| `cutover consistent` | Specify a [consistent cutover](#consistent-cutover). You must also specify `begin`, `commit`, or `abort`. For usage details, see [Consistent cutover](#consistent-cutover). | -| `begin` | Begin a consistent cutover. This pauses traffic to the source database. | -| `commit` | Commit a consistent cutover. This resumes traffic on the target database. This is only effective after running `cutover consistent begin`. 
| -| `abort` | Abort a consistent cutover after running `consistent cutover begin`, unless you have also run `consistent cutover commit`. This resumes traffic to the source database. | -| `status` | Display the current configuration of the LMS instances. | - -{% comment %} -| `cutover immediate` | Initiate an [immediate cutover](#immediate-cutover). This switches the source of truth to the target database. For usage details, see [Immediate cutover](#immediate-cutover). | -{% endcomment %} - -### Flags - -| Flag | Description | -|----------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `--orchestrator-url` | The URL for the orchestrator, using the [configured port](#service-type). Prefix the URL with `https` instead of `http` when using [certificates](#security). This flag is required unless the value is exported as an environment variable using `export CLI_ORCHESTRATOR_URL="{orchestrator-URL}"`. | -| `--tls-ca-cert` | The path to the CA certificate. This can also be [exported](#configure-the-orchestrator-and-client-certificates) as an environment variable using `export CLI_TLS_CA_CERT="{path-to-cli-ca-cert}"`. | -| `--tls-client-cert` | The path to the client certificate. This can also be [exported](#configure-the-orchestrator-and-client-certificates) as an environment variable using `export CLI_TLS_CLIENT_CERT="{path-to-cli-client-cert}"`. | -| `--tls-client-key` | The path to the client key. This can also be [exported](#configure-the-orchestrator-and-client-certificates) as an environment variable using `export CLI_TLS_CLIENT_KEY="{path-to-cli-client-key}"`. 
| - -## Shadowing modes - -The LMS can be configured to shadow production traffic from the source database and validate the query results on the target. The exact behavior is configured with the [`shadowMode`](#shadowing) Helm value. - -### `none` - -MOLT LMS shadowing mode - none - -`shadowMode: none` disables shadowing. - -- The LMS sends application requests to the source of truth only. -- Query results from the source of truth are returned to the application. -- Writes must be manually replicated from the source database to the target database. - -You can use this mode to perform a [consistent cutover](#consistent-cutover), along with a database replication technology that replicates writes to the target database. {% comment %}For an example, see [Consistent cutover without shadowing](#consistent-cutover-without-shadowing).{% endcomment %} - -### `async` - -MOLT LMS shadowing mode - async - -`shadowMode: async` writes to both databases. - -- The LMS sends application requests to the source of truth and target database in asynchronous threads, and waits only for the source of truth to respond. -- Query results from the source of truth are returned to the application. -- If an asynchronous request has not yet completed, subsequent asynchronous requests will be permanently dropped. - -You can use this mode to confirm that your queries succeed on CockroachDB without verifying performance or correctness. - -{{site.data.alerts.callout_info}} -`async` mode is intended for testing purposes. -{{site.data.alerts.end}} - -### `sync` - -MOLT LMS shadowing mode - sync - -`shadowMode: sync` writes to both databases. - -- The LMS sends application requests to the source of truth and the target database, and waits for each to respond. -- Query results from the source of truth are returned to the application. -- Query results from the non-source of truth are discarded. - -{% comment %} -You can use this mode to perform an [immediate cutover](#immediate-cutover). 
-{% endcomment %} - -### `strict-sync` - -MOLT LMS shadowing mode - strict-sync - -`shadowMode: strict-sync` writes to both databases and enforces correctness on both databases. - -- The LMS sends application requests to the source of truth and the target database, and waits for each to respond. -- Query results from the source of truth are returned to the application. -- If the query returns an error on the source of truth, that error is returned to the application. If the query succeeds on the source of truth but fails on the target, the error from the target is returned to the application. -- If the query fails on both databases, the target will return the error from the source of truth. - -{% comment %} -You can use this mode to perform an [immediate cutover](#immediate-cutover). -{% endcomment %} - -## Perform a cutover - -### Consistent cutover - -A consistent cutover maintains data consistency with [minimal downtime]({% link {{ page.version.version }}/migration-overview.md %}#minimal-downtime). The goal of consistent cutover is to stop application traffic long enough for replication to catch up and ensure that the cutover achieves consistency across the two databases. - -When using the LMS, consistent cutover is handled using the [`molt-lms-cli`](#molt-lms-cli) commands `cutover consistent begin` and `cutover consistent commit`, during which application requests are queued and will be responded to after cutover. This delay in response time is related to the maximum duration of any transactions and queries that need to complete, and the time it takes for replication to catch up from the source to the target database. - -{% comment %} -For more information about the consistent cutover approach, see [Migration Strategy: Live Migration]({% link {{ page.version.version }}/migration-strategy-live-migration.md %}). 
-{% endcomment %} - -{{site.data.alerts.callout_info}} -These steps assume you have already followed the overall steps to [prepare for migration]({% link {{ page.version.version }}/migration-overview.md %}#prepare-for-migration). In particular, [update your schema and application queries]({% link {{ page.version.version }}/migration-overview.md %}#update-the-schema-and-queries) to work with CockroachDB. -{{site.data.alerts.end}} - -To perform a consistent cutover with the LMS: - -1. [Configure the LMS](#configuration) with your deployment details, and follow our [security recommendations](#security). - -1. Set the shadowing mode to [`none`](#none). - - {% comment %} - {{site.data.alerts.callout_danger}} - Do not use the [`sync`](#sync) or [`strict-sync`](#strict-sync) shadowing modes when performing a consistent cutover. Data correctness and consistency cannot be guaranteed in these configurations. - {{site.data.alerts.end}} - {% endcomment %} - -1. Set up ongoing replication between the source database and CockroachDB, using a tool that replicates writes to the target database. - -1. Send application requests to the LMS, which routes the traffic to the source database. The source database is designated the source of truth. - -1. Use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to validate that the replicated data on CockroachDB is consistent with the source of truth. - -1. Begin the consistent cutover. **Requests are now queued in the LMS**, including queries from existing connections and new connection requests to the LMS: - - {% include_cached copy-clipboard.html %} - ~~~ shell - molt-lms-cli cutover consistent begin {flags} - ~~~ - - This command tells the LMS to pause all application traffic to the source of truth. The LMS then waits for transactions to complete and prepared statements to close. - -1. Verify that replication on CockroachDB has caught up with the source of truth. 
For example, insert a row on the source database and check that the row exists on CockroachDB. - - If you have an implementation that replicates back to the source database, this should be enabled before committing the cutover. - -1. Once all writes have been replicated to the target database, commit the consistent cutover: - - {% include_cached copy-clipboard.html %} - ~~~ shell - molt-lms-cli cutover consistent commit {flags} - ~~~ - - This command tells the LMS to switch the source of truth to the target database. Application traffic is now routed to the target database, and requests are processed from the queue in the LMS. - - To verify that CockroachDB is now the source of truth, you can run `molt-lms-cli status`. - -1. Again, use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to validate that the data on the source database and CockroachDB are consistent. - -If any problems arise during a consistent cutover: - -- After running `cutover consistent begin`: - - {% include_cached copy-clipboard.html %} - ~~~ shell - molt-lms-cli cutover consistent abort {flags} - ~~~ - - This command tells the LMS to resume application traffic to the source of truth, which has not yet been switched. Cutover **cannot** be aborted after running `cutover consistent commit`. - -- After running `cutover consistent commit`: - - Reissue the `cutover consistent begin` and `cutover consistent commit` commands to revert the source of truth to the source database. - -{% comment %} -### Immediate cutover - -An immediate cutover can potentially [reduce downtime to zero]({% link {{ page.version.version }}/migration-overview.md %}#minimal-downtime), at the likely risk of introducing data inconsistencies between the source and target databases. The LMS is configured to dual write to the source and target databases, while the [`molt-lms-cli`](#molt-lms-cli) command `cutover immediate` initiates cutover. 
- -For more information about the immediate cutover approach, see [Migration Strategy: Live Migration]({% link {{ page.version.version }}/migration-strategy-live-migration.md %}). - -To perform an immediate cutover with the LMS: - -{{site.data.alerts.callout_info}} -These steps assume you have already followed the overall steps to [prepare for migration]({% link {{ page.version.version }}/migration-overview.md %}#prepare-for-migration). In particular, [update your schema and application queries]({% link {{ page.version.version }}/migration-overview.md %}#update-the-schema-and-queries) to work with CockroachDB. -{{site.data.alerts.end}} - -1. [Configure the LMS](#configuration) with your deployment details, and follow our [security recommendations](#security). - -1. Set the shadowing mode to [`sync`](#sync) or [`strict-sync`](#strict-sync). - -1. Send application requests to the LMS, which routes the traffic to the source database and to CockroachDB. The source database is designated the source of truth. - -1. Use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to validate that the replicated data on CockroachDB is consistent with the source of truth. - - To ensure data integrity, shadowing must be enabled for a sufficient duration with a low error rate. All LMS instances should have been continuously shadowing your workload for the past **seven days** at minimum, with only transient inconsistencies caused by events such as [transaction retry errors]({% link {{ page.version.version }}/transaction-retry-error-reference.md %}). The longer shadowing has been enabled, the better this allows you to evaluate consistency. - -1. Once nearly all data from the source database is replicated to CockroachDB (for example, with a <1 second delay or <1000 rows), initiate the cutover: - - {% include_cached copy-clipboard.html %} - ~~~ shell - molt-lms-cli cutover immediate {flags} - ~~~ - - This command tells the LMS to switch the source of truth to CockroachDB. 
Application traffic is immediately directed to CockroachDB. - -1. Any writes that were made during the cutover will have been missed on CockroachDB. Use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to identify the inconsistencies. These will need to be manually reconciled. -{% endcomment %} - -## See also - -- [Migration Overview]({% link {{ page.version.version }}/migration-overview.md %}) -{% comment %}- [Migration Strategy: Live Migration]({% link {{ page.version.version }}/migration-strategy-live-migration.md %}){% endcomment %} -- [Use the Schema Conversion Tool](https://www.cockroachlabs.com/docs/cockroachcloud/migrations-page) -- [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) -- [Migrate from PostgreSQL]({% link {{ page.version.version }}/migrate-from-postgres.md %}) -- [Migrate from MySQL]({% link {{ page.version.version }}/migrate-from-mysql.md %}) \ No newline at end of file diff --git a/src/current/v23.1/migrate-from-mysql.md b/src/current/v23.1/migrate-from-mysql.md index 18150b7df1e..2a1adcde9a2 100644 --- a/src/current/v23.1/migrate-from-mysql.md +++ b/src/current/v23.1/migrate-from-mysql.md @@ -34,7 +34,7 @@ Identifiers are case-sensitive in MySQL and [case-insensitive in CockroachDB]({% The MySQL [`AUTO_INCREMENT`](https://dev.mysql.com/doc/refman/8.0/en/example-auto-increment.html) attribute, which creates sequential column values, is not supported in CockroachDB. When [using the Schema Conversion Tool](https://www.cockroachlabs.com/docs/cockroachcloud/migrations-page?filters=mysql#convert-a-schema), columns with `AUTO_INCREMENT` can be converted to use [sequences]({% link {{ page.version.version }}/create-sequence.md %}), `UUID` values with [`gen_random_uuid()`]({% link {{ page.version.version }}/functions-and-operators.md %}#id-generation-functions), or unique `INT8` values using [`unique_rowid()`]({% link {{ page.version.version }}/functions-and-operators.md %}#id-generation-functions). 
Cockroach Labs does not recommend using a sequence to define a primary key column. For more information, see [Unique ID best practices]({% link {{ page.version.version }}/performance-best-practices-overview.md %}#unique-id-best-practices). {{site.data.alerts.callout_info}} -Changing a column type during schema conversion will cause [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to identify a type mismatch during [data validation](#step-3-validate-the-migrated-data). This is expected behavior. +Changing a column type during schema conversion will cause [MOLT Verify]({% link molt/molt-verify.md %}) to identify a type mismatch during [data validation](#step-3-validate-the-migrated-data). This is expected behavior. {{site.data.alerts.end}} #### `ENUM` type @@ -158,7 +158,7 @@ Use the [Schema Conversion Tool](https://www.cockroachlabs.com/docs/cockroachclo Click **Save**. - This is a workaround to prevent [data validation](#step-3-validate-the-migrated-data) from failing due to collation mismatches. For more details, see the [MOLT Verify] ({% link {{ page.version.version }}/molt-verify.md %}#limitations) documentation. + This is a workaround to prevent [data validation](#step-3-validate-the-migrated-data) from failing due to collation mismatches. For more details, see the [MOLT Verify]({% link molt/molt-verify.md %}#known-limitations) documentation. 1. Click [**Migrate Schema**](https://www.cockroachlabs.com/docs/cockroachcloud/migrations-page?filters=mysql#migrate-the-schema) to create a new {{ site.data.products.serverless }} cluster with the converted schema. Name the database `world`. @@ -358,9 +358,9 @@ By default, [`IMPORT INTO`]({% link {{ page.version.version }}/import-into.md %} ### Step 3. Validate the migrated data -Use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to check that the data on MySQL and CockroachDB are consistent.
+Use [MOLT Verify]({% link molt/molt-verify.md %}) to check that the data on MySQL and CockroachDB are consistent. -1. [Install MOLT Verify.]({% link {{ page.version.version }}/molt-verify.md %}) +1. [Install MOLT Verify.]({% link molt/molt-verify.md %}) 1. In the directory where you installed MOLT Verify, use the following command to compare the two databases, specifying the [JDBC connection string for MySQL](https://dev.mysql.com/doc/connector-j/8.1/en/connector-j-reference-jdbc-url-format.html) with `--source` and the SQL connection string for CockroachDB with `--target`: @@ -403,7 +403,7 @@ To learn more, see the [Migration Overview]({% link {{ page.version.version }}/m - [Migration Overview]({% link {{ page.version.version }}/migration-overview.md %}) - [Use the Schema Conversion Tool](https://www.cockroachlabs.com/docs/cockroachcloud/migrations-page) -- [Use the MOLT Verify tool]({% link {{ page.version.version }}/molt-verify.md %}) +- [Use the MOLT Verify tool]({% link molt/molt-verify.md %}) - [Import Performance Best Practices]({% link {{ page.version.version }}/import-performance-best-practices.md %}) - [Migrate from CSV]({% link {{ page.version.version }}/migrate-from-csv.md %}) - [Migrate from PostgreSQL]({% link {{ page.version.version }}/migrate-from-postgres.md %}) diff --git a/src/current/v23.1/migrate-from-postgres.md b/src/current/v23.1/migrate-from-postgres.md index 396ebdfdcb6..8d35c7c7dd4 100644 --- a/src/current/v23.1/migrate-from-postgres.md +++ b/src/current/v23.1/migrate-from-postgres.md @@ -247,9 +247,9 @@ By default, [`IMPORT INTO`]({% link {{ page.version.version }}/import-into.md %} ### Step 3. Validate the migrated data -Use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to check that the data on PostgreSQL and CockroachDB are consistent. +Use [MOLT Verify]({% link molt/molt-verify.md %}) to check that the data on PostgreSQL and CockroachDB are consistent. -1. 
[Install MOLT Verify.]({% link {{ page.version.version }}/molt-verify.md %}) +1. [Install MOLT Verify.]({% link molt/molt-verify.md %}) 1. In the directory where you installed MOLT Verify, use the following command to compare the two databases, specifying the PostgreSQL connection string with `--source` and the CockroachDB connection string with `--target`: @@ -288,7 +288,7 @@ To learn more, see the [Migration Overview]({% link {{ page.version.version }}/m - [Migration Overview]({% link {{ page.version.version }}/migration-overview.md %}) - [Use the Schema Conversion Tool](https://www.cockroachlabs.com/docs/cockroachcloud/migrations-page) -- [Use the MOLT Verify tool]({% link {{ page.version.version }}/molt-verify.md %}) +- [Use the MOLT Verify tool]({% link molt/molt-verify.md %}) - [Import Performance Best Practices]({% link {{ page.version.version }}/import-performance-best-practices.md %}) - [Migrate from CSV]({% link {{ page.version.version }}/migrate-from-csv.md %}) - [Migrate from MySQL]({% link {{ page.version.version }}/migrate-from-mysql.md %}) diff --git a/src/current/v23.1/migration-overview.md b/src/current/v23.1/migration-overview.md index 7a8b7d05487..9b3f6bb6763 100644 --- a/src/current/v23.1/migration-overview.md +++ b/src/current/v23.1/migration-overview.md @@ -216,7 +216,7 @@ In the following order: You can use the following MOLT (Migrate Off Legacy Technology) tools to simplify these steps: - [Schema Conversion Tool](https://www.cockroachlabs.com/docs/cockroachcloud/migrations-page) -- [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) +- [MOLT Verify]({% link molt/molt-verify.md %}) #### Convert the schema @@ -255,7 +255,7 @@ After you [load the test data](#load-test-data), validate your queries on Cockro ##### Shadowing -You can "shadow" your production workload by executing your source SQL statements on CockroachDB in parallel. 
[MOLT LMS (Live Migration Service)]({% link {{ page.version.version }}/live-migration-service.md %}) can perform shadowing. You can then [test the queries](#test-query-results-and-performance) on CockroachDB for consistency, performance, and potential issues with the migration. +You can "shadow" your production workload by executing your source SQL statements on CockroachDB in parallel. [MOLT LMS (Live Migration Service)]({% link molt/live-migration-service.md %}) can perform shadowing. You can then [test the queries](#test-query-results-and-performance) on CockroachDB for consistency, performance, and potential issues with the migration. Shadowing may not be necessary or practical for your workload. For example, because transactions are serialized on CockroachDB, this will limit your ability to validate the performance of high-throughput workloads. @@ -265,7 +265,7 @@ You can manually validate your queries by testing a subset of "critical queries" - Check the application logs for error messages and the API response time. If application requests are slower than expected, use the **SQL Activity** page on the [CockroachDB {{ site.data.products.cloud }} Console](https://www.cockroachlabs.com/docs/cockroachcloud/statements-page) or [DB Console]({% link {{ page.version.version }}/ui-statements-page.md %}) to find the longest-running queries that are part of that application request. If necessary, tune the queries according to our best practices for [SQL performance]({% link {{ page.version.version }}/performance-best-practices-overview.md %}). -- Compare the results of the queries and check that they are identical in both the source database and CockroachDB. To do this, you can use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}). +- Compare the results of the queries and check that they are identical in both the source database and CockroachDB. To do this, you can use [MOLT Verify]({% link molt/molt-verify.md %}). 
Test performance on a CockroachDB cluster that is appropriately [sized](#capacity-planning) for your workload: @@ -310,7 +310,7 @@ The following is a high-level overview of the migration steps. For consideration - {% include {{ page.version.version }}/migration/load-data-import-into.md %} - {% include {{ page.version.version }}/migration/load-data-third-party.md %} - {% include {{ page.version.version }}/migration/load-data-copy-from.md %} -1. After the data is migrated, you can use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to validate the consistency of the data between the source database and CockroachDB. +1. After the data is migrated, you can use [MOLT Verify]({% link molt/molt-verify.md %}) to validate the consistency of the data between the source database and CockroachDB. 1. Perform a [cutover](#cutover-strategy) by resuming application traffic, now to CockroachDB. {% comment %}1. If you want the ability to [roll back](#all-at-once-rollback) the migration, replicate data back to the source database.{% endcomment %} @@ -325,7 +325,7 @@ The following is a high-level overview of the migration steps. {% comment %}For To prioritize consistency and minimize downtime: 1. {% include {{ page.version.version }}/migration/load-data-third-party.md %} Select the tool's option to **replicate ongoing changes** after performing the initial load of data into CockroachDB. -1. As the data is migrating, you can use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to validate the consistency of the data between the source database and CockroachDB. +1. As the data is migrating, you can use [MOLT Verify]({% link molt/molt-verify.md %}) to validate the consistency of the data between the source database and CockroachDB. 1. Once nearly all data from your source database has been moved to CockroachDB (for example, with a <1 second delay or <1000 rows), stop application traffic to your source database. **This begins downtime.** 1. 
Wait for replication to CockroachDB to complete. 1. Perform a [cutover](#cutover-strategy) by resuming application traffic, now to CockroachDB. @@ -333,7 +333,7 @@ To prioritize consistency and minimize downtime: To achieve zero downtime with inconsistency: 1. {% include {{ page.version.version }}/migration/load-data-third-party.md %} Select the tool's option to replicate ongoing changes after performing the initial load of data into CockroachDB. -1. As the data is migrating, you can use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to validate the consistency of the data between the source database and CockroachDB. +1. As the data is migrating, you can use [MOLT Verify]({% link molt/molt-verify.md %}) to validate the consistency of the data between the source database and CockroachDB. 1. Once nearly all data from your source database has been moved to CockroachDB (for example, with a <1 second delay or <1000 rows), perform a [cutover](#cutover-strategy) by pointing application traffic to CockroachDB. 1. Manually reconcile any inconsistencies caused by writes that were not replicated during the cutover. 1. Close the connection to the source database when you are ready to finish the migration. diff --git a/src/current/v23.1/molt-verify.md b/src/current/v23.1/molt-verify.md deleted file mode 100644 index 7bf7b9ab489..00000000000 --- a/src/current/v23.1/molt-verify.md +++ /dev/null @@ -1,110 +0,0 @@ ---- -title: Use the MOLT Verify tool -summary: Learn how to use the MOLT Verify tool to check for data discrepancies during and after a migration. -toc: true -docs_area: migrate ---- - -{{site.data.alerts.callout_info}} -{% include feature-phases/preview.md %} -{{site.data.alerts.end}} - -MOLT Verify checks for data discrepancies between a source database and CockroachDB during a [database migration]({% link {{ page.version.version }}/migration-overview.md %}). 
- -The tool performs the following verifications to ensure data integrity during a migration: - -- **Table Verification:** Check that the structure of tables between the source database and the target database are the same. -- **Column Definition Verification:** Check that the column names, data types, constraints, nullability, and other attributes between the source database and the target database are the same. -- **Row Value Verification:** Check that the actual data in the tables is the same between the source database and the target database. - -For a demo of MOLT Verify, watch the following video: - -{% include_cached youtube.html video_id="6mfebmCLClY" %} - -## Supported databases - -The following databases are currently supported: - -- [PostgreSQL]({% link {{ page.version.version }}/migrate-from-postgres.md %}) -- [MySQL]({% link {{ page.version.version }}/migrate-from-mysql.md %}) -- CockroachDB - -## Install and run MOLT Verify - -To install MOLT Verify, download the binary that matches your system. To download the latest binary: - -| Operating System | AMD 64-bit | ARM 64-bit | -|------------------|---------------------------------------------------------------------------------|---------------------------------------------------------------------------------| -| Windows | [Download](https://molt.cockroachdb.com/molt/cli/molt-latest.windows-amd64.tgz) | [Download](https://molt.cockroachdb.com/molt/cli/molt-latest.windows-arm64.tgz) | -| Linux | [Download](https://molt.cockroachdb.com/molt/cli/molt-latest.linux-amd64.tgz) | [Download](https://molt.cockroachdb.com/molt/cli/molt-latest.linux-arm64.tgz) | -| Mac | [Download](https://molt.cockroachdb.com/molt/cli/molt-latest.darwin-amd64.tgz) | [Download](https://molt.cockroachdb.com/molt/cli/molt-latest.darwin-arm64.tgz) | - -{{site.data.alerts.callout_success}} -For previous binaries, refer to the [MOLT version manifest](https://molt.cockroachdb.com/molt/cli/versions.html). 
-{{site.data.alerts.end}} - -To set up MOLT Verify: - -1. Rename the binary to `molt` and add it to your `PATH` so you can execute the `molt verify` command from any shell. -1. Get the connection strings for the source database and [CockroachDB]({% link {{ page.version.version }}/connect-to-the-database.md %}). -1. Make sure the SQL user running MOLT Verify has read privileges on the necessary tables. -1. Run MOLT Verify: - - The `molt verify` command takes two SQL connection strings as `--source` and `--target` arguments. - - To compare a PostgreSQL database with a CockroachDB database: - - {% include_cached copy-clipboard.html %} - ~~~ shell - ./molt verify \ - --source 'postgresql://{username}:{password}@{host}:{port}/{database}' \ - --target 'postgresql://{username}:{password}@{host}:{port}/{database}?sslmode=verify-full' - ~~~ - - To compare a MySQL database with a CockroachDB database: - - {% include_cached copy-clipboard.html %} - ~~~ shell - ./molt verify \ - --source 'mysql://{username}:{password}@{protocol}({host}:{port})/{database}' \ - --target 'postgresql://{username}:{password}@{host}:{port}/{database}?sslmode=verify-full' - ~~~ - - You can use the optional [supported flags](#supported-flags) to customize the verification results. - -1. Review the verification results: - - Running the MOLT Verify tool will show if there are any missing rows or extraneous tables in the target database. If any data is missing, you can [add the missing data]({% link {{ page.version.version }}/insert.md %}) to the target database and run `./molt verify` again. - - {{site.data.alerts.callout_info}} - Be aware of data type differences. For example, if your source MySQL table uses an auto-incrementing ID, MOLT Verify will identify a difference in the table definitions when comparing with CockroachDB's [`UUID`]({% link {{ page.version.version }}/uuid.md %}) type. 
In such cases, you might have to perform extra steps, such as [creating composite types]({% link {{ page.version.version }}/create-type.md %}#create-a-composite-data-type) within the target database that use the auto-incrementing ID and other types to maintain referential integrity. - {{site.data.alerts.end}} - -## Supported flags - -Flag | Description -----------|------------ -`--source` | (Required) Connection string for the source database. -`--target` | (Required) Connection string for the target database. -`--concurrency` | Number of shards to process at a time.
**Default:** 16
For faster verification, set this flag to a higher value. {% comment %}
Note: Table splitting by shard only works for [`INT`]({% link {{ page.version.version }}/int.md %}), [`UUID`]({% link {{ page.version.version }}/uuid.md %}), and [`FLOAT`]({% link {{ page.version.version }}/float.md %}) data types.{% endcomment %} -`--row-batch-size` | Number of rows to get from a table at a time.
**Default:** 20000 -`--table-filter` | Verify tables that match a specified [regular expression](https://wikipedia.org/wiki/Regular_expression). -`--schema-filter` | Verify schemas that match a specified [regular expression](https://wikipedia.org/wiki/Regular_expression). -`--continuous` | Verify tables in a continuous loop.
**Default:** `false` -`--live` | Retry verification on rows before emitting warnings or errors. This is useful during live data import, when temporary mismatches can occur.
**Default:** `false`
-
-## Limitations
-
-- While verifying data, MOLT Verify pages 20,000 rows at a time by default, and row values can change in between, which can lead to temporary inconsistencies in data. Enable `--live` mode to have the tool retry verification on these rows. You can also change the row batch size using the `--row-batch-size` [flag](#supported-flags).
-- MySQL enums and set types are not supported.
-- MOLT Verify checks for collation mismatches on [primary key]({% link {{ page.version.version }}/primary-key.md %}) columns. This may cause validation to fail when a [`STRING`]({% link {{ page.version.version }}/string.md %}) is used as a primary key and the source and target databases are using different [collations]({% link {{ page.version.version }}/collate.md %}).
-- MOLT Verify only supports comparing one MySQL database to a whole CockroachDB schema (which is assumed to be `public`).
-- MOLT Verify might give an error in case of schema changes on either the source or target database.
-- [Geospatial types]({% link {{ page.version.version }}/spatial-data-overview.md %}#spatial-objects) cannot yet be compared.
-
-## See also
-
-- [Migration Overview]({% link {{ page.version.version }}/migration-overview.md %})
-- [Migrate from PostgreSQL]({% link {{ page.version.version }}/migrate-from-postgres.md %})
-- [Migrate from MySQL]({% link {{ page.version.version }}/migrate-from-mysql.md %})
-- [Migrate from CSV]({% link {{ page.version.version }}/migrate-from-csv.md %})
diff --git a/src/current/v23.2/live-migration-service.md b/src/current/v23.2/live-migration-service.md
deleted file mode 100644
index 57c1f1ea14e..00000000000
--- a/src/current/v23.2/live-migration-service.md
+++ /dev/null
@@ -1,892 +0,0 @@
----
-title: Use the Live Migration Service
-summary: Learn how to use the Live Migration Service to shadow application traffic, perform cutover, and migrate a database to CockroachDB. 
-toc: true -docs_area: migrate ---- - -{{site.data.alerts.callout_info}} -{% include feature-phases/preview.md %} -{{site.data.alerts.end}} - -MOLT LMS (Live Migration Service) is used during a [live migration]({% link {{ page.version.version }}/migration-overview.md %}#minimal-downtime) to CockroachDB. - -The LMS is a self-hosted, horizontally scalable proxy that routes traffic between an application, a source database, and a target CockroachDB database. You use the LMS to control which database, as the "source of truth", is serving reads and writes to an application. You can optionally configure the LMS to [shadow production traffic](#shadowing-modes) from the source database and validate the query results on CockroachDB. When you have sufficiently tested your application and are confident with its consistency and performance on CockroachDB, you use the LMS to [perform the cutover](#perform-a-cutover) to CockroachDB. - -MOLT LMS is self-hosted on [Kubernetes](https://kubernetes.io/) and [configured using Helm](#configuration). At a high level, the LMS consists of the following: - -- A number of proxy [instances](#lms-instances) (running in separate Kubernetes pods) across which application traffic is distributed and routed to the source and target databases. -- An "orchestrator" service (running in a single Kubernetes pod) that coordinates the proxy instances and sends the cutover commands. - -This page describes how to [install](#installation), [configure](#configuration), [secure](#security), and [use the LMS](#molt-lms-cli) to perform a live migration. 
{% comment %}For more information, see [Migration Strategy: Live Migration]({% link {{ page.version.version }}/migration-strategy-live-migration.md %}).{% endcomment %} - -For a demo of the Live Migration Service in action, watch the following video: - -{% include_cached youtube.html video_id="HA8ec9e_a-s" %} - -## Terminology - -- A *live migration* keeps two production databases online (a source and a target database) and uses either replication or dual writing to keep data identical between them until a final cutover. -- The *source of truth* is the database that serves reads and writes to the application during a live migration. A cutover switches the source of truth. -- *Shadowing* is the execution of source SQL statements on the target database in parallel. The LMS supports multiple [shadowing modes](#shadowing-modes). - -## Requirements - -- [Kubernetes](https://kubernetes.io/) cluster -- [Helm](https://helm.sh/docs/intro/install/) package manager for Kubernetes - -#### Supported database technologies - -- [PostgreSQL]({% link {{ page.version.version }}/migrate-from-postgres.md %}) (source) -- [MySQL]({% link {{ page.version.version }}/migrate-from-mysql.md %}) (source) -- CockroachDB (source and target) - -## Installation - -1. Add the Helm chart repository at `https://molt.cockroachdb.com/lms/charts/` with [`helm repo add`](https://helm.sh/docs/helm/helm_repo_add/). Then install the chart with [`helm install`](https://helm.sh/docs/helm/helm_install/). For example: - - {% include_cached copy-clipboard.html %} - ~~~ shell - helm repo add lms https://molt.cockroachdb.com/lms/charts/ - helm install lms lms/lms - ~~~ - -1. Port-forward from your local machine to the orchestrator, using the release name that you specified with `helm install`. The orchestrator port is configurable and is [`4200` by default](#service-type). 
- - {% include_cached copy-clipboard.html %} - ~~~ shell - kubectl port-forward svc/{releasename}-lms-orchestrator 4200:4200 - ~~~ - - {{site.data.alerts.callout_success}} - If you named the release `lms`, exclude `{releasename}-` from the command. - {{site.data.alerts.end}} - -1. The LMS proxy instances and orchestrator are initialized as Kubernetes pods: - - {% include_cached copy-clipboard.html %} - ~~~ shell - kubectl get pods - ~~~ - - ~~~ - NAME READY STATUS RESTARTS AGE - lms-orchestrator-86779b87f7-qrk9q 1/1 Running 0 52s - lms-576bffdd8c-pmh6g 1/1 Running 0 52s - lms-576bffdd8c-pbdvl 1/1 Running 0 52s - lms-576bffdd8c-s7kx4 1/1 Running 0 52s - ... - ~~~ - - You will see `lms` pods that match the configured [number of LMS instances](#lms-instances), along with one `lms-orchestrator` pod. - - The pod names are prefixed with the release name you specified when running `helm install`, unless you named the release `lms`. - -## Configuration - -To configure the LMS, override the [Helm chart values](https://github.com/cockroachdb/molt-helm-charts/blob/main/lms/values.yaml). This involves a rolling restart of your pods. For information on setting Helm chart values, see the [Helm documentation](https://helm.sh/docs/helm/helm_upgrade/). - -This section describes the most important and commonly used values. For details on all configurable values, refer to the [`values.yaml`](https://github.com/cockroachdb/molt-helm-charts/blob/main/lms/values.yaml) file. - -#### LMS version - -~~~ yaml -image: - tag: 0.2.4 -~~~ - -`image.tag` specifies the LMS version. This **must** match the installed [`molt-lms-cli`](#molt-lms-cli) version, which can be queried with `molt-lms-cli version`. - -{% comment %} -For release details, see the [MOLT changelog]({% link releases/molt-releases.md %}). -{% endcomment %} - -#### Source dialect - -~~~ yaml -lms: - sourceDialect: "" -... 
-orchestrator: - sourceDialect: "" -~~~ - -You **must** provide a string value for `sourceDialect`, which specifies the dialect of your source database. Supported dialects are: - -- `postgres`: PostgreSQL -- `mysql`: MySQL -- `cockroach`: CockroachDB - -#### Shadowing - -~~~ yaml -lms: - shadowMode: none -~~~ - -`lms.shadowMode` specifies the shadowing behavior used by the LMS. This should depend on your specific migration requirements. For details, see [Shadowing modes](#shadowing-modes). - -#### LMS instances - -~~~ yaml -lms: - replicaCount: 3 -~~~ - -`lms.replicaCount` determines the number of LMS instances created as `lms` pods on the Kubernetes cluster, across which application traffic is distributed. This defaults to `3`. - -#### LMS connection - -~~~ yaml -lms: - allowTLSDisable: false -~~~ - -`lms.allowTLSDisable` enables insecure LMS connections to databases, and is disabled by default. Enable insecure connections **only** if a secure SSL/TLS connection to the source or target database is not possible. When possible, [secure SSL/TLS connections](#security) should be used. - -{{site.data.alerts.callout_success}} -You can also set the `--allow-tls-mode-disable` [flag](#global-flags) to enable insecure LMS connections. -{{site.data.alerts.end}} - -By default, `allowTLSDisable` is set to `false`, and initiating an insecure connection will return the following error: - -~~~ -SSL key or cert is not found. By default, secure (TLS) connections are required. If a secure connection is not possible, set the --allow-tls-mode-disable flag to skip this check. -~~~ - -#### Connection strings - -The following connection strings are specific to your configuration: - -- External connection string for the source database. -- External connection string for the target CockroachDB database. -- Internal connection string for the LMS. - -You should specify these in external Kubernetes secrets. For details, see [Manage external secret](#manage-external-secrets). 
- -{{site.data.alerts.callout_danger}} -Storing sensitive keys in external secrets is **strongly** recommended. -{{site.data.alerts.end}} - -#### Service type - -~~~ yaml -lms: - service: - type: ClusterIP - port: 9043 - metricsPort: 9044 -... -orchestrator: - service: - type: ClusterIP - port: 4200 - metricsPort: 4201 -~~~ - -`service` specifies the [Kubernetes service type](https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types) and ports for the LMS instances and orchestrator. - -#### Prometheus Operator - -~~~ yaml -serviceMonitor: - enabled: false - labels: {} - annotations: {} - interval: 30s - namespaced: false -~~~ - -`serviceMonitor` is a custom resource used with the [Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator/tree/main) for monitoring Kubernetes. For more information, see the [Prometheus Operator documentation](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/user-guides/getting-started.md). - -## Security - -{{site.data.alerts.callout_danger}} -Cockroach Labs **strongly** recommends the following: - -- Manage your LMS and orchestrator configurations in [external Kubernetes secrets](#manage-external-secrets). -- To establish secure connections between the LMS pods and with your client, generate and set up TLS certificates for the [source database and CockroachDB](#configure-an-lms-secret), [LMS](#configure-the-lms-certificates), and [orchestrator](#configure-the-orchestrator-and-client-certificates). -{{site.data.alerts.end}} - -By default, initiating an insecure connection will return an error. You can override this check by setting either the `--allow-tls-mode-disable` [flag](#global-flags) or `allowTLSDisable` in the [Helm configuration](#lms-connection). Enable insecure connections **only** if secure SSL/TLS connections to the source or target database are not possible. 
- -#### Manage external secrets - -Cockroach Labs recommends using [External Secrets Operator](https://external-secrets.io/latest/) to [create and manage Kubernetes secrets](https://external-secrets.io/latest/introduction/getting-started/#create-your-first-externalsecret) that contain: - -- [Your LMS configuration](#configure-an-lms-secret), which includes the source and target database connection strings. -- [Your orchestrator configuration](#configure-an-orchestrator-secret), which includes the LMS and target database connection strings. -- Your [LMS](#configure-the-lms-certificates) and [orchestrator](#configure-the-orchestrator-and-client-certificates) certificates, which you should have generated separately. - -For information on Kubernetes secrets, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/configuration/secret/). - -#### Configure an LMS secret - -Create an external secret that specifies the connection strings for the source and target CockroachDB database. 
- -For example, the following `ExternalSecret` called `lms-config` uses AWS Secrets Manager as the [`SecretStore`](https://external-secrets.io/latest/introduction/getting-started/#create-your-first-secretstore), and references a remote [AWS secret](https://docs.aws.amazon.com/secretsmanager/latest/userguide/create_secret.html) called `lms-secret`: - -~~~ yaml -apiVersion: external-secrets.io/v1beta1 -kind: ExternalSecret -metadata: - name: lms-config -spec: - refreshInterval: 1h - secretStoreRef: - name: aws-secret-store - kind: SecretStore - target: - name: lms-config - creationPolicy: Owner - template: - engineVersion: v2 - data: - config.json: | - { - "INIT_SOURCE": "{% raw %}{{ .source }}{% endraw %}", - "INIT_TARGET": "{% raw %}{{ .target }}{% endraw %}" - } - data: - - secretKey: source - remoteRef: - key: lms-secret - property: INIT_SOURCE - - secretKey: target - remoteRef: - key: lms-secret - property: INIT_TARGET -~~~ - -The connection strings are specified with the following keys inside `config.json`: - -- `INIT_SOURCE`: External connection string for the source database, including the paths to your client certificate and keys. -- `INIT_TARGET`: External [connection string for the CockroachDB database]({% link {{ page.version.version }}/connection-parameters.md %}#connect-using-a-url), including the paths to your client certificate and keys. 
-
-The remote secret `lms-secret` will contain the full connection strings and paths, such that the `config.json` keys resolve to:
-
-~~~ json
-"INIT_SOURCE": "mysql://{username}:{password}@{host}:{port}/{database}?sslmode=verify-full&sslrootcert=path/to/mysql.ca&sslcert=path/to/mysql.crt&sslkey=path/to/mysql.key",
-"INIT_TARGET": "postgresql://{username}:{password}@{host}:{port}/{database}?sslmode=verify-full&sslrootcert=path/to/ca.crt&sslcert=path/to/client.username.crt&sslkey=path/to/client.username.key"
-~~~
-
-In the [Helm configuration](#configuration), `lms.configSecretName` must specify the external secret `name`:
-
-~~~ yaml
-lms:
-  configSecretName: "lms-config"
-~~~
-
-#### Configure an orchestrator secret
-
-Create an external secret that specifies the connection strings for the LMS and target CockroachDB database.
-
-For example, the following `ExternalSecret` called `orch-config` uses AWS Secrets Manager as the [`SecretStore`](https://external-secrets.io/latest/introduction/getting-started/#create-your-first-secretstore), and references a remote [AWS secret](https://docs.aws.amazon.com/secretsmanager/latest/userguide/create_secret.html) called `orch-secret`:
-
-~~~ yaml
-apiVersion: external-secrets.io/v1beta1
-kind: ExternalSecret
-metadata:
-  name: orch-config
-spec:
-  refreshInterval: 1h
-  secretStoreRef:
-    name: aws-secret-store
-    kind: SecretStore
-  target:
-    name: orch-config
-    creationPolicy: Owner
-    template:
-      engineVersion: v2
-      data:
-        config.json: |
-          {
-            "LMS_URL": "{% raw %}{{ .lmsUrl }}{% endraw %}",
-            "CRDB_URL": "{% raw %}{{ .crdbUrl }}{% endraw %}"
-          }
-  data:
-    - secretKey: lmsUrl
-      remoteRef:
-        key: orch-secret
-        property: LMS_URL
-    - secretKey: crdbUrl
-      remoteRef:
-        key: orch-secret
-        property: CRDB_URL
-~~~
-
-The connection strings are specified with the following keys inside `config.json`:
-
-- `LMS_URL`: Internal connection string for the LMS, specifying the username and password of the source database. 
The format depends on your source dialect:
-
-    - MySQL: `{username}:{password}@({releasename}-lms.{namespace}.svc.cluster.local:{port})/{database}`
-    - PostgreSQL: `postgresql://{username}:{password}@{releasename}-lms.{namespace}.svc.cluster.local:{port}/{database}`
-
-    {{site.data.alerts.callout_success}}
-    If you named the release `lms` during [installation](#installation), exclude `{releasename}-` from the LMS connection string.
-    {{site.data.alerts.end}}
-
-- `CRDB_URL`: External [connection string for the CockroachDB database]({% link {{ page.version.version }}/connection-parameters.md %}#connect-using-a-url), including the paths to your client certificate and keys.
-
-The remote secret `orch-secret` will contain the full connection strings, such that the `config.json` keys resolve to:
-
-~~~ json
-"LMS_URL": "{username}:{password}@({releasename}-lms.{namespace}.svc.cluster.local:{port})/{database}",
-"CRDB_URL": "postgresql://{username}:{password}@{host}:{port}/{database}?sslmode=verify-full&sslrootcert=path/to/ca.crt&sslcert=path/to/client.username.crt&sslkey=path/to/client.username.key"
-~~~
-
-In the [Helm configuration](#configuration), `orchestrator.configSecretName` must specify the external secret `name`:
-
-~~~ yaml
-orchestrator:
-  configSecretName: "orch-config"
-~~~
-
-#### Configure the LMS certificates
-
-Create an external secret that specifies the LMS certificate, key, and (optional) CA certificate. 
- -For example, the following `ExternalSecret` called `lms-tls` uses AWS Secrets Manager as the [`SecretStore`](https://external-secrets.io/latest/introduction/getting-started/#create-your-first-secretstore), and references a remote [AWS secret](https://docs.aws.amazon.com/secretsmanager/latest/userguide/create_secret.html) called `lms-certs`: - -~~~ yaml -apiVersion: external-secrets.io/v1beta1 -kind: ExternalSecret -metadata: - name: lms-tls -spec: - refreshInterval: 1h - secretStoreRef: - name: aws-secret-store - kind: SecretStore - target: - name: lms-tls - creationPolicy: Owner - template: - engineVersion: v2 - data: - lms-ca.crt: '{% raw %}{{ .caCert }}{% endraw %}' - lms-tls.crt: '{% raw %}{{ .serverCert }}{% endraw %}' - lms-tls.key: '{% raw %}{{ .serverKey }}{% endraw %}' - data: - - secretKey: caCert - remoteRef: - key: lms-certs - property: caCert - - secretKey: serverCert - remoteRef: - key: lms-certs - property: serverCert - - secretKey: serverKey - remoteRef: - key: lms-certs - property: serverKey -~~~ - -In the preceding example, each `.crt` and `.key` filename is associated with its corresponding value in the remote secret `lms-certs`. - -In the [Helm configuration](#configuration), `lms.sslVolumes` and `lms.sslVolumeMounts` must specify [volumes](https://kubernetes.io/docs/concepts/storage/volumes/#secret) and mount paths that contain the server-side certificates. The path to each file is specified as an environment variable in `lms.env`. Cockroach Labs recommends mounting certificates to `/app/certs`. - -{{site.data.alerts.callout_info}} -Certificates **must** be mounted in a readable format, or the LMS will error. The format should match the output of `cat {certificate}` on your host machine. 
-{{site.data.alerts.end}} - -~~~ yaml -lms: - sslVolumes: - - name: lms-tls - secret: - secretName: lms-tls - sslVolumeMounts: - - mountPath: "/app/certs" - name: lms-tls - readOnly: true - env: - - name: LMS_SSL_CA - value: /app/certs/lms-ca.crt - - name: LMS_SSL_CERT - value: /app/certs/lms-tls.crt - - name: LMS_SSL_KEY - value: /app/certs/lms-tls.key -~~~ - -#### Configure the orchestrator and client certificates - -Create an external secret that specifies the orchestrator certificate, key, and (optional) CA certificate. - -For example, the following `ExternalSecret` called `orch-tls` uses AWS Secrets Manager as the [`SecretStore`](https://external-secrets.io/latest/introduction/getting-started/#create-your-first-secretstore), and references a remote [AWS secret](https://docs.aws.amazon.com/secretsmanager/latest/userguide/create_secret.html) called `orch-certs`: - -~~~ yaml -apiVersion: external-secrets.io/v1beta1 -kind: ExternalSecret -metadata: - name: orch-tls -spec: - refreshInterval: 1h - secretStoreRef: - name: aws-secret-store - kind: SecretStore - target: - name: orch-tls - creationPolicy: Owner - template: - engineVersion: v2 - data: - orch-ca.crt: '{% raw %}{{ .caCert }}{% endraw %}' - orch-tls.crt: '{% raw %}{{ .serverCert }}{% endraw %}' - orch-tls.key: '{% raw %}{{ .serverKey }}{% endraw %}' - data: - - secretKey: caCert - remoteRef: - key: orch-certs - property: caCert - - secretKey: serverCert - remoteRef: - key: orch-certs - property: serverCert - - secretKey: serverKey - remoteRef: - key: orch-certs - property: serverKey -~~~ - -In the preceding example, each `.crt` and `.key` filename is associated with its corresponding value in the remote secret `orch-certs`. - -In the [Helm configuration](#configuration), `orchestrator.sslVolumes` and `orchestrator.sslVolumeMounts` must specify [volumes](https://kubernetes.io/docs/concepts/storage/volumes/#secret) and mount paths that contain the server-side certificates. 
The path to each file is specified as an environment variable in `orchestrator.env`. Cockroach Labs recommends mounting certificates to `/app/certs`. - -{{site.data.alerts.callout_info}} -Certificates **must** be mounted in a readable format, or the LMS will error. The format should match the output of `cat {certificate}`. -{{site.data.alerts.end}} - -~~~ yaml -orchestrator: - sslVolumes: - - name: orch-tls - secret: - secretName: orch-tls - sslVolumeMounts: - - mountPath: "/app/certs" - name: orch-tls - readOnly: true - env: - - name: ORCH_CA_TLS_CERT - value: /app/certs/orch-ca.crt - - name: ORCH_TLS_CERT - value: /app/certs/orch-tls.crt - - name: ORCH_TLS_KEY - value: /app/certs/orch-tls.key -~~~ - -You will also need to create and specify a CLI client certificate, key, and (optional) CA certificate. It's easiest to specify these as environment variables in the shell that is running `molt-lms-cli`: - -{% include_cached copy-clipboard.html %} -~~~ shell -export CLI_TLS_CA_CERT="{path-to-cli-ca-cert}" -~~~ - -{% include_cached copy-clipboard.html %} -~~~ shell -export CLI_TLS_CLIENT_CERT="{path-to-cli-client-cert}" -~~~ - -{% include_cached copy-clipboard.html %} -~~~ shell -export CLI_TLS_CLIENT_KEY="{path-to-cli-client-key}" -~~~ - -## `molt-lms-cli` - -The `molt-lms-cli` command-line interface is used to inspect the LMS instances and [perform cutover](#perform-a-cutover). - -To install `molt-lms-cli`, download the binary that matches your system. 
To download the latest binary: - -| Operating System | AMD 64-bit | ARM 64-bit | -|------------------|----------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------| -| Windows | [Download](https://molt.cockroachdb.com/lms/cli/molt-lms-cli-latest.windows-amd64.tgz) | [Download](https://molt.cockroachdb.com/lms/cli/molt-lms-cli-latest.windows-arm64.tgz) | -| Linux | [Download](https://molt.cockroachdb.com/lms/cli/molt-lms-cli-latest.linux-amd64.tgz) | [Download](https://molt.cockroachdb.com/lms/cli/molt-lms-cli-latest.linux-arm64.tgz) | -| Mac | [Download](https://molt.cockroachdb.com/lms/cli/molt-lms-cli-latest.darwin-amd64.tgz) | [Download](https://molt.cockroachdb.com/lms/cli/molt-lms-cli-latest.darwin-arm64.tgz) | - -For previous binaries, see the [MOLT version manifest](https://molt.cockroachdb.com/lms/cli/versions.html). The `molt-lms-cli` version **must** match the [configured LMS version](#lms-version). - -### Commands - -| Command | Usage | -|------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `connections list` | List all client connections to the LMS and their most recent queries. See additional [flags](#connections-list-flags) and [usage example](#connections-list). | -| `cutover consistent` | Specify a [consistent cutover](#consistent-cutover). You must also specify `begin`, `commit`, or `abort`. See [subcommands](#subcommands) and [usage example](#consistent-cutover). | -| `cutover get_metadata` | Display metadata for a cutover attempt, specified with its cutover attempt ID. For example, `cutover get_metadata -i {cutover attempt ID}`. See additional [flags](#cutover-get_metadata-flags) and [usage example](#cutover-get_metadata). 
|
-| `status`               | Display the current configuration of the LMS instances. See additional [flags](#status-flags) and [usage example](#status). |
-
-
-{% comment %}
-| `cutover immediate`    | Initiate an [immediate cutover](#immediate-cutover). This switches the source of truth to the target database. For usage details, see [Immediate cutover](#immediate-cutover). |
-{% endcomment %}
-
-#### Subcommands
-
-The following subcommands are run after the `cutover consistent` command.
-
-| Subcommand | Usage |
-|------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `begin`    | Begin a consistent cutover. This pauses traffic to the source database. See additional [flags](#cutover-consistent-begin-flags) and [example](#consistent-cutover). |
-| `commit`   | Commit a consistent cutover. This resumes traffic and sends it to the **target** database, which becomes the source of truth. This is only effective after running `cutover consistent begin`. See [example](#consistent-cutover). |
-| `abort`    | Abort a consistent cutover after running `cutover consistent begin`, unless you have also run `cutover consistent commit`. This resumes traffic to the source database. |
-
-### Flags
-
-#### Global flags
-
-| Flag | Description |
-|----------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `--allow-tls-mode-disable` | Allow insecure LMS connections to databases. [Secure SSL/TLS connections](#security) should be used by default. <br>
This should be enabled **only** if secure SSL/TLS connections to the source or target database are not possible.

Alternatively, set `lms.allowTLSDisable` in the [Helm configuration](#lms-connection). | -| `--orchestrator-url` | The URL for the orchestrator, using the [configured port](#service-type). Prefix the URL with `https` instead of `http` when using [certificates](#security). This flag is **required** unless the value is exported as an environment variable using `export CLI_ORCHESTRATOR_URL="{orchestrator-URL}"`. | -| `--tls-ca-cert` | The path to the CA certificate. This can also be [exported](#configure-the-orchestrator-and-client-certificates) as an environment variable using `export CLI_TLS_CA_CERT="{path-to-cli-ca-cert}"`. | -| `--tls-client-cert` | The path to the client certificate. This can also be [exported](#configure-the-orchestrator-and-client-certificates) as an environment variable using `export CLI_TLS_CLIENT_CERT="{path-to-cli-client-cert}"`. | -| `--tls-client-key` | The path to the client key. This can also be [exported](#configure-the-orchestrator-and-client-certificates) as an environment variable using `export CLI_TLS_CLIENT_KEY="{path-to-cli-client-key}"`. | - -#### `connections list` flags - -| Flag | Description | -|-------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `-l`, `--lms-addresses` | LMS instances to run the command against. This can be a comma-separated list of IP addresses (e.g., `127.0.0.1`), IP addresses and ports (e.g., `127.0.0.1:1024`), or hostnames (e.g., `https://lms.net`). | -| `-o`, `--output-type` | Specify whether `molt-lms-cli` output is formatted in `json` or `table` format.

**Default:** `table` | - -#### `cutover consistent begin` flags - - -| Flag | Description | -|-------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `--abort-begin-timeout` | Maximum duration for the orchestrator to wait for confirmation from all LMS instances that cutover successfully aborted, after receiving a `ctrl-c` command from `molt-lms-cli`. This affects the performance of the `ctrl-c` command only.

**Default:** `"2s"` | -| `--begin-timeout` | Maximum duration to wait before traffic to the LMS is paused for consistent cutover; i.e., time limit for all connections to be transaction- and query-free. This should be the approximate length of the longest-running transaction, e.g., `30s`. If no `--begin-timeout` value is specified, the LMS waits for all transactions and queries to finish before pausing traffic. | -| `-l`, `--lms-addresses` | LMS instances to run the command against. This can be a comma-separated list of IP addresses (e.g., `127.0.0.1`), IP addresses and ports (e.g., `127.0.0.1:1024`), or hostnames (e.g., `https://lms.net`). By default, cutover is performed on all LMS instances deployed in the same namespace as the orchestrator. For example, if the orchestrator and all LMS instances are deployed on the same namespace on a Kubernetes cluster, the orchestrator automatically detects the addresses of all LMS instances, and no user configuration is needed. LMS addresses should only be manually specified if the orchestrator is deployed in a different namespace than the instances. | - -#### `cutover consistent abort` flags - -| Flag | Description | -|-----------------------|-------------------------------------------------------------------------------------------------------------| -| `-o`, `--output-type` | Specify whether `molt-lms-cli` output is formatted in `json` or `table` format.

**Default:** `table` | - -#### `cutover get_metadata` flags - -| Flag | Description | -|-----------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `-i`, `--cutover-id` | ID of the cutover attempt. | -| `-o`, `--output-type` | Specify whether `molt-lms-cli` output is formatted in `json` or `table` format.

**Default:** `table` | - -#### `status` flags - -| Flag | Description | -|-------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `-l`, `--lms-addresses` | LMS instances to run the command against. This can be a comma-separated list of IP addresses (e.g., `127.0.0.1`), IP addresses and ports (e.g., `127.0.0.1:1024`), or hostnames (e.g., `https://lms.net`). | -| `-o`, `--output-type` | Specify whether `molt-lms-cli` output is formatted in `json` or `table` format.

**Default:** `table` | - -## Shadowing modes - -The LMS can be configured to shadow production traffic from the source database and validate the query results on the target. The exact behavior is configured with the [`shadowMode`](#shadowing) Helm value. - -### `none` - -MOLT LMS shadowing mode - none - -`shadowMode: none` disables shadowing. - -- The LMS sends application requests to the source of truth only. -- Query results from the source of truth are returned to the application. -- Writes must be manually replicated from the source database to the target database. - -You **must** use the `none` shadowing mode to perform a [consistent cutover](#consistent-cutover), along with a database replication technology that replicates writes to the target database. - -### `async` - -MOLT LMS shadowing mode - async - -`shadowMode: async` writes to both databases. - -- The LMS sends application requests to the source of truth and target database in asynchronous threads, and waits only for the source of truth to respond. -- Query results from the source of truth are returned to the application. -- If an asynchronous request has not yet completed, subsequent asynchronous requests will be permanently dropped. - -You can use this mode to confirm that your queries succeed on CockroachDB without verifying performance or correctness. - -{{site.data.alerts.callout_info}} -`async` mode is intended for testing purposes. -{{site.data.alerts.end}} - -### `sync` - -MOLT LMS shadowing mode - sync - -`shadowMode: sync` writes to both databases. - -- The LMS sends application requests to the source of truth and the target database, and waits for each to respond. -- Query results from the source of truth are returned to the application. -- Query results from the non-source of truth are discarded. - -{% comment %} -You can use this mode to perform an [immediate cutover](#immediate-cutover). 
-{% endcomment %} - -### `strict-sync` - -MOLT LMS shadowing mode - strict-sync - -`shadowMode: strict-sync` writes to both databases and enforces correctness on both databases. - -- The LMS sends application requests to the source of truth and the target database, and waits for each to respond. -- Query results from the source of truth are returned to the application. -- If the query returns an error on the source of truth, that error is returned to the application. If the query succeeds on the source of truth but fails on the target, the error from the target is returned to the application. -- If the query fails on both databases, the target will return the error from the source of truth. - -{% comment %} -You can use this mode to perform an [immediate cutover](#immediate-cutover). -{% endcomment %} - -## Perform a cutover - -### Consistent cutover - -A consistent cutover maintains data consistency with [minimal downtime]({% link {{ page.version.version }}/migration-overview.md %}#minimal-downtime). The goal of consistent cutover is to stop application traffic long enough for replication to catch up and ensure that the cutover achieves consistency across the two databases. - -When using the LMS, consistent cutover is handled using the [`molt-lms-cli`](#molt-lms-cli) commands `cutover consistent begin` and `cutover consistent commit`, during which application requests are queued and will be responded to after cutover. This delay in response time is related to the maximum duration of any transactions and queries that need to complete, and the time it takes for replication to catch up from the source to the target database. - -{% comment %} -For more information about the consistent cutover approach, see [Migration Strategy: Live Migration]({% link {{ page.version.version }}/migration-strategy-live-migration.md %}). 
-{% endcomment %} - -{{site.data.alerts.callout_info}} -These steps assume you have already followed the overall steps to [prepare for migration]({% link {{ page.version.version }}/migration-overview.md %}#prepare-for-migration). In particular, [update your schema and application queries]({% link {{ page.version.version }}/migration-overview.md %}#update-the-schema-and-queries) to work with CockroachDB. -{{site.data.alerts.end}} - -To perform a consistent cutover with the LMS: - -1. [Configure the LMS](#configuration) with your deployment details, and follow our [security recommendations](#security). - -1. Set the shadowing mode to [`none`](#none). - - {% comment %} - {{site.data.alerts.callout_danger}} - Do not use the [`sync`](#sync) or [`strict-sync`](#strict-sync) shadowing modes when performing a consistent cutover. Data correctness and consistency cannot be guaranteed in these configurations. - {{site.data.alerts.end}} - {% endcomment %} - -1. Set up ongoing replication between the source database and CockroachDB, using a tool that replicates writes to the target database. - -1. Send application requests to the LMS, which routes the traffic to the source database. The source database is designated the source of truth. - -1. Use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to validate that the replicated data on CockroachDB is consistent with the source of truth. - -1. Begin the consistent cutover. **Requests are now queued in the LMS**, including queries from existing connections and new connection requests to the LMS: - - {% include_cached copy-clipboard.html %} - ~~~ shell - molt-lms-cli cutover consistent begin {flags} - ~~~ - - ~~~ - Pausing traffic, press Ctrl-C to exit cutover and resume the traffic. - Successfully began consistent cutover with ID 1. 
- - To check the status of this command, please run: - molt-lms-cli cutover get_metadata -i 1 - ~~~ - - This command tells the LMS to pause all application traffic to the source of truth. The LMS then waits for transactions to complete and prepared statements to close. - -1. Verify that replication on CockroachDB has caught up with the source of truth. For example, insert a row on the source database and check that the row exists on CockroachDB. - - If you have an implementation that replicates back to the source database, this should be enabled before committing the cutover. - -1. Once all writes have been replicated to the target database, commit the consistent cutover: - - {% include_cached copy-clipboard.html %} - ~~~ shell - molt-lms-cli cutover consistent commit {flags} - ~~~ - - ~~~ - |-----------------|-----------------|-----------------|----------------------------|----------------------------|----------------------------| - | LMS ADDRESS | SOURCE OF TRUTH | STATUS | TRIGGER TIME | TRAFFIC PAUSED TIME | COMMIT TIME | - |-----------------|-----------------|-----------------|----------------------------|----------------------------|----------------------------| - | demo-lms-1:9043 | Cockroach | CommitCompleted | 2024-02-15 19:10:06.991048 | 2024-02-15 19:10:06.991048 | 2024-02-15 19:14:22.95108 | - | | | | +0000 UTC | +0000 UTC | +0000 UTC | - | demo-lms-2:9043 | Cockroach | CommitCompleted | 2024-02-15 19:10:06.991063 | 2024-02-15 19:10:06.991063 | 2024-02-15 19:14:22.950753 | - | | | | +0000 UTC | +0000 UTC | +0000 UTC | - | demo-lms-3:9043 | Cockroach | CommitCompleted | 2024-02-15 19:10:06.992059 | 2024-02-15 19:10:06.992059 | 2024-02-15 19:14:22.950753 | - | | | | +0000 UTC | +0000 UTC | +0000 UTC | - |-----------------|-----------------|-----------------|----------------------------|----------------------------|----------------------------| - LMS-Specific Consistent Cutover Metadata. 
- ~~~ - - This command tells the LMS to switch the source of truth to the target database. Application traffic is now routed to the target database, and requests are processed from the queue in the LMS. - - To verify that CockroachDB is now the source of truth, you can run `molt-lms-cli status`. - -1. Again, use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to validate that the data on the source database and CockroachDB are consistent. - -If any problems arise during a consistent cutover: - -- After running `cutover consistent begin`: - - {% include_cached copy-clipboard.html %} - ~~~ shell - molt-lms-cli cutover consistent abort {flags} - ~~~ - - This command tells the LMS to resume application traffic to the source of truth, which has not yet been switched. Cutover **cannot** be aborted after running `cutover consistent commit`. - -- After running `cutover consistent commit`: - - Reissue the `cutover consistent begin` and `cutover consistent commit` commands to revert the source of truth to the source database. - -{% comment %} -### Immediate cutover - -An immediate cutover can potentially [reduce downtime to zero]({% link {{ page.version.version }}/migration-overview.md %}#minimal-downtime), at the likely risk of introducing data inconsistencies between the source and target databases. The LMS is configured to dual write to the source and target databases, while the [`molt-lms-cli`](#molt-lms-cli) command `cutover immediate` initiates cutover. - -For more information about the immediate cutover approach, see [Migration Strategy: Live Migration]({% link {{ page.version.version }}/migration-strategy-live-migration.md %}). - -To perform an immediate cutover with the LMS: - -{{site.data.alerts.callout_info}} -These steps assume you have already followed the overall steps to [prepare for migration]({% link {{ page.version.version }}/migration-overview.md %}#prepare-for-migration). 
In particular, [update your schema and application queries]({% link {{ page.version.version }}/migration-overview.md %}#update-the-schema-and-queries) to work with CockroachDB. -{{site.data.alerts.end}} - -1. [Configure the LMS](#configuration) with your deployment details, and follow our [security recommendations](#security). - -1. Set the shadowing mode to [`sync`](#sync) or [`strict-sync`](#strict-sync). - -1. Send application requests to the LMS, which routes the traffic to the source database and to CockroachDB. The source database is designated the source of truth. - -1. Use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to validate that the replicated data on CockroachDB is consistent with the source of truth. - - To ensure data integrity, shadowing must be enabled for a sufficient duration with a low error rate. All LMS instances should have been continuously shadowing your workload for the past **seven days** at minimum, with only transient inconsistencies caused by events such as [transaction retry errors]({% link {{ page.version.version }}/transaction-retry-error-reference.md %}). The longer shadowing has been enabled, the better this allows you to evaluate consistency. - -1. Once nearly all data from the source database is replicated to CockroachDB (for example, with a <1 second delay or <1000 rows), initiate the cutover: - - {% include_cached copy-clipboard.html %} - ~~~ shell - molt-lms-cli cutover immediate {flags} - ~~~ - - This command tells the LMS to switch the source of truth to CockroachDB. Application traffic is immediately directed to CockroachDB. - -1. Any writes that were made during the cutover will have been missed on CockroachDB. Use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to identify the inconsistencies. These will need to be manually reconciled. -{% endcomment %} - -### Monitor cutover - -You can monitor your cutover attempts with the following commands. 
- -#### `connections list` - -`molt-lms-cli connections list` outputs client connection details from each LMS instance, including the most recent query and error, if any. - -For example: - -{% include_cached copy-clipboard.html %} -~~~ shell -molt-lms-cli connections list --output-type=json -~~~ - -~~~ json -{ - "error": "", - "last_query": "", - "remote_address": "10.42.0.2:9043", - "source_address": "10.42.0.5:36676", - "source_of_truth": "MySQL" -} -{ - "error": "", - "last_query": "", - "remote_address": "10.42.0.4:9043", - "source_address": "10.42.0.5:34620", - "source_of_truth": "MySQL" -} -{ - "error": "", - "last_query": "", - "remote_address": "10.42.0.6:9043", - "source_address": "10.42.0.5:41052", - "source_of_truth": "MySQL" -} -Connections Details. -~~~ - -In the preceding output, `remote_address` is the address of the LMS instance, and `source_address` is the address of the client connected to the LMS instance. - -#### `cutover get_metadata` - -`molt-lms-cli cutover get_metadata` outputs the metadata for a specific cutover attempt initiated with `cutover consistent begin`. 
- -For example, specifying cutover attempt `1`: - -{% include_cached copy-clipboard.html %} -~~~ shell -molt-lms-cli cutover get_metadata --cutover-id=1 -~~~ - -~~~ -cutover metadata for attempt id 1: -+-----------------+-----------------+--------------------------------+--------------------------------+--------------------------------+ -| SOURCE OF TRUTH | STATUS | TRIGGER TIME | TRAFFIC PAUSED TIME | COMMIT TIME | -+-----------------+-----------------+--------------------------------+--------------------------------+--------------------------------+ -| Cockroach | CommitCompleted | 2024-02-15 19:10:06.974503749 | 2024-02-15 19:10:06.998836442 | 2024-02-15 19:14:22.951534482 | -| | | +0000 UTC m=+736.110121156 | +0000 UTC m=+736.134453848 | +0000 UTC m=+992.087151888 | -+-----------------+-----------------+--------------------------------+--------------------------------+--------------------------------+ -Summarized Consistent Cutover Metadata. -+-----------------+-----------------+-----------------+--------------------------------+--------------------------------+--------------------------------+ -| LMS ADDRESS | SOURCE OF TRUTH | STATUS | TRIGGER TIME | TRAFFIC PAUSED TIME | COMMIT TIME | -+-----------------+-----------------+-----------------+--------------------------------+--------------------------------+--------------------------------+ -| demo-lms-1:9043 | Cockroach | CommitCompleted | 2024-02-15 19:10:06.991048 | 2024-02-15 19:10:06.991048 | 2024-02-15 19:14:22.95108 | -| | | | +0000 UTC | +0000 UTC | +0000 UTC | -| demo-lms-2:9043 | Cockroach | CommitCompleted | 2024-02-15 19:10:06.991063 | 2024-02-15 19:10:06.991063 | 2024-02-15 19:14:22.950753 | -| | | | +0000 UTC | +0000 UTC | +0000 UTC | -| demo-lms-3:9043 | Cockroach | CommitCompleted | 2024-02-15 19:10:06.992059 | 2024-02-15 19:10:06.992059 | 2024-02-15 19:14:22.950753 | -| | | | +0000 UTC | +0000 UTC | +0000 UTC | 
-+-----------------+-----------------+-----------------+--------------------------------+--------------------------------+--------------------------------+ -LMS-Specific Consistent Cutover Metadata. -~~~ - -In the preceding output: - -- `LMS ADDRESS` is the IP address and port number of the running LMS instance. -- `SOURCE OF TRUTH` is the database that was serving reads and writes to the LMS instance at the time that `cutover get_metadata` was run. -- `STATUS` is the status of the cutover attempt. This can indicate whether the cutover attempt began, aborted, paused traffic, committed, or encountered an error. In this example, `CommitCompleted` indicates that a consistent cutover was committed successfully with `cutover consistent commit`. -- `TRIGGER TIME` is the timestamp when the cutover attempt was initiated on the LMS instance. -- `TRAFFIC PAUSED TIME` is the timestamp when traffic to the LMS instance was paused during a consistent cutover attempt. -- `COMMIT TIME` is the timestamp when the cutover attempt was completed on the LMS instance. -- `ERROR` is the error encountered by the cutover attempt, if any. - -#### `status` - -`molt-lms-cli status` outputs the overall status of the LMS, including its [shadowing mode](#shadowing-modes), source and target database addresses, and any errors encountered on the LMS instances. - -{% include_cached copy-clipboard.html %} -~~~ shell -molt-lms-cli status --lms-addresses="demo-lms-1:9043,demo-lms-2:9043" -~~~ - -~~~ -+----------------+----------------+---------------+ -| SOURCE DIALECT | TARGET DIALECT | TRANSITIONING | -+----------------+----------------+---------------+ -| MySQL | Cockroach | false | -+----------------+----------------+---------------+ -Proxy Settings Table. 
-+-----------------+-------------+----------------+----------------+----------------+----------------+-------+ -| LMS ADDRESS | SHADOW MODE | SOURCE DIALECT | TARGET DIALECT | SOURCE ADDRESS | TARGET ADDRESS | ERROR | -+-----------------+-------------+----------------+----------------+----------------+----------------+-------+ -| demo-lms-1:9043 | None | MySQL | Cockroach | mysql:3306 | crdb:26257 | | -| demo-lms-2:9043 | None | MySQL | Cockroach | mysql:3306 | crdb:26257 | | -+-----------------+-------------+----------------+----------------+----------------+----------------+-------+ -Individual LMS Proxy Status. -~~~ - -## See also - -- [Migration Overview]({% link {{ page.version.version }}/migration-overview.md %}) -{% comment %}- [Migration Strategy: Live Migration]({% link {{ page.version.version }}/migration-strategy-live-migration.md %}){% endcomment %} -- [Use the Schema Conversion Tool](https://www.cockroachlabs.com/docs/cockroachcloud/migrations-page) -- [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) -- [Migrate from PostgreSQL]({% link {{ page.version.version }}/migrate-from-postgres.md %}) -- [Migrate from MySQL]({% link {{ page.version.version }}/migrate-from-mysql.md %}) \ No newline at end of file diff --git a/src/current/v23.2/migrate-from-mysql.md b/src/current/v23.2/migrate-from-mysql.md index 18150b7df1e..2a1adcde9a2 100644 --- a/src/current/v23.2/migrate-from-mysql.md +++ b/src/current/v23.2/migrate-from-mysql.md @@ -34,7 +34,7 @@ Identifiers are case-sensitive in MySQL and [case-insensitive in CockroachDB]({% The MySQL [`AUTO_INCREMENT`](https://dev.mysql.com/doc/refman/8.0/en/example-auto-increment.html) attribute, which creates sequential column values, is not supported in CockroachDB. 
When [using the Schema Conversion Tool](https://www.cockroachlabs.com/docs/cockroachcloud/migrations-page?filters=mysql#convert-a-schema), columns with `AUTO_INCREMENT` can be converted to use [sequences]({% link {{ page.version.version }}/create-sequence.md %}), `UUID` values with [`gen_random_uuid()`]({% link {{ page.version.version }}/functions-and-operators.md %}#id-generation-functions), or unique `INT8` values using [`unique_rowid()`]({% link {{ page.version.version }}/functions-and-operators.md %}#id-generation-functions). Cockroach Labs does not recommend using a sequence to define a primary key column. For more information, see [Unique ID best practices]({% link {{ page.version.version }}/performance-best-practices-overview.md %}#unique-id-best-practices). {{site.data.alerts.callout_info}} -Changing a column type during schema conversion will cause [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to identify a type mismatch during [data validation](#step-3-validate-the-migrated-data). This is expected behavior. +Changing a column type during schema conversion will cause [MOLT Verify]({% link molt/molt-verify.md %}) to identify a type mismatch during [data validation](#step-3-validate-the-migrated-data). This is expected behavior. {{site.data.alerts.end}} #### `ENUM` type @@ -158,7 +158,7 @@ Use the [Schema Conversion Tool](https://www.cockroachlabs.com/docs/cockroachclo Click **Save**. - This is a workaround to prevent [data validation](#step-3-validate-the-migrated-data) from failing due to collation mismatches. For more details, see the [MOLT Verify] ({% link {{ page.version.version }}/molt-verify.md %}#limitations) documentation. + This is a workaround to prevent [data validation](#step-3-validate-the-migrated-data) from failing due to collation mismatches. For more details, see the [MOLT Verify]({% link molt/molt-verify.md %}#known-limitations) documentation. 1.
Click [**Migrate Schema**](https://www.cockroachlabs.com/docs/cockroachcloud/migrations-page?filters=mysql#migrate-the-schema) to create a new {{ site.data.products.serverless }} cluster with the converted schema. Name the database `world`. @@ -358,9 +358,9 @@ By default, [`IMPORT INTO`]({% link {{ page.version.version }}/import-into.md %} ### Step 3. Validate the migrated data -Use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to check that the data on MySQL and CockroachDB are consistent. +Use [MOLT Verify]({% link molt/molt-verify.md %}) to check that the data on MySQL and CockroachDB are consistent. -1. [Install MOLT Verify.]({% link {{ page.version.version }}/molt-verify.md %}) +1. [Install MOLT Verify.]({% link molt/molt-verify.md %}) 1. In the directory where you installed MOLT Verify, use the following command to compare the two databases, specifying the [JDBC connection string for MySQL](https://dev.mysql.com/doc/connector-j/8.1/en/connector-j-reference-jdbc-url-format.html) with `--source` and the SQL connection string for CockroachDB with `--target`: @@ -403,7 +403,7 @@ To learn more, see the [Migration Overview]({% link {{ page.version.version }}/m - [Migration Overview]({% link {{ page.version.version }}/migration-overview.md %}) - [Use the Schema Conversion Tool](https://www.cockroachlabs.com/docs/cockroachcloud/migrations-page) -- [Use the MOLT Verify tool]({% link {{ page.version.version }}/molt-verify.md %}) +- [Use the MOLT Verify tool]({% link molt/molt-verify.md %}) - [Import Performance Best Practices]({% link {{ page.version.version }}/import-performance-best-practices.md %}) - [Migrate from CSV]({% link {{ page.version.version }}/migrate-from-csv.md %}) - [Migrate from PostgreSQL]({% link {{ page.version.version }}/migrate-from-postgres.md %}) diff --git a/src/current/v23.2/migrate-from-postgres.md b/src/current/v23.2/migrate-from-postgres.md index 396ebdfdcb6..8d35c7c7dd4 100644 --- 
a/src/current/v23.2/migrate-from-postgres.md +++ b/src/current/v23.2/migrate-from-postgres.md @@ -247,9 +247,9 @@ By default, [`IMPORT INTO`]({% link {{ page.version.version }}/import-into.md %} ### Step 3. Validate the migrated data -Use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to check that the data on PostgreSQL and CockroachDB are consistent. +Use [MOLT Verify]({% link molt/molt-verify.md %}) to check that the data on PostgreSQL and CockroachDB are consistent. -1. [Install MOLT Verify.]({% link {{ page.version.version }}/molt-verify.md %}) +1. [Install MOLT Verify.]({% link molt/molt-verify.md %}) 1. In the directory where you installed MOLT Verify, use the following command to compare the two databases, specifying the PostgreSQL connection string with `--source` and the CockroachDB connection string with `--target`: @@ -288,7 +288,7 @@ To learn more, see the [Migration Overview]({% link {{ page.version.version }}/m - [Migration Overview]({% link {{ page.version.version }}/migration-overview.md %}) - [Use the Schema Conversion Tool](https://www.cockroachlabs.com/docs/cockroachcloud/migrations-page) -- [Use the MOLT Verify tool]({% link {{ page.version.version }}/molt-verify.md %}) +- [Use the MOLT Verify tool]({% link molt/molt-verify.md %}) - [Import Performance Best Practices]({% link {{ page.version.version }}/import-performance-best-practices.md %}) - [Migrate from CSV]({% link {{ page.version.version }}/migrate-from-csv.md %}) - [Migrate from MySQL]({% link {{ page.version.version }}/migrate-from-mysql.md %}) diff --git a/src/current/v23.2/migration-overview.md b/src/current/v23.2/migration-overview.md index 3e1372af46a..cf12e36472b 100644 --- a/src/current/v23.2/migration-overview.md +++ b/src/current/v23.2/migration-overview.md @@ -218,7 +218,8 @@ In the following order: You can use the following MOLT (Migrate Off Legacy Technology) tools to simplify these steps: - [Schema Conversion 
Tool](https://www.cockroachlabs.com/docs/cockroachcloud/migrations-page) -- [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) +- [MOLT Fetch]({% link molt/molt-fetch.md %}) +- [MOLT Verify]({% link molt/molt-verify.md %}) #### Convert the schema @@ -245,7 +246,7 @@ Then import the converted schema to a CockroachDB cluster: Before moving data, Cockroach Labs recommends [dropping any indexes]({% link {{ page.version.version }}/drop-index.md %}) on the CockroachDB database. The indexes can be [recreated]({% link {{ page.version.version }}/create-index.md %}) after the data is loaded. Doing so will optimize performance. {{site.data.alerts.end}} -After [converting the schema](#convert-the-schema), load your data into CockroachDB so that you can [test your application queries](#validate-queries). Then use [MOLT Fetch]({% link {{ page.version.version }}/molt-fetch.md %}) to move the source data to CockroachDB. +After [converting the schema](#convert-the-schema), load your data into CockroachDB so that you can [test your application queries](#validate-queries). Then use [MOLT Fetch]({% link molt/molt-fetch.md %}) to move the source data to CockroachDB. Alternatively, you can use one of the following methods to migrate the data. Additional tooling may be required to extract or convert the data to a supported file format. @@ -263,7 +264,7 @@ Note that CockroachDB defaults to the [`SERIALIZABLE`]({% link {{ page.version.v You can "shadow" your production workload by executing your source SQL statements on CockroachDB in parallel. You can then [validate the queries](#test-query-results-and-performance) on CockroachDB for consistency, performance, and potential issues with the migration. -The [CockroachDB Live Migration Service (MOLT LMS)]({% link {{ page.version.version }}/live-migration-service.md %}) can [perform shadowing]({% link {{ page.version.version }}/live-migration-service.md %}#shadowing-modes). 
This is intended only for [testing](#test-query-results-and-performance) or [performing a dry run](#perform-a-dry-run). Shadowing should **not** be used in production when performing a [live migration](#zero-downtime). +The [CockroachDB Live Migration Service (MOLT LMS)]({% link molt/live-migration-service.md %}) can [perform shadowing]({% link molt/live-migration-service.md %}#shadowing-modes). This is intended only for [testing](#test-query-results-and-performance) or [performing a dry run](#perform-a-dry-run). Shadowing should **not** be used in production when performing a [live migration](#zero-downtime). ##### Test query results and performance @@ -271,7 +272,7 @@ You can manually validate your queries by testing a subset of "critical queries" - Check the application logs for error messages and the API response time. If application requests are slower than expected, use the **SQL Activity** page on the [CockroachDB {{ site.data.products.cloud }} Console](https://www.cockroachlabs.com/docs/cockroachcloud/statements-page) or [DB Console]({% link {{ page.version.version }}/ui-statements-page.md %}) to find the longest-running queries that are part of that application request. If necessary, tune the queries according to our best practices for [SQL performance]({% link {{ page.version.version }}/performance-best-practices-overview.md %}). -- Compare the results of the queries and check that they are identical in both the source database and CockroachDB. To do this, you can use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}). +- Compare the results of the queries and check that they are identical in both the source database and CockroachDB. To do this, you can use [MOLT Verify]({% link molt/molt-verify.md %}). 
Test performance on a CockroachDB cluster that is appropriately [sized](#capacity-planning) for your workload: @@ -312,8 +313,8 @@ Using this method, consistency is achieved by only performing the cutover once a The following is a high-level overview of the migration steps. For considerations and details about the pros and cons of this approach, see [Migration Strategy: Lift and Shift]({% link {{ page.version.version }}/migration-strategy-lift-and-shift.md %}). 1. Stop application traffic to your source database. **This begins downtime.** -1. Use [MOLT Fetch]({% link {{ page.version.version }}/molt-fetch.md %}) to move the source data to CockroachDB. -1. After the data is migrated, use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to validate the consistency of the data between the source database and CockroachDB. +1. Use [MOLT Fetch]({% link molt/molt-fetch.md %}) to move the source data to CockroachDB. +1. After the data is migrated, use [MOLT Verify]({% link molt/molt-verify.md %}) to validate the consistency of the data between the source database and CockroachDB. 1. Perform a [cutover](#cutover-strategy) by resuming application traffic, now to CockroachDB. {% comment %}1. If you want the ability to [roll back](#all-at-once-rollback) the migration, replicate data back to the source database.{% endcomment %} @@ -325,18 +326,18 @@ The following is a high-level overview of the migration steps. The two approache To prioritize consistency and minimize downtime: -1. Set up the [CockroachDB Live Migration Service (MOLT LMS)]({% link {{ page.version.version }}/live-migration-service.md %}) to proxy for application traffic between your source database and CockroachDB. Do **not** shadow the application traffic. -1. Use [MOLT Fetch]({% link {{ page.version.version }}/molt-fetch.md %}) to move the source data to CockroachDB. 
Use the tool to [**replicate ongoing changes**]({% link {{ page.version.version }}/molt-fetch.md %}#replication) after it performs the initial load of data into CockroachDB. -1. As the data is migrating, use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to validate the consistency of the data between the source database and CockroachDB. -1. After nearly all data from your source database has been moved to CockroachDB (for example, with a <1-second delay or <1000 rows), use MOLT LMS to begin a [*consistent cutover*]({% link {{ page.version.version }}/live-migration-service.md %}#consistent-cutover) and stop application traffic to your source database. **This begins downtime.** +1. Set up the [CockroachDB Live Migration Service (MOLT LMS)]({% link molt/live-migration-service.md %}) to proxy for application traffic between your source database and CockroachDB. Do **not** shadow the application traffic. +1. Use [MOLT Fetch]({% link molt/molt-fetch.md %}) to move the source data to CockroachDB. Use the tool to [**replicate ongoing changes**]({% link molt/molt-fetch.md %}#replication) after it performs the initial load of data into CockroachDB. +1. As the data is migrating, use [MOLT Verify]({% link molt/molt-verify.md %}) to validate the consistency of the data between the source database and CockroachDB. +1. After nearly all data from your source database has been moved to CockroachDB (for example, with a <1-second delay or <1000 rows), use MOLT LMS to begin a [*consistent cutover*]({% link molt/live-migration-service.md %}#consistent-cutover) and stop application traffic to your source database. **This begins downtime.** 1. Wait for MOLT Fetch to finish replicating changes to CockroachDB. -1. Use MOLT LMS to commit the [consistent cutover]({% link {{ page.version.version }}/live-migration-service.md %}#consistent-cutover). This resumes application traffic, now to CockroachDB. +1. 
Use MOLT LMS to commit the [consistent cutover]({% link molt/live-migration-service.md %}#consistent-cutover). This resumes application traffic, now to CockroachDB. To achieve zero downtime with inconsistency: -1. Set up the [CockroachDB Live Migration Service (MOLT LMS)]({% link {{ page.version.version }}/live-migration-service.md %}) to proxy for application traffic between your source database and CockroachDB. Use a [shadowing mode]({% link {{ page.version.version }}/live-migration-service.md %}#shadowing-modes) to run application queries simultaneously on your source database and CockroachDB. -1. Use [MOLT Fetch]({% link {{ page.version.version }}/molt-fetch.md %}) to move the source data to CockroachDB. Use the tool to **replicate ongoing changes** after performing the initial load of data into CockroachDB. -1. As the data is migrating, you can use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to validate the consistency of the data between the source database and CockroachDB. +1. Set up the [CockroachDB Live Migration Service (MOLT LMS)]({% link molt/live-migration-service.md %}) to proxy for application traffic between your source database and CockroachDB. Use a [shadowing mode]({% link molt/live-migration-service.md %}#shadowing-modes) to run application queries simultaneously on your source database and CockroachDB. +1. Use [MOLT Fetch]({% link molt/molt-fetch.md %}) to move the source data to CockroachDB. Use the tool to **replicate ongoing changes** after performing the initial load of data into CockroachDB. +1. As the data is migrating, you can use [MOLT Verify]({% link molt/molt-verify.md %}) to validate the consistency of the data between the source database and CockroachDB. 1. After nearly all data from your source database has been moved to CockroachDB (for example, with a <1 second delay or <1000 rows), perform an [*immediate cutover*](#cutover-strategy) by pointing application traffic to CockroachDB. 1. 
Manually reconcile any inconsistencies caused by writes that were not replicated during the cutover. 1. Close the connection to the source database when you are ready to finish the migration. diff --git a/src/current/v23.2/molt-fetch.md b/src/current/v23.2/molt-fetch.md deleted file mode 100644 index 1438b0b30c6..00000000000 --- a/src/current/v23.2/molt-fetch.md +++ /dev/null @@ -1,750 +0,0 @@ ---- -title: MOLT Fetch -summary: Learn how to use the MOLT Fetch tool to move data from a source database to CockroachDB. -toc: true -docs_area: migrate ---- - -MOLT Fetch moves data from a source database into CockroachDB as part of a [database migration]({% link {{ page.version.version }}/migration-overview.md %}). - -MOLT Fetch uses [`IMPORT INTO`]({% link {{ page.version.version }}/import-into.md %}) or [`COPY FROM`]({% link {{ page.version.version }}/copy-from.md %}) to move the source data to cloud storage (Google Cloud Storage or Amazon S3), a local file server, or local memory. Once the data is exported, MOLT Fetch loads the data onto a target CockroachDB database. For details, see [Usage](#usage). - -## Supported databases - -The following source databases are currently supported: - -- [PostgreSQL]({% link {{ page.version.version }}/migrate-from-postgres.md %}) -- [MySQL]({% link {{ page.version.version }}/migrate-from-mysql.md %}) -- CockroachDB - -## Installation - -To install MOLT Fetch, download the binary that matches your system. 
To download the latest binary: - -| Operating System | AMD 64-bit | ARM 64-bit | -|------------------|---------------------------------------------------------------------------------|---------------------------------------------------------------------------------| -| Windows | [Download](https://molt.cockroachdb.com/molt/cli/molt-latest.windows-amd64.tgz) | [Download](https://molt.cockroachdb.com/molt/cli/molt-latest.windows-arm64.tgz) | -| Linux | [Download](https://molt.cockroachdb.com/molt/cli/molt-latest.linux-amd64.tgz) | [Download](https://molt.cockroachdb.com/molt/cli/molt-latest.linux-arm64.tgz) | -| Mac | [Download](https://molt.cockroachdb.com/molt/cli/molt-latest.darwin-amd64.tgz) | [Download](https://molt.cockroachdb.com/molt/cli/molt-latest.darwin-arm64.tgz) | - -For previous binaries, refer to the [MOLT version manifest](https://molt.cockroachdb.com/molt/cli/versions.html). - -{{site.data.alerts.callout_info}} -MOLT Fetch is supported on Red Hat Enterprise Linux (RHEL) 9 and above. -{{site.data.alerts.end}} - -## Setup - -Complete the following items before using MOLT Fetch: - -- Follow the recommendations in [Best practices](#best-practices) and [Security recommendations](#security-recommendations). - -- Ensure that the source and target schemas are identical, unless you enable automatic schema creation with the [`'drop-on-target-and-recreate'`](#target-table-handling) option. If you are creating the target schema manually, review the behaviors in [Mismatch handling](#mismatch-handling). 
- -- Ensure that the SQL user running MOLT Fetch has [`SELECT` privileges]({% link {{ page.version.version }}/grant.md %}#supported-privileges) on the source and target CockroachDB databases, along with the required privileges to run [`IMPORT INTO`]({% link {{ page.version.version }}/import-into.md %}#required-privileges) or [`COPY FROM`]({% link {{ page.version.version }}/copy-from.md %}#required-privileges) (depending on the [fetch mode](#fetch-mode)) on CockroachDB, as described on their respective pages. - -- If you plan to use continuous replication (using either [`--ongoing-replication`](#replication) or the [CDC cursor](#cdc-cursor)): - - - If you are migrating from PostgreSQL, enable logical replication. In `postgresql.conf` or in the SQL shell, set [`wal_level`](https://www.postgresql.org/docs/current/runtime-config-wal.html) to `logical`. - - - If you are migrating from MySQL, enable [GTID](https://dev.mysql.com/doc/refman/8.0/en/replication-options-gtids.html) consistency. In `mysql.cnf`, in the SQL shell, or as flags in the `mysql` start command, set `gtid-mode` and `enforce-gtid-consistency` to `ON` and set `binlog_row_metadata` to `full`. - -- Percent-encode the connection strings for the source database and [CockroachDB]({% link {{ page.version.version }}/connect-to-the-database.md %}). This ensures that the MOLT tools can parse special characters in your password. - - - Given a password `a$52&`, pass it to the `molt escape-password` command with single quotes: - - {% include_cached copy-clipboard.html %} - ~~~ shell - molt escape-password 'a$52&' - ~~~ - - ~~~ - Substitute the following encoded password in your original connection url string: - a%2452%26 - ~~~ - - - Use the encoded password in your connection string. 
For example: - - ~~~ - postgres://postgres:a%2452%26@localhost:5432/replicationload - ~~~ - - - If you are using Amazon S3 for [cloud storage](#cloud-storage): - - - Ensure that the environment variable and access tokens are set appropriately in the terminal running `molt fetch`. For example: - - {% include_cached copy-clipboard.html %} - ~~~ shell - export AWS_REGION='us-east-1' - export AWS_SECRET_ACCESS_KEY='key' - export AWS_ACCESS_KEY_ID='id' - ~~~ - - - Ensure the S3 bucket is created and accessible to CockroachDB. - - - If you are using Google Cloud Storage for [cloud storage](#cloud-storage): - - - Ensure that your local environment is authenticated using [Application Default Credentials](https://cloud.google.com/sdk/gcloud/reference/auth/application-default/login): - - {% include_cached copy-clipboard.html %} - ~~~ shell - gcloud init - gcloud auth application-default login - ~~~ - - - Ensure the Google Cloud Storage bucket is created and accessible to CockroachDB. - -## Best practices - -- To prevent connections from terminating prematurely during data export, set the following to high values on the source database: - - - **Maximum allowed number of connections:** MOLT Fetch can export data across multiple connections. The number of connections it will create is the number of shards ([`--export-concurrency`](#global-flags)) multiplied by the number of tables ([`--table-concurrency`](#global-flags)) being exported concurrently. - - **Maximum lifetime of a connection:** This is particularly important for MySQL sources, which can only use a single connection to move data. See the following note. - -- If a MySQL database is set as a [source](#source-and-target-databases), the [`--table-concurrency`](#global-flags) and [`--export-concurrency`](#global-flags) flags **cannot** be set above `1`. If these values are changed, MOLT Fetch returns an error. This guarantees consistency when moving data from MySQL, due to MySQL limitations. 
MySQL data is migrated to CockroachDB one table and shard at a time, using [`WITH CONSISTENT SNAPSHOT`](https://dev.mysql.com/doc/refman/8.0/en/commit.html) transactions. - -- To prevent memory outages during data export of tables with large rows, estimate the amount of memory used to export a table: - - ~~~ - --row-batch-size * --export-concurrency * average size of the table rows - ~~~ - - If you are exporting more than one table at a time (i.e., [`--table-concurrency`](#global-flags) is set higher than `1`), add the estimated memory usage for the tables with the largest row sizes. Ensure that you have sufficient memory to run `molt fetch`, and adjust `--row-batch-size` accordingly. - -- If a table in the source database is much larger than the other tables, [filter and export the largest table](#schema-and-table-selection) in its own `molt fetch` task. Repeat this for each of the largest tables. Then export the remaining tables in another task. - -- When using [`IMPORT INTO` mode](#fetch-mode) to load tables into CockroachDB, if the fetch process terminates before the import job completes, the hanging import job on the target database will keep the table offline. To make this table accessible again, [manually resume or cancel the job]({% link {{ page.version.version }}/import-into.md %}#view-and-control-import-jobs). Then resume `molt fetch` using [continuation](#fetch-continuation), or restart the process from the beginning. - -## Security recommendations - -Cockroach Labs **strongly** recommends the following: - -### Secure connections - -- Use secure connections to the source and [target CockroachDB database]({% link {{ page.version.version }}/connection-parameters.md %}#additional-connection-parameters) whenever possible. -- By default, insecure connections (i.e., `sslmode=disable` on PostgreSQL; `sslmode` not set on MySQL) are disallowed. When using an insecure connection, `molt fetch` returns an error. 
To override this check, you can enable the `--allow-tls-mode-disable` flag. Do this **only** for testing, or if a secure SSL/TLS connection to the source or target database is not possible. - -### Connection strings - -- Avoid plaintext connection strings. -- Provide your connection strings as environment variables. -- If possible within your security infrastructure, use an external secrets manager to load the environment variables from stored secrets. - - For example, to export connection strings as environment variables: - - ~~~ shell - export SOURCE="postgres://postgres:postgres@localhost:5432/molt?sslmode=verify-full" - export TARGET="postgres://root@localhost:26257/molt?sslmode=verify-full" - ~~~ - - Afterward, to pass the environment variables in `molt fetch` commands: - - ~~~ shell - molt fetch \ - --source "$SOURCE" \ - --target "$TARGET" \ - --table-filter 'employees' \ - --bucket-path 's3://molt-test' \ - --table-handling truncate-if-exists - ~~~ - -### Secure cloud storage - -- When using [cloud storage](#cloud-storage) for your intermediate store, ensure that access control is properly configured. Refer to the [GCP](https://cloud.google.com/storage/docs/access-control) or [AWS](https://docs.aws.amazon.com/AmazonS3/latest/userguide/security-iam.html) documentation. -- Do not use public cloud storage in production. - -### Perform a dry run - -To verify that your connections and configuration work properly, run MOLT Fetch in a staging environment before moving any data in production. Use a test or development environment that is as similar as possible to production. - -## Commands - -| Command | Usage | -|---------|---------------------------------------------------------------------------------------------------| -| `fetch` | Start the fetch process. This loads data from a source database to a target CockroachDB database. 
| - -### Subcommands - -| Command | Usage | -|--------------|----------------------------------------------------------------------| -| `tokens list` | List active [continuation tokens](#list-active-continuation-tokens). | - -## Flags - -### Global flags - -| Flag | Description | -|-----------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `--source` | (Required) Connection string for the source database. For details, see [Source and target databases](#source-and-target-databases). | -| `--target` | (Required) Connection string for the target database. For details, see [Source and target databases](#source-and-target-databases). | -| `--allow-tls-mode-disable` | Allow insecure connections to databases. Secure SSL/TLS connections should be used by default. This should be enabled **only** if secure SSL/TLS connections to the source or target database are not possible. | -| `--bucket-path` | The path within the [cloud storage](#cloud-storage) bucket where intermediate files are written (e.g., `'s3://bucket/path'` or `'gs://bucket/path'`). | -| `--cleanup` | Whether to delete intermediate files after moving data using [cloud or local storage](#data-path). **Note:** Cleanup does not occur on [continuation](#fetch-continuation). | -| `--compression` | Compression method for data when using [`IMPORT INTO` mode](#fetch-mode) (`gzip`/`none`).

**Default:** `gzip` | -| `--continuation-file-name` | Restart fetch at the specified filename if the process encounters an error. `--fetch-id` must be specified. For details, see [Fetch continuation](#fetch-continuation). | -| `--continuation-token` | Restart fetch at a specific table, using the specified continuation token, if the process encounters an error. `--fetch-id` must be specified. For details, see [Fetch continuation](#fetch-continuation). | -| `--crdb-pts-duration` | The duration for which each timestamp used in data export from a CockroachDB source is protected from garbage collection. This ensures that the data snapshot remains consistent. For example, if set to `24h`, each timestamp is protected for 24 hours from the initiation of the export job. This duration is extended at regular intervals specified in `--crdb-pts-refresh-interval`.

**Default:** `24h0m0s` | -| `--crdb-pts-refresh-interval` | The frequency at which the protected timestamp's validity is extended. This interval maintains protection of the data snapshot until data export from a CockroachDB source is completed. For example, if set to `10m`, the protected timestamp's expiration will be extended by the duration specified in `--crdb-pts-duration` (e.g., `24h`) every 10 minutes while export is not complete.

**Default:** `10m0s` | -| `--direct-copy` | Enables [direct copy mode](#fetch-mode), which copies data directly from source to target without using an intermediate store. | -| `--export-concurrency` | Number of shards to export at a time, each on a dedicated thread. This only applies when exporting data from the source database, not when loading data into the target database. The number of concurrent threads is the product of `--export-concurrency` and `--table-concurrency`.

This value **cannot** be set higher than `1` when moving data from MySQL. Refer to [Best practices](#best-practices).

**Default:** `4` with a PostgreSQL source; `1` with a MySQL source | -| `--fetch-id` | Restart fetch process corresponding to the specified ID. If `--continuation-file-name` or `--continuation-token` are not specified, fetch restarts for all failed tables. | -| `--flush-rows` | Number of rows before the source data is flushed to intermediate files. **Note:** If `--flush-size` is also specified, the fetch behavior is based on the flag whose criterion is met first. | -| `--flush-size` | Size (in bytes) before the source data is flushed to intermediate files. **Note:** If `--flush-rows` is also specified, the fetch behavior is based on the flag whose criterion is met first. | -| `--import-batch-size` | The number of files to be imported at a time to the target database. This applies only when using the [`IMPORT INTO` mode](#fetch-mode) for loading data into the target. **Note:** Increasing this value can improve the performance of full-scan queries on the target database shortly after fetch completes, but very high values are not recommended. If any individual file in the import batch fails, you must [retry](#fetch-continuation) the entire batch.

**Default:** `1000` | -| `--local-path` | The path within the [local file server](#local-file-server) where intermediate files are written (e.g., `data/migration/cockroach`). `--local-path-listen-addr` must be specified. | -| `--local-path-crdb-access-addr` | Address of a [local file server](#local-file-server) that is reachable by CockroachDB. This flag is only necessary if CockroachDB cannot reach the local address specified with `--local-path-listen-addr` (e.g., when moving data to a CockroachDB {{ site.data.products.cloud }} deployment). `--local-path` and `--local-path-listen-addr` must be specified.

**Default:** Value of `--local-path-listen-addr`. | -| `--local-path-listen-addr` | Write intermediate files to a [local file server](#local-file-server) at the specified address (e.g., `'localhost:3000'`). `--local-path` must be specified. | -| `--log-file` | Write messages to the specified log filename. If not specified, messages are only written to `stdout`. | -| `--logging` | Level at which to log messages (`'trace'`/`'debug'`/`'info'`/`'warn'`/`'error'`/`'fatal'`/`'panic'`).

**Default:** `'info'` | -| `--metrics-listen-addr` | Address of the metrics endpoint.

**Default:** `'127.0.0.1:3030'` | -| `--non-interactive` | Run the fetch process without interactive prompts. This is recommended **only** when running `molt fetch` in an automated process (i.e., a job or continuous integration). | -| `--ongoing-replication` | Enable continuous [replication](#replication) to begin after the fetch process succeeds (i.e., initial source data is loaded into CockroachDB). | -| `--pglogical-replication-slot-drop-if-exists` | Drop the replication slot, if specified with `--pglogical-replication-slot-name`. Otherwise, the default replication slot is not dropped. | -| `--pglogical-replication-slot-name` | The name of a replication slot to create before taking a snapshot of data (e.g., `'fetch'`). **Required** in order to perform continuous [replication](#replication) from a source PostgreSQL database. | -| `--pglogical-replication-slot-plugin` | The output plugin used for logical replication under `--pglogical-replication-slot-name`.

**Default:** `pgoutput` | -| `--pprof-listen-addr` | Address of the pprof endpoint.

**Default:** `'127.0.0.1:3031'` | -| `--replicator-flags` | If continuous [replication](#replication) is enabled with `--ongoing-replication`, specify replication flags ([PostgreSQL](https://github.com/cockroachdb/replicator/wiki/PGLogical#postgresql-logical-replication) or [MySQL](https://github.com/cockroachdb/replicator/wiki/MYLogical#mysqlmariadb-replication)) to override. | -| `--row-batch-size` | Number of rows per shard to export at a time. See [Best practices](#best-practices).

**Default:** `100000` | -| `--schema-filter` | Move schemas that match a specified [regular expression](https://wikipedia.org/wiki/Regular_expression).

**Default:** `'.*'` | -| `--table-concurrency` | Number of tables to export at a time. The number of concurrent threads is the product of `--export-concurrency` and `--table-concurrency`.

This value **cannot** be set higher than `1` when moving data from MySQL. Refer to [Best practices](#best-practices).

**Default:** `4` with a PostgreSQL source; `1` with a MySQL source | -| `--table-filter` | Move tables that match a specified [POSIX regular expression](https://wikipedia.org/wiki/Regular_expression).

**Default:** `'.*'` | -| `--table-handling` | How tables are initialized on the target database (`'none'`/`'drop-on-target-and-recreate'`/`'truncate-if-exists'`). For details, see [Target table handling](#target-table-handling).

**Default:** `'none'` | -| `--type-map-file` | Path to a JSON file that contains explicit type mappings for automatic schema creation, when enabled with `--table-handling 'drop-on-target-and-recreate'`. For details on the JSON format and valid type mappings, see [type mapping](#type-mapping). | -| `--use-console-writer` | Use the console writer, which has cleaner log output but introduces more latency.

**Default:** `false` (log as structured JSON) | -| `--use-copy` | Use [`COPY FROM` mode](#fetch-mode) to move data. This makes tables queryable during data load, but is slower than `IMPORT INTO` mode. For details, see [Fetch mode](#fetch-mode). | - -### `tokens list` flags - -| Flag | Description | -|-----------------------|---------------------------------------------------------------------------------------------------------------------------------------------| -| `--conn-string` | (Required) Connection string for the target database. For details, see [List active continuation tokens](#list-active-continuation-tokens). | -| `-n`, `--num-results` | Number of results to return. | - -## Usage - -The following sections describe how to use the `molt fetch` [flags](#flags). - -### Source and target databases - -{{site.data.alerts.callout_success}} -Follow the recommendations in [Connection strings](#connection-strings). -{{site.data.alerts.end}} - -`--source` specifies the connection string of the source database. - -PostgreSQL or CockroachDB: - -{% include_cached copy-clipboard.html %} -~~~ ---source 'postgresql://{username}:{password}@{host}:{port}/{database}' -~~~ - -MySQL: - -{% include_cached copy-clipboard.html %} -~~~ ---source 'mysql://{username}:{password}@{protocol}({host}:{port})/{database}' -~~~ - -`--target` specifies the [CockroachDB connection string]({% link {{ page.version.version }}/connection-parameters.md %}#connect-using-a-url): - -{% include_cached copy-clipboard.html %} -~~~ ---target 'postgresql://{username}:{password}@{host}:{port}/{database} -~~~ - -### Fetch mode - -MOLT Fetch can use either [`IMPORT INTO`]({% link {{ page.version.version }}/import-into.md %}) or [`COPY FROM`]({% link {{ page.version.version }}/copy-from.md %}) to load data into CockroachDB. 
- -By default, MOLT Fetch uses `IMPORT INTO`: - -- `IMPORT INTO` mode achieves the highest throughput, but [requires taking the tables **offline**]({% link {{ page.version.version }}/import-into.md %}#considerations) to achieve its import speed. Tables are taken back online once an [import job]({% link {{ page.version.version }}/import-into.md %}#view-and-control-import-jobs) completes successfully. See [Best practices](#best-practices). -- `IMPORT INTO` mode supports compression using the `--compression` flag, which reduces the amount of storage used. - -`--use-copy` configures MOLT Fetch to use `COPY FROM`: - -- `COPY FROM` mode enables your tables to remain online and accessible. However, it is slower than using [`IMPORT INTO`]({% link {{ page.version.version }}/import-into.md %}). -- `COPY FROM` mode does not support compression. - -{{site.data.alerts.callout_info}} -`COPY FROM` is also used in [direct copy mode](#direct-copy). -{{site.data.alerts.end}} - -### Data path - -MOLT Fetch can move the source data to CockroachDB via [cloud storage](#cloud-storage), a [local file server](#local-file-server), or [directly](#direct-copy) without an intermediate store. - -#### Cloud storage - -{{site.data.alerts.callout_success}} -Follow the recommendations in [Secure cloud storage](#secure-cloud-storage). -{{site.data.alerts.end}} - -`--bucket-path` specifies that MOLT Fetch should write intermediate files to a path within a [Google Cloud Storage](https://cloud.google.com/storage/docs/buckets) or [Amazon S3](https://aws.amazon.com/s3/) bucket to which you have the necessary permissions. For example: - -Google Cloud Storage: - -{% include_cached copy-clipboard.html %} -~~~ ---bucket-path 'gs://migration/data/cockroach' -~~~ - -Amazon S3: - -{% include_cached copy-clipboard.html %} -~~~ ---bucket-path 's3://migration/data/cockroach' -~~~ - -Cloud storage can be used with either the [`IMPORT INTO` or `COPY FROM` modes](#fetch-mode). 
- -#### Local file server - -`--local-path` specifies that MOLT Fetch should write intermediate files to a path within a [local file server]({% link {{ page.version.version }}/use-a-local-file-server.md %}). `local-path-listen-addr` specifies the address of the local file server. For example: - -{% include_cached copy-clipboard.html %} -~~~ ---local-path /migration/data/cockroach ---local-path-listen-addr 'localhost:3000' -~~~ - -In some cases, CockroachDB will not be able to use the local address specified by `--local-path-listen-addr`. This will depend on where CockroachDB is deployed, the runtime OS, and the source dialect. - -For example, if you are migrating to CockroachDB {{ site.data.products.cloud }}, such that the {{ site.data.products.cloud }} cluster is in a different physical location than the machine running `molt fetch`, then CockroachDB cannot reach an address such as `localhost:3000`. In these situations, use `--local-path-crdb-access-addr` to specify an address for the local file server that is reachable by CockroachDB. For example: - -{% include_cached copy-clipboard.html %} -~~~ ---local-path /migration/data/cockroach ---local-path-listen-addr 'localhost:3000' ---local-path-crdb-access-addr '44.55.66.77:3000' -~~~ - -A local file server can be used with either the [`IMPORT INTO` or `COPY FROM` modes](#fetch-mode). - -{{site.data.alerts.callout_success}} -[Cloud storage](#cloud-storage) is often preferable to a local file server, which can require considerable disk space. 
-{{site.data.alerts.end}} - -#### Direct copy - -`--direct-copy` specifies that MOLT Fetch should use `COPY FROM` to move the source data directly to CockroachDB without an intermediate store: - -- Because the data is held in memory, the machine must have sufficient RAM for the data currently in flight: - - ~~~ - average size of each row * --row-batch-size * --export-concurrency * --table-concurrency - ~~~ - -- Direct copy mode does not support compression or [continuation](#fetch-continuation). -- The [`--use-copy`](#fetch-mode) flag is redundant with `--direct-copy`. - -### Schema and table selection - -By default, MOLT Fetch moves all data from the [`--source`](#source-and-target-databases) database to CockroachDB. Use the following flags to move a subset of data. - -`--schema-filter` specifies a range of schema objects to move to CockroachDB, formatted as a POSIX regex string. For example, to move every table in the source database's `public` schema: - -{% include_cached copy-clipboard.html %} -~~~ ---schema-filter 'public' -~~~ - -`--table-filter` specifies a range of tables to move to CockroachDB, formatted as a POSIX regex string. For example, to move every table in the source database that has "user" in the title: - -{% include_cached copy-clipboard.html %} -~~~ ---table-filter '.*user.*' -~~~ - -### Target table handling - -`--table-handling` defines how MOLT Fetch loads data on the CockroachDB tables that [match the selection](#schema-and-table-selection). 
- -To load the data without changing the existing data in the tables, use `'none'`: - -{% include_cached copy-clipboard.html %} -~~~ ---table-handling 'none' -~~~ - -To [truncate]({% link {{ page.version.version }}/truncate.md %}) tables before loading the data, use `'truncate-if-exists'`: - -{% include_cached copy-clipboard.html %} -~~~ ---table-handling 'truncate-if-exists' -~~~ - -To drop existing tables and create new tables before loading the data, use `'drop-on-target-and-recreate'`: - -{% include_cached copy-clipboard.html %} -~~~ ---table-handling 'drop-on-target-and-recreate' -~~~ - -When using the `'drop-on-target-and-recreate'` option, MOLT Fetch creates a new CockroachDB table to load the source data if one does not already exist. To guide the automatic schema creation, you can [explicitly map source types to CockroachDB types](#type-mapping). - -#### Mismatch handling - -If either [`'none'`](#target-table-handling) or [`'truncate-if-exists'`](#target-table-handling) is set, `molt fetch` loads data into the existing tables on the target CockroachDB database. If the target schema mismatches the source schema, `molt fetch` will exit early in [certain cases](#exit-early), and will need to be re-run from the beginning. - -{{site.data.alerts.callout_info}} -This does not apply when [`'drop-on-target-and-recreate'`](#target-table-handling) is specified, since this mode automatically creates a compatible CockroachDB schema. -{{site.data.alerts.end}} - -`molt fetch` exits early in the following cases, and will output a log with a corresponding `mismatch_tag` and `failable_mismatch` set to `true`: - -- A source table is missing a primary key. -- A source and table primary key have mismatching types. -- A [`STRING`]({% link {{ page.version.version }}/string.md %}) primary key has a different [collation]({% link {{ page.version.version }}/collate.md %}) on the source and target. 
-- A source and target column have mismatching types that are not [allowable mappings](#type-mapping). -- A target table is missing a column that is in the corresponding source table. -- A source column is nullable, but the corresponding target column is not nullable (i.e., the constraint is more strict on the target). - -`molt fetch` can continue in the following cases, and will output a log with a corresponding `mismatch_tag` and `failable_mismatch` set to `false`: - -- A target table has a column that is not in the corresponding source table. -- A source column has a `NOT NULL` constraint, and the corresponding target column is nullable (i.e., the constraint is less strict on the target). -- A [`DEFAULT`]({% link {{ page.version.version }}/default-value.md %}), [`CHECK`]({% link {{ page.version.version }}/check.md %}), [`FOREIGN KEY`]({% link {{ page.version.version }}/foreign-key.md %}), or [`UNIQUE`]({% link {{ page.version.version }}/unique.md %}) constraint is specified on a target column and not on the source column. - -#### Type mapping - -If [`'drop-on-target-and-recreate'`](#target-table-handling) is set, MOLT Fetch automatically creates a CockroachDB schema that is compatible with the source data. The column types are determined as follows: - -- PostgreSQL types are mapped to existing CockroachDB [types]({% link {{ page.version.version }}/data-types.md %}) that have the same [`OID`]({% link {{ page.version.version }}/oid.md %}). 
-- The following MySQL types are mapped to corresponding CockroachDB types: - - | MySQL type | CockroachDB type | - |-----------------------------------------------------|----------------------------------------------------------------------------------------------------------------| - | `CHAR`, `CHARACTER`, `VARCHAR`, `NCHAR`, `NVARCHAR` | [`VARCHAR`]({% link {{ page.version.version }}/string.md %}) | - | `TINYTEXT`, `TEXT`, `MEDIUMTEXT`, `LONGTEXT` | [`STRING`]({% link {{ page.version.version }}/string.md %}) | - | `GEOMETRY` | [`GEOMETRY`]({% link {{ page.version.version }}/architecture/glossary.md %}#geometry) | - | `LINESTRING` | [`LINESTRING`]({% link {{ page.version.version }}/linestring.md %}) | - | `POINT` | [`POINT`]({% link {{ page.version.version }}/point.md %}) | - | `POLYGON` | [`POLYGON`]({% link {{ page.version.version }}/polygon.md %}) | - | `MULTIPOINT` | [`MULTIPOINT`]({% link {{ page.version.version }}/multipoint.md %}) | - | `MULTILINESTRING` | [`MULTILINESTRING`]({% link {{ page.version.version }}/multilinestring.md %}) | - | `MULTIPOLYGON` | [`MULTIPOLYGON`]({% link {{ page.version.version }}/multipolygon.md %}) | - | `GEOMETRYCOLLECTION`, `GEOMCOLLECTION` | [`GEOMETRYCOLLECTION`]({% link {{ page.version.version }}/geometrycollection.md %}) | - | `JSON` | [`JSONB`]({% link {{ page.version.version }}/jsonb.md %}) | - | `TINYINT`, `INT1` | [`INT2`]({% link {{ page.version.version }}/int.md %}) | - | `BLOB` | [`BYTES`]({% link {{ page.version.version }}/bytes.md %}) | - | `SMALLINT`, `INT2` | [`INT2`]({% link {{ page.version.version }}/int.md %}) | - | `MEDIUMINT`, `INT`, `INTEGER`, `INT4` | [`INT4`]({% link {{ page.version.version }}/int.md %}) | - | `BIGINT`, `INT8` | [`INT`]({% link {{ page.version.version }}/int.md %}) | - | `FLOAT` | [`FLOAT4`]({% link {{ page.version.version }}/float.md %}) | - | `DOUBLE` | [`FLOAT`]({% link {{ page.version.version }}/float.md %}) | - | `DECIMAL`, `NUMERIC`, `REAL` | [`DECIMAL`]({% link {{ 
page.version.version }}/decimal.md %}) (Negative scale values are autocorrected to `0`) | - | `BINARY`, `VARBINARY` | [`BYTES`]({% link {{ page.version.version }}/bytes.md %}) | - | `DATETIME` | [`TIMESTAMP`]({% link {{ page.version.version }}/timestamp.md %}) | - | `TIMESTAMP` | [`TIMESTAMPTZ`]({% link {{ page.version.version }}/timestamp.md %}) | - | `TIME` | [`TIME`]({% link {{ page.version.version }}/time.md %}) | - | `BIT` | [`VARBIT`]({% link {{ page.version.version }}/bit.md %}) | - | `DATE` | [`DATE`]({% link {{ page.version.version }}/date.md %}) | - | `TINYBLOB`, `MEDIUMBLOB`, `LONGBLOB` | [`BYTES`]({% link {{ page.version.version }}/bytes.md %}) | - | `BOOL`, `BOOLEAN` | [`BOOL`]({% link {{ page.version.version }}/bool.md %}) | - | `ENUM` | [`ANY_ENUM`]({% link {{ page.version.version }}/enum.md %}) | - -- To override the default mappings for automatic schema creation, you can map source to target CockroachDB types explicitly. These are specified using a JSON file and `--type-map-file`. 
The allowable custom mappings are valid CockroachDB aliases, casts, and the following mappings specific to MOLT Fetch and [Verify]({% link {{ page.version.version }}/molt-verify.md %}): - - - [`TIMESTAMP`]({% link {{ page.version.version }}/timestamp.md %}) <> [`TIMESTAMPTZ`]({% link {{ page.version.version }}/timestamp.md %}) - - [`VARCHAR`]({% link {{ page.version.version }}/string.md %}) <> [`UUID`]({% link {{ page.version.version }}/uuid.md %}) - - [`BOOL`]({% link {{ page.version.version }}/bool.md %}) <> [`INT2`]({% link {{ page.version.version }}/int.md %}) - - [`VARBIT`]({% link {{ page.version.version }}/bit.md %}) <> [`TEXT`]({% link {{ page.version.version }}/string.md %}) - - [`JSONB`]({% link {{ page.version.version }}/jsonb.md %}) <> [`TEXT`]({% link {{ page.version.version }}/string.md %}) - - [`INET`]({% link {{ page.version.version }}/inet.md %}) <> [`TEXT`]({% link {{ page.version.version }}/string.md %}) - -`--type-map-file` specifies the path to the JSON file containing the explicit type mappings. For example: - -{% include_cached copy-clipboard.html %} -~~~ ---type-map-file 'type-mappings.json' -~~~ - -The JSON is formatted as follows: - -~~~ json -[ - { - "table": "public.t1", - "column-type-map": [ - { - "column": "*", - "type-kv": { - "source-type": "int", - "crdb-type": "INT2" - } - }, - { - "column": "name", - "type-kv": { - "source-type": "varbit", - "crdb-type": "string" - } - } - ] - } -] -~~~ - -- `table` specifies the table that will use the custom type mappings in `column-type-map`, written as `{schema}.{table}`. -- `column` specifies the column that will use the custom type mapping in `type-kv`. If `*` is specified, then all columns in the `table` with the matching `source-type` are converted. -- `type-kv` specifies the `source-type` that maps to the target `crdb-type`. 
- -### Fetch continuation - -If MOLT Fetch fails while loading data into CockroachDB from intermediate files, it exits with an error message, fetch ID, and [continuation token](#list-active-continuation-tokens) for each table that failed to load on the target database. You can use this information to continue the process from the *continuation point* where it was interrupted. For an example, see [Continue fetch after encountering an error](#continue-fetch-after-encountering-an-error). - -Continuation is only possible under the following conditions: - -- All data has been exported from the source database into intermediate files on [cloud](#cloud-storage) or [local storage](#local-file-server). -- The *initial load* of source data to the target CockroachDB database is incomplete. This means that ongoing [replication](#replication) of source data has not begun. - -{{site.data.alerts.callout_info}} -Only one fetch ID and set of continuation tokens, each token corresponding to a table, are active at any time. See [List active continuation tokens](#list-active-continuation-tokens). -{{site.data.alerts.end}} - -To retry all data starting from the continuation point, reissue the `molt fetch` command and include the `--fetch-id`. - -{% include_cached copy-clipboard.html %} -~~~ ---fetch-id d44762e5-6f70-43f8-8e15-58b4de10a007 -~~~ - -To retry a specific table that failed, include both `--fetch-id` and `--continuation-token`. The latter flag specifies a token string that corresponds to a specific table on the source database. A continuation token is written in the `molt fetch` output for each failed table. If the fetch process encounters a subsequent error, it generates a new token for each failed table. See [List active continuation tokens](#list-active-continuation-tokens). - -{{site.data.alerts.callout_info}} -This will retry only the table that corresponds to the continuation token. 
If the fetch process succeeds, there may still be source data that is not yet loaded into CockroachDB. -{{site.data.alerts.end}} - -{% include_cached copy-clipboard.html %} -~~~ ---fetch-id d44762e5-6f70-43f8-8e15-58b4de10a007 ---continuation-token 011762e5-6f70-43f8-8e15-58b4de10a007 -~~~ - -To retry all data starting from a specific file, include both `--fetch-id` and `--continuation-file-name`. The latter flag specifies the filename of an intermediate file in [cloud or local storage](#data-path). All filenames are prepended with `part_` and have the `.csv.gz` or `.csv` extension, depending on compression type (gzip by default). For example: - -{% include_cached copy-clipboard.html %} -~~~ ---fetch-id d44762e5-6f70-43f8-8e15-58b4de10a007 ---continuation-file-name part_00000003.csv.gz -~~~ - -{{site.data.alerts.callout_info}} -Continuation is not possible when using [direct copy mode](#direct-copy). -{{site.data.alerts.end}} - -#### List active continuation tokens - -To view all active continuation tokens, issue a `molt fetch tokens list` command along with `--conn-string`, which specifies the [connection string]({% link {{ page.version.version }}/connection-parameters.md %}#connect-using-a-url) for the target CockroachDB database. For example: - -{% include_cached copy-clipboard.html %} -~~~ shell -molt fetch tokens list \ ---conn-string 'postgres://root@localhost:26257/defaultdb?sslmode=verify-full' -~~~ - -~~~ -+--------------------------------------+--------------------------------------+------------------+----------------------+ -| ID | FETCH ID | TABLE NAME | FILE NAME | -+--------------------------------------+--------------------------------------+------------------+----------------------+ -| f6f0284c-d9c1-43c9-8fde-af609d0dbd82 | 66443597-5689-4df3-a7b9-9fc5e27180eb | public.employees | part_00000001.csv.gz | -+--------------------------------------+--------------------------------------+------------------+----------------------+ -Continuation Tokens. 
-~~~ - -### Replication - -`--ongoing-replication` enables logical replication from the source database to the target CockroachDB database. - -{% include_cached copy-clipboard.html %} -~~~ ---ongoing-replication -~~~ - -When the `--ongoing-replication` flag is set, changes on the source database are continuously replicated on CockroachDB. This begins only after the fetch process succeeds—i.e., the initial source data is loaded into CockroachDB—as indicated by a `fetch complete` message in the output. - -Before using this feature, configure the source PostgreSQL or MySQL database for continuous replication, as described in [Setup](#setup). - -If the source is a PostgreSQL database, you must also specify a replication slot name: - -{% include_cached copy-clipboard.html %} -~~~ ---ongoing-replication ---pglogical-replication-slot-name 'replication_slot' -~~~ - -To customize the replication behavior (an advanced use case), use `--replicator-flags` to specify one or more replication-specific flags ([PostgreSQL](https://github.com/cockroachdb/replicator/wiki/PGLogical#postgresql-logical-replication) or [MySQL](https://github.com/cockroachdb/replicator/wiki/MYLogical#mysqlmariadb-replication)) to override. - -{% include_cached copy-clipboard.html %} -~~~ ---ongoing-replication ---replicator-flags "--applyTimeout '1h' --parallelism 64" -~~~ - -To cancel replication, enter `ctrl-c` to issue a `SIGTERM` signal. This returns an exit code `0`. If replication fails, a non-zero exit code is returned. - -### CDC cursor - -A change data capture (CDC) cursor is written to the output as `cdc_cursor` at the beginning and end of the fetch process. 
For example: - -~~~ json -{"level":"info","type":"summary","fetch_id":"735a4fe0-c478-4de7-a342-cfa9738783dc","num_tables":1,"tables":["public.employees"],"cdc_cursor":"0/3F41E40","net_duration_ms":4879.890041,"net_duration":"000h 00m 04s","time":"2024-03-18T12:37:02-04:00","message":"fetch complete"} -~~~ - -You can use the `cdc_cursor` value with an external change data capture (CDC) tool to continuously replicate subsequent changes on the source data to CockroachDB. - -## Examples - -The following examples demonstrate how to issue `molt fetch` commands to load data into CockroachDB. These examples assume that [secure connections](#secure-connections) to the source and target database are used. - -{{site.data.alerts.callout_success}} -After successfully running MOLT Fetch, you can run [`molt verify`]({% link {{ page.version.version }}/molt-verify.md %}) to confirm that replication worked successfully without missing or mismatched rows. -{{site.data.alerts.end}} - -### Load PostgreSQL data via S3 with ongoing replication - -The following `molt fetch` command uses `IMPORT INTO` to load a subset of tables from a PostgreSQL database to CockroachDB. - -{% include_cached copy-clipboard.html %} -~~~ shell -molt fetch \ ---source 'postgres://postgres:postgres@localhost/molt' \ ---target 'postgres://root@localhost:26257/defaultdb?sslmode=verify-full' \ ---table-handling 'truncate-if-exists' \ ---table-filter 'employees' \ ---bucket-path 's3://migration/data/cockroach' \ ---cleanup \ ---pglogical-replication-slot-name 'replication_slot' \ ---ongoing-replication -~~~ - -- `--table-handling` specifies that existing tables on CockroachDB should be truncated before the source data is loaded. -- `--table-filter` filters for tables with the `employees` string in the name. -- `--bucket-path` specifies a directory on an [Amazon S3 bucket](#data-path) where intermediate files will be written. 
-- `--cleanup` specifies that the intermediate files should be removed after the source data is loaded. -- `--pglogical-replication-slot-name` specifies a replication slot name to be created on the source PostgreSQL database. This is used in continuous [replication](#replication). -- `--ongoing-replication` starts continuous [replication](#replication) of data from the source database to CockroachDB after the fetch process succeeds. - -If the fetch process succeeds, the output displays a `fetch complete` message like the following: - -~~~ json -{"level":"info","type":"summary","fetch_id":"f5cb422f-4bb4-4bbd-b2ae-08c4d00d1e7c","num_tables":1,"tables":["public.employees"],"cdc_cursor":"0/3F41E40","net_duration_ms":6752.847625,"net_duration":"000h 00m 06s","time":"2024-03-18T12:30:37-04:00","message":"fetch complete"} -~~~ - -{{site.data.alerts.callout_info}} -If the fetch process encounters an error, it will exit and can be [continued](#continue-fetch-after-encountering-an-error). -{{site.data.alerts.end}} - -Continuous [replication](#replication) begins immediately afterward: - -~~~ json -{"level":"info","time":"2024-05-13T14:33:07-04:00","message":"starting replicator"} -{"level":"info","time":"2024-05-13T14:36:22-04:00","message":"creating publication"} -~~~ - -To cancel replication, enter `ctrl-c` to issue a `SIGTERM` signal. - -### Load MySQL data via GCP with ongoing replication - -The following `molt fetch` command uses `COPY FROM` to load a subset of tables from a MySQL database to CockroachDB. 
- -{% include_cached copy-clipboard.html %} -~~~ shell -molt fetch \ ---source 'mysql://root:password@localhost/molt?sslcert=.%2fsource_certs%2fclient.root.crt&sslkey=.%2fsource_certs%2fclient.root.key&sslmode=verify-full&sslrootcert=.%2fsource_certs%2fca.crt' \ ---target 'postgres://root@localhost:26257/defaultdb?sslmode=verify-full' \ ---table-handling 'truncate-if-exists' \ ---table-filter 'employees' \ ---bucket-path 'gs://migration/data/cockroach' \ ---use-copy \ ---cleanup -~~~ - -- `--source` specifies the MySQL connection string and the certificates in URL-encoded format. Secure connections should be used by default. Refer to [Best practices](#best-practices). -- `--table-handling` specifies that existing tables on CockroachDB should be truncated before the source data is loaded. -- `--table-filter` filters for tables with the `employees` string in the name. -- `--bucket-path` specifies a directory on an [Google Cloud Storage bucket](#data-path) where intermediate files will be written. -- `--use-copy` specifies that `COPY FROM` is used to load the tables, keeping the source tables online and queryable but loading the data more slowly than `IMPORT INTO`. -- `--cleanup` specifies that the intermediate files should be removed after the source data is loaded. -- `--ongoing-replication` starts continuous [replication](#replication) of data from the source database to CockroachDB after the fetch process succeeds. - -If the fetch process succeeds, the output displays a `fetch complete` message like the following: - -~~~ json -{"level":"info","type":"summary","fetch_id":"f5cb422f-4bb4-4bbd-b2ae-08c4d00d1e7c","num_tables":1,"tables":["public.employees"],"cdc_cursor":"0/3F41E40","net_duration_ms":6752.847625,"net_duration":"000h 00m 06s","time":"2024-03-18T12:30:37-04:00","message":"fetch complete"} -~~~ - -{{site.data.alerts.callout_info}} -If the fetch process encounters an error, it will exit and can be [continued](#continue-fetch-after-encountering-an-error). 
-{{site.data.alerts.end}} - -Continuous [replication](#replication) begins immediately afterward: - -~~~ json -{"level":"info","time":"2024-05-13T14:33:07-04:00","message":"starting replicator"} -~~~ - -To cancel replication, enter `ctrl-c` to issue a `SIGTERM` signal. - -### Load CockroachDB data via direct copy - -The following `molt fetch` command uses `COPY FROM` to load all tables directly from one CockroachDB database to another. - -{% include_cached copy-clipboard.html %} -~~~ shell -molt fetch \ ---source 'postgres://root@localhost:26257/defaultdb?sslmode=disable' \ ---target 'postgres://root@localhost:26258/defaultdb?sslmode=disable' \ ---table-handling 'none' \ ---direct-copy \ ---allow-tls-mode-disable -~~~ - -- `--source` specifies `sslmode=disable` to establish an insecure connection. By default, insecure connections are disallowed and should be used **only** for testing or if a secure SSL/TLS connection to the source or target database is not possible. -- `--table-handling` specifies that existing tables on the target CockroachDB database should not be modified before the source data is loaded. -- `--direct-copy` specifies that `COPY FROM` is used to load the tables directly, without creating intermediate files. -- `--allow-tls-mode-disable` enables insecure connections to the source and target databases. Refer to [Secure connections](#secure-connections). - -### Continue fetch after encountering an error - -If the fetch process encounters an error, it exits with an error message, fetch ID, and continuation token for each table that failed to load on the target database. You can use these values to [continue the fetch process](#fetch-continuation) from where it was interrupted. 
- -~~~ json -{"level":"info","table":"public.tbl1","file_name":"shard_01_part_00000001.csv","message":"creating or updating token for duplicate key value violates unique constraint \"tbl1_pkey\"; Key (id)=(22) already exists."} -{"level":"info","table":"public.tbl1","continuation_token":"5e7c7173-101c-4539-9b8d-28fad37d0240","message":"created continuation token"} -{"level":"info","fetch_id":"87bf8dc0-803c-4e26-89d5-3352576f92a7","message":"continue from this fetch ID"} -~~~ - -To retry a specific table, reissue the initial `molt fetch` command and include the fetch ID and a continuation token: - -{{site.data.alerts.callout_success}} -You can use `molt fetch tokens list` to list all active continuation tokens. Refer to [List active continuation tokens](#list-active-continuation-tokens). -{{site.data.alerts.end}} - -{% include_cached copy-clipboard.html %} -~~~ shell -molt fetch \ -... \ ---fetch-id '87bf8dc0-803c-4e26-89d5-3352576f92a7' \ ---continuation-token '5e7c7173-101c-4539-9b8d-28fad37d0240' -~~~ - -To retry all tables that failed, exclude `--continuation-token` from the command. When prompted, type `y` to clear all active continuation tokens. To avoid the prompt (e.g., when running `molt fetch` in a job), include the `--non-interactive` flag: - -{% include_cached copy-clipboard.html %} -~~~ shell -molt fetch \ -... 
\ ---fetch-id '87bf8dc0-803c-4e26-89d5-3352576f92a7' \ ---non-interactive -~~~ - -## See also - -- [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) -- [Migration Overview]({% link {{ page.version.version }}/migration-overview.md %}) -- [Migrate from PostgreSQL]({% link {{ page.version.version }}/migrate-from-postgres.md %}) -- [Migrate from MySQL]({% link {{ page.version.version }}/migrate-from-mysql.md %}) -- [Migrate from CSV]({% link {{ page.version.version }}/migrate-from-csv.md %}) diff --git a/src/current/v23.2/molt-verify.md b/src/current/v23.2/molt-verify.md deleted file mode 100644 index 1b239439615..00000000000 --- a/src/current/v23.2/molt-verify.md +++ /dev/null @@ -1,135 +0,0 @@ ---- -title: MOLT Verify -summary: Learn how to use the MOLT Verify tool to check for data discrepancies during and after a migration. -toc: true -docs_area: migrate ---- - -{{site.data.alerts.callout_info}} -{% include feature-phases/preview.md %} -{{site.data.alerts.end}} - -MOLT Verify checks for data discrepancies between a source database and CockroachDB during a [database migration]({% link {{ page.version.version }}/migration-overview.md %}). - -The tool performs the following verifications to ensure data integrity during a migration: - -- **Table Verification:** Check that the structure of tables between the source database and the target database are the same. -- **Column Definition Verification:** Check that the column names, data types, constraints, nullability, and other attributes between the source database and the target database are the same. -- **Row Value Verification:** Check that the actual data in the tables is the same between the source database and the target database. 
- -For a demo of MOLT Verify, watch the following video: - -{% include_cached youtube.html video_id="6mfebmCLClY" %} - -## Supported databases - -The following databases are currently supported: - -- [PostgreSQL]({% link {{ page.version.version }}/migrate-from-postgres.md %}) -- [MySQL]({% link {{ page.version.version }}/migrate-from-mysql.md %}) -- CockroachDB - -## Install and run MOLT Verify - -To install MOLT Verify, download the binary that matches your system. To download the latest binary: - -| Operating System | AMD 64-bit | ARM 64-bit | -|------------------|---------------------------------------------------------------------------------|---------------------------------------------------------------------------------| -| Windows | [Download](https://molt.cockroachdb.com/molt/cli/molt-latest.windows-amd64.tgz) | [Download](https://molt.cockroachdb.com/molt/cli/molt-latest.windows-arm64.tgz) | -| Linux | [Download](https://molt.cockroachdb.com/molt/cli/molt-latest.linux-amd64.tgz) | [Download](https://molt.cockroachdb.com/molt/cli/molt-latest.linux-arm64.tgz) | -| Mac | [Download](https://molt.cockroachdb.com/molt/cli/molt-latest.darwin-amd64.tgz) | [Download](https://molt.cockroachdb.com/molt/cli/molt-latest.darwin-arm64.tgz) | - -For previous binaries, refer to the [MOLT version manifest](https://molt.cockroachdb.com/molt/cli/versions.html). - -# Setup - -Complete the following items before using MOLT Verify: - -- Make sure the SQL user running MOLT Verify has read privileges on the necessary tables. - -- Percent-encode the connection strings for the source database and [CockroachDB]({% link {{ page.version.version }}/connect-to-the-database.md %}). This ensures that the MOLT tools can parse special characters in your password. 
- - - Given a password `a$52&`, pass it to the `molt escape-password` command with single quotes: - - {% include_cached copy-clipboard.html %} - ~~~ shell - molt escape-password 'a$52&' - ~~~ - - ~~~ - Substitute the following encoded password in your original connection url string: - a%2452%26 - ~~~ - - - Use the encoded password in your connection string. For example: - - ~~~ - postgres://postgres:a%2452%26@localhost:5432/replicationload - ~~~ - -## Flags - -Flag | Description -----------|------------ -`--source` | (Required) Connection string for the source database. -`--target` | (Required) Connection string for the target database. -`--concurrency` | Number of shards to process at a time.
**Default:** 16
For faster verification, set this flag to a higher value. {% comment %}
Note: Table splitting by shard only works for [`INT`]({% link {{ page.version.version }}/int.md %}), [`UUID`]({% link {{ page.version.version }}/uuid.md %}), and [`FLOAT`]({% link {{ page.version.version }}/float.md %}) data types.{% endcomment %} -`--row-batch-size` | Number of rows to get from a table at a time.
**Default:** 20000 -`--table-filter` | Verify tables that match a specified [regular expression](https://wikipedia.org/wiki/Regular_expression). -`--schema-filter` | Verify schemas that match a specified [regular expression](https://wikipedia.org/wiki/Regular_expression). -`--continuous` | Verify tables in a continuous loop.
**Default:** `false` -`--live` | Retry verification on rows before emitting warnings or errors. This is useful during live data import, when temporary mismatches can occur.
**Default:** `false` - -## Usage - -`molt verify` takes two SQL connection strings as `--source` and `--target` arguments. - -To compare a PostgreSQL database with a CockroachDB database: - -{% include_cached copy-clipboard.html %} -~~~ shell -molt verify \ - --source 'postgresql://{username}:{password}@{host}:{port}/{database}' \ - --target 'postgresql://{username}:{password}@{host}:{port}/{database}?sslmode=verify-full' -~~~ - -To compare a MySQL database with a CockroachDB database: - -{% include_cached copy-clipboard.html %} -~~~ shell -molt verify \ - --source 'mysql://{username}:{password}@{protocol}({host}:{port})/{database}' \ - --target 'postgresql://{username}:{password}@{host}:{port}/{database}?sslmode=verify-full' -~~~ - -Use the optional [flags](#flags) to customize the verification results. - -When verification completes, the output displays a summary message like the following: - -~~~ json -{"level":"info","type":"summary","table_schema":"public","table_name":"common_table","num_truth_rows":6,"num_success":3,"num_conditional_success":0,"num_missing":2,"num_mismatch":1,"num_extraneous":2,"num_live_retry":0,"num_column_mismatch":0,"message":"finished row verification on public.common_table (shard 1/1)"} -~~~ - -- `num_missing` is the number of rows that are missing on the target database. You can [add any missing data]({% link {{ page.version.version }}/insert.md %}) to the target database and run `molt verify` again. -- `num_mismatch` is the number of rows with mismatched values on the target database. -- `num_extraneous` is the number of extraneous tables on the target database. -- `num_column_mismatch` is the number of columns with mismatched types on the target database, preventing `molt verify` from comparing the column's rows. For example, if your source table uses an auto-incrementing ID, MOLT Verify will identify a mismatch with CockroachDB's [`UUID`]({% link {{ page.version.version }}/uuid.md %}) type. 
In such cases, you might fix the mismatch by [creating a composite type]({% link {{ page.version.version }}/create-type.md %}#create-a-composite-data-type) on CockroachDB that uses the auto-incrementing ID. -- `num_success` is the number of rows that matched. -- `num_conditional_success` is the number of rows that matched while having a column mismatch due to a type difference. This value indicates that all other columns that could be compared have matched successfully. You should manually review the warnings and errors in the output to determine whether the column mismatches can be ignored. - -## Limitations - -- While verifying data, MOLT Verify pages 20,000 rows at a time by default, and row values can change between batches, which can lead to temporary inconsistencies in data. Enable `--live` mode to have the tool retry verification on these rows. You can also change the row batch size using the `--row_batch_size` [flag](#flags). -- MySQL enums and set types are not supported. -- MOLT Verify checks for collation mismatches on [primary key]({% link {{ page.version.version }}/primary-key.md %}) columns. This may cause validation to fail when a [`STRING`]({% link {{ page.version.version }}/string.md %}) is used as a primary key and the source and target databases are using different [collations]({% link {{ page.version.version }}/collate.md %}). -- MOLT Verify only supports comparing one MySQL database to a whole CockroachDB schema (which is assumed to be `public`). -- MOLT Verify might give an error in case of schema changes on either the source or target database. -- [Geospatial types]({% link {{ page.version.version }}/spatial-data-overview.md %}#spatial-objects) cannot yet be compared. 
- -## See also - -- [MOLT Fetch]({% link {{ page.version.version }}/molt-fetch.md %}) -- [Migration Overview]({% link {{ page.version.version }}/migration-overview.md %}) -- [Migrate from PostgreSQL]({% link {{ page.version.version }}/migrate-from-postgres.md %}) -- [Migrate from MySQL]({% link {{ page.version.version }}/migrate-from-mysql.md %}) -- [Migrate from CSV]({% link {{ page.version.version }}/migrate-from-csv.md %}) diff --git a/src/current/v24.1/migrate-from-mysql.md b/src/current/v24.1/migrate-from-mysql.md index 71485a01e5b..2a1adcde9a2 100644 --- a/src/current/v24.1/migrate-from-mysql.md +++ b/src/current/v24.1/migrate-from-mysql.md @@ -34,7 +34,7 @@ Identifiers are case-sensitive in MySQL and [case-insensitive in CockroachDB]({% The MySQL [`AUTO_INCREMENT`](https://dev.mysql.com/doc/refman/8.0/en/example-auto-increment.html) attribute, which creates sequential column values, is not supported in CockroachDB. When [using the Schema Conversion Tool](https://www.cockroachlabs.com/docs/cockroachcloud/migrations-page?filters=mysql#convert-a-schema), columns with `AUTO_INCREMENT` can be converted to use [sequences]({% link {{ page.version.version }}/create-sequence.md %}), `UUID` values with [`gen_random_uuid()`]({% link {{ page.version.version }}/functions-and-operators.md %}#id-generation-functions), or unique `INT8` values using [`unique_rowid()`]({% link {{ page.version.version }}/functions-and-operators.md %}#id-generation-functions). Cockroach Labs does not recommend using a sequence to define a primary key column. For more information, see [Unique ID best practices]({% link {{ page.version.version }}/performance-best-practices-overview.md %}#unique-id-best-practices). {{site.data.alerts.callout_info}} -Changing a column type during schema conversion will cause [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to identify a type mismatch during [data validation](#step-3-validate-the-migrated-data). This is expected behavior. 
+Changing a column type during schema conversion will cause [MOLT Verify]({% link molt/molt-verify.md %}) to identify a type mismatch during [data validation](#step-3-validate-the-migrated-data). This is expected behavior.
 {{site.data.alerts.end}}

 #### `ENUM` type

@@ -158,7 +158,7 @@ Use the [Schema Conversion Tool](https://www.cockroachlabs.com/docs/cockroachclo

     Click **Save**.

-    This is a workaround to prevent [data validation](#step-3-validate-the-migrated-data) from failing due to collation mismatches. For more details, see the [MOLT Verify] ({% link {{ page.version.version }}/molt-verify.md %}#known-limitations) documentation.
+    This is a workaround to prevent [data validation](#step-3-validate-the-migrated-data) from failing due to collation mismatches. For more details, see the [MOLT Verify]({% link molt/molt-verify.md %}#known-limitations) documentation.

 1. Click [**Migrate Schema**](https://www.cockroachlabs.com/docs/cockroachcloud/migrations-page?filters=mysql#migrate-the-schema) to create a new {{ site.data.products.serverless }} cluster with the converted schema. Name the database `world`.

@@ -358,9 +358,9 @@ By default, [`IMPORT INTO`]({% link {{ page.version.version }}/import-into.md %}

 ### Step 3. Validate the migrated data

-Use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to check that the data on MySQL and CockroachDB are consistent.
+Use [MOLT Verify]({% link molt/molt-verify.md %}) to check that the data on MySQL and CockroachDB are consistent.

-1. [Install MOLT Verify.]({% link {{ page.version.version }}/molt-verify.md %})
+1. [Install MOLT Verify.]({% link molt/molt-verify.md %})
 1. 
In the directory where you installed MOLT Verify, use the following command to compare the two databases, specifying the [JDBC connection string for MySQL](https://dev.mysql.com/doc/connector-j/8.1/en/connector-j-reference-jdbc-url-format.html) with `--source` and the SQL connection string for CockroachDB with `--target`: @@ -403,7 +403,7 @@ To learn more, see the [Migration Overview]({% link {{ page.version.version }}/m - [Migration Overview]({% link {{ page.version.version }}/migration-overview.md %}) - [Use the Schema Conversion Tool](https://www.cockroachlabs.com/docs/cockroachcloud/migrations-page) -- [Use the MOLT Verify tool]({% link {{ page.version.version }}/molt-verify.md %}) +- [Use the MOLT Verify tool]({% link molt/molt-verify.md %}) - [Import Performance Best Practices]({% link {{ page.version.version }}/import-performance-best-practices.md %}) - [Migrate from CSV]({% link {{ page.version.version }}/migrate-from-csv.md %}) - [Migrate from PostgreSQL]({% link {{ page.version.version }}/migrate-from-postgres.md %}) diff --git a/src/current/v24.1/migrate-from-postgres.md b/src/current/v24.1/migrate-from-postgres.md index 396ebdfdcb6..8d35c7c7dd4 100644 --- a/src/current/v24.1/migrate-from-postgres.md +++ b/src/current/v24.1/migrate-from-postgres.md @@ -247,9 +247,9 @@ By default, [`IMPORT INTO`]({% link {{ page.version.version }}/import-into.md %} ### Step 3. Validate the migrated data -Use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to check that the data on PostgreSQL and CockroachDB are consistent. +Use [MOLT Verify]({% link molt/molt-verify.md %}) to check that the data on PostgreSQL and CockroachDB are consistent. -1. [Install MOLT Verify.]({% link {{ page.version.version }}/molt-verify.md %}) +1. [Install MOLT Verify.]({% link molt/molt-verify.md %}) 1. 
In the directory where you installed MOLT Verify, use the following command to compare the two databases, specifying the PostgreSQL connection string with `--source` and the CockroachDB connection string with `--target`: @@ -288,7 +288,7 @@ To learn more, see the [Migration Overview]({% link {{ page.version.version }}/m - [Migration Overview]({% link {{ page.version.version }}/migration-overview.md %}) - [Use the Schema Conversion Tool](https://www.cockroachlabs.com/docs/cockroachcloud/migrations-page) -- [Use the MOLT Verify tool]({% link {{ page.version.version }}/molt-verify.md %}) +- [Use the MOLT Verify tool]({% link molt/molt-verify.md %}) - [Import Performance Best Practices]({% link {{ page.version.version }}/import-performance-best-practices.md %}) - [Migrate from CSV]({% link {{ page.version.version }}/migrate-from-csv.md %}) - [Migrate from MySQL]({% link {{ page.version.version }}/migrate-from-mysql.md %}) diff --git a/src/current/v24.1/migration-overview.md b/src/current/v24.1/migration-overview.md index 3e1372af46a..cf12e36472b 100644 --- a/src/current/v24.1/migration-overview.md +++ b/src/current/v24.1/migration-overview.md @@ -218,7 +218,8 @@ In the following order: You can use the following MOLT (Migrate Off Legacy Technology) tools to simplify these steps: - [Schema Conversion Tool](https://www.cockroachlabs.com/docs/cockroachcloud/migrations-page) -- [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) +- [MOLT Fetch]({% link molt/molt-fetch.md %}) +- [MOLT Verify]({% link molt/molt-verify.md %}) #### Convert the schema @@ -245,7 +246,7 @@ Then import the converted schema to a CockroachDB cluster: Before moving data, Cockroach Labs recommends [dropping any indexes]({% link {{ page.version.version }}/drop-index.md %}) on the CockroachDB database. The indexes can be [recreated]({% link {{ page.version.version }}/create-index.md %}) after the data is loaded. Doing so will optimize performance. 
{{site.data.alerts.end}} -After [converting the schema](#convert-the-schema), load your data into CockroachDB so that you can [test your application queries](#validate-queries). Then use [MOLT Fetch]({% link {{ page.version.version }}/molt-fetch.md %}) to move the source data to CockroachDB. +After [converting the schema](#convert-the-schema), load your data into CockroachDB so that you can [test your application queries](#validate-queries). Then use [MOLT Fetch]({% link molt/molt-fetch.md %}) to move the source data to CockroachDB. Alternatively, you can use one of the following methods to migrate the data. Additional tooling may be required to extract or convert the data to a supported file format. @@ -263,7 +264,7 @@ Note that CockroachDB defaults to the [`SERIALIZABLE`]({% link {{ page.version.v You can "shadow" your production workload by executing your source SQL statements on CockroachDB in parallel. You can then [validate the queries](#test-query-results-and-performance) on CockroachDB for consistency, performance, and potential issues with the migration. -The [CockroachDB Live Migration Service (MOLT LMS)]({% link {{ page.version.version }}/live-migration-service.md %}) can [perform shadowing]({% link {{ page.version.version }}/live-migration-service.md %}#shadowing-modes). This is intended only for [testing](#test-query-results-and-performance) or [performing a dry run](#perform-a-dry-run). Shadowing should **not** be used in production when performing a [live migration](#zero-downtime). +The [CockroachDB Live Migration Service (MOLT LMS)]({% link molt/live-migration-service.md %}) can [perform shadowing]({% link molt/live-migration-service.md %}#shadowing-modes). This is intended only for [testing](#test-query-results-and-performance) or [performing a dry run](#perform-a-dry-run). Shadowing should **not** be used in production when performing a [live migration](#zero-downtime). 
##### Test query results and performance @@ -271,7 +272,7 @@ You can manually validate your queries by testing a subset of "critical queries" - Check the application logs for error messages and the API response time. If application requests are slower than expected, use the **SQL Activity** page on the [CockroachDB {{ site.data.products.cloud }} Console](https://www.cockroachlabs.com/docs/cockroachcloud/statements-page) or [DB Console]({% link {{ page.version.version }}/ui-statements-page.md %}) to find the longest-running queries that are part of that application request. If necessary, tune the queries according to our best practices for [SQL performance]({% link {{ page.version.version }}/performance-best-practices-overview.md %}). -- Compare the results of the queries and check that they are identical in both the source database and CockroachDB. To do this, you can use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}). +- Compare the results of the queries and check that they are identical in both the source database and CockroachDB. To do this, you can use [MOLT Verify]({% link molt/molt-verify.md %}). Test performance on a CockroachDB cluster that is appropriately [sized](#capacity-planning) for your workload: @@ -312,8 +313,8 @@ Using this method, consistency is achieved by only performing the cutover once a The following is a high-level overview of the migration steps. For considerations and details about the pros and cons of this approach, see [Migration Strategy: Lift and Shift]({% link {{ page.version.version }}/migration-strategy-lift-and-shift.md %}). 1. Stop application traffic to your source database. **This begins downtime.** -1. Use [MOLT Fetch]({% link {{ page.version.version }}/molt-fetch.md %}) to move the source data to CockroachDB. -1. After the data is migrated, use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to validate the consistency of the data between the source database and CockroachDB. +1. 
Use [MOLT Fetch]({% link molt/molt-fetch.md %}) to move the source data to CockroachDB. +1. After the data is migrated, use [MOLT Verify]({% link molt/molt-verify.md %}) to validate the consistency of the data between the source database and CockroachDB. 1. Perform a [cutover](#cutover-strategy) by resuming application traffic, now to CockroachDB. {% comment %}1. If you want the ability to [roll back](#all-at-once-rollback) the migration, replicate data back to the source database.{% endcomment %} @@ -325,18 +326,18 @@ The following is a high-level overview of the migration steps. The two approache To prioritize consistency and minimize downtime: -1. Set up the [CockroachDB Live Migration Service (MOLT LMS)]({% link {{ page.version.version }}/live-migration-service.md %}) to proxy for application traffic between your source database and CockroachDB. Do **not** shadow the application traffic. -1. Use [MOLT Fetch]({% link {{ page.version.version }}/molt-fetch.md %}) to move the source data to CockroachDB. Use the tool to [**replicate ongoing changes**]({% link {{ page.version.version }}/molt-fetch.md %}#replication) after it performs the initial load of data into CockroachDB. -1. As the data is migrating, use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to validate the consistency of the data between the source database and CockroachDB. -1. After nearly all data from your source database has been moved to CockroachDB (for example, with a <1-second delay or <1000 rows), use MOLT LMS to begin a [*consistent cutover*]({% link {{ page.version.version }}/live-migration-service.md %}#consistent-cutover) and stop application traffic to your source database. **This begins downtime.** +1. Set up the [CockroachDB Live Migration Service (MOLT LMS)]({% link molt/live-migration-service.md %}) to proxy for application traffic between your source database and CockroachDB. Do **not** shadow the application traffic. +1. 
Use [MOLT Fetch]({% link molt/molt-fetch.md %}) to move the source data to CockroachDB. Use the tool to [**replicate ongoing changes**]({% link molt/molt-fetch.md %}#replication) after it performs the initial load of data into CockroachDB. +1. As the data is migrating, use [MOLT Verify]({% link molt/molt-verify.md %}) to validate the consistency of the data between the source database and CockroachDB. +1. After nearly all data from your source database has been moved to CockroachDB (for example, with a <1-second delay or <1000 rows), use MOLT LMS to begin a [*consistent cutover*]({% link molt/live-migration-service.md %}#consistent-cutover) and stop application traffic to your source database. **This begins downtime.** 1. Wait for MOLT Fetch to finish replicating changes to CockroachDB. -1. Use MOLT LMS to commit the [consistent cutover]({% link {{ page.version.version }}/live-migration-service.md %}#consistent-cutover). This resumes application traffic, now to CockroachDB. +1. Use MOLT LMS to commit the [consistent cutover]({% link molt/live-migration-service.md %}#consistent-cutover). This resumes application traffic, now to CockroachDB. To achieve zero downtime with inconsistency: -1. Set up the [CockroachDB Live Migration Service (MOLT LMS)]({% link {{ page.version.version }}/live-migration-service.md %}) to proxy for application traffic between your source database and CockroachDB. Use a [shadowing mode]({% link {{ page.version.version }}/live-migration-service.md %}#shadowing-modes) to run application queries simultaneously on your source database and CockroachDB. -1. Use [MOLT Fetch]({% link {{ page.version.version }}/molt-fetch.md %}) to move the source data to CockroachDB. Use the tool to **replicate ongoing changes** after performing the initial load of data into CockroachDB. -1. 
As the data is migrating, you can use [MOLT Verify]({% link {{ page.version.version }}/molt-verify.md %}) to validate the consistency of the data between the source database and CockroachDB. +1. Set up the [CockroachDB Live Migration Service (MOLT LMS)]({% link molt/live-migration-service.md %}) to proxy for application traffic between your source database and CockroachDB. Use a [shadowing mode]({% link molt/live-migration-service.md %}#shadowing-modes) to run application queries simultaneously on your source database and CockroachDB. +1. Use [MOLT Fetch]({% link molt/molt-fetch.md %}) to move the source data to CockroachDB. Use the tool to **replicate ongoing changes** after performing the initial load of data into CockroachDB. +1. As the data is migrating, you can use [MOLT Verify]({% link molt/molt-verify.md %}) to validate the consistency of the data between the source database and CockroachDB. 1. After nearly all data from your source database has been moved to CockroachDB (for example, with a <1 second delay or <1000 rows), perform an [*immediate cutover*](#cutover-strategy) by pointing application traffic to CockroachDB. 1. Manually reconcile any inconsistencies caused by writes that were not replicated during the cutover. 1. Close the connection to the source database when you are ready to finish the migration.