From c3d89a5d627732e8c35316860dd2e249fe8e6e80 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Thu, 23 Nov 2023 16:20:43 +0800 Subject: [PATCH] updated SQL for `DATA_LENGTH` (#15432) (#15434) --- dm/dm-hardware-and-software-requirements.md | 43 ++++++++++++------- migrate-large-mysql-to-tidb.md | 33 +++++++++++--- tidb-lightning/tidb-lightning-requirements.md | 35 ++++++++++----- 3 files changed, 79 insertions(+), 32 deletions(-) diff --git a/dm/dm-hardware-and-software-requirements.md b/dm/dm-hardware-and-software-requirements.md index 4c73979bb97a5..e4f6077305e14 100644 --- a/dm/dm-hardware-and-software-requirements.md +++ b/dm/dm-hardware-and-software-requirements.md @@ -56,18 +56,31 @@ The target TiKV cluster must have enough disk space to store the imported data. You can estimate the data volume by using the following SQL statements to summarize the `DATA_LENGTH` field: -- Calculate the size of all schemas, in MiB. Replace `${schema_name}` with your schema name. - - {{< copyable "sql" >}} - - ```sql - select table_schema,sum(data_length)/1024/1024 as data_length,sum(index_length)/1024/1024 as index_length,sum(data_length+index_length)/1024/1024 as sum from information_schema.tables where table_schema = "${schema_name}" group by table_schema; - ``` - -- Calculate the size of the largest table, in MiB. Replace ${schema_name} with your schema name. 
- - {{< copyable "sql" >}} - - ```sql - select table_name,table_schema,sum(data_length)/1024/1024 as data_length,sum(index_length)/1024/1024 as index_length,sum(data_length+index_length)/1024/1024 as sum from information_schema.tables where table_schema = "${schema_name}" group by table_name,table_schema order by sum desc limit 5; - ``` \ No newline at end of file +```sql +-- Calculate the data and index size of each schema. FORMAT_BYTES() requires MySQL 8.0.16 or later; on earlier versions, use sys.format_bytes(). +SELECT + TABLE_SCHEMA, + FORMAT_BYTES(SUM(DATA_LENGTH)) AS 'Data Size', + FORMAT_BYTES(SUM(INDEX_LENGTH)) AS 'Index Size' +FROM + information_schema.tables +GROUP BY + TABLE_SCHEMA; + +-- Calculate the 5 largest tables, ranked by combined data and index size +SELECT + TABLE_NAME, + TABLE_SCHEMA, + FORMAT_BYTES(SUM(DATA_LENGTH)) AS 'Data Size', + FORMAT_BYTES(SUM(INDEX_LENGTH)) AS 'Index Size', + FORMAT_BYTES(SUM(DATA_LENGTH+INDEX_LENGTH)) AS 'Total Size' +FROM + information_schema.tables +GROUP BY + TABLE_NAME, + TABLE_SCHEMA +ORDER BY + SUM(DATA_LENGTH+INDEX_LENGTH) DESC +LIMIT + 5; +``` \ No newline at end of file diff --git a/migrate-large-mysql-to-tidb.md b/migrate-large-mysql-to-tidb.md index 5f687d214d00a..e8383ff180c5e 100644 --- a/migrate-large-mysql-to-tidb.md +++ b/migrate-large-mysql-to-tidb.md @@ -31,14 +31,33 @@ This document describes how to perform the full migration using Dumpling and TiD **Note**: It is difficult to calculate the exact data volume exported by Dumpling from MySQL, but you can estimate the data volume by using the following SQL statement to summarize the `DATA_LENGTH` field in the `information_schema.tables` table: -{{< copyable "" >}} - ```sql -/* Calculate the size of all schemas, in MiB. Replace ${schema_name} with your schema name. */ -SELECT table_schema,SUM(data_length)/1024/1024 AS data_length,SUM(index_length)/1024/1024 AS index_length,SUM(data_length+index_length)/1024/1024 AS SUM FROM information_schema.tables WHERE table_schema = "${schema_name}" GROUP BY table_schema; - -/* Calculate the size of the largest table, in MiB. 
Replace ${schema_name} with your schema name. */ -SELECT table_name,table_schema,SUM(data_length)/1024/1024 AS data_length,SUM(index_length)/1024/1024 AS index_length,SUM(data_length+index_length)/1024/1024 AS SUM from information_schema.tables WHERE table_schema = "${schema_name}" GROUP BY table_name,table_schema ORDER BY SUM DESC LIMIT 5; +-- Calculate the data and index size of each schema. FORMAT_BYTES() requires MySQL 8.0.16 or later; on earlier versions, use sys.format_bytes(). +SELECT + TABLE_SCHEMA, + FORMAT_BYTES(SUM(DATA_LENGTH)) AS 'Data Size', + FORMAT_BYTES(SUM(INDEX_LENGTH)) AS 'Index Size' +FROM + information_schema.tables +GROUP BY + TABLE_SCHEMA; + +-- Calculate the 5 largest tables, ranked by combined data and index size +SELECT + TABLE_NAME, + TABLE_SCHEMA, + FORMAT_BYTES(SUM(DATA_LENGTH)) AS 'Data Size', + FORMAT_BYTES(SUM(INDEX_LENGTH)) AS 'Index Size', + FORMAT_BYTES(SUM(DATA_LENGTH+INDEX_LENGTH)) AS 'Total Size' +FROM + information_schema.tables +GROUP BY + TABLE_NAME, + TABLE_SCHEMA +ORDER BY + SUM(DATA_LENGTH+INDEX_LENGTH) DESC +LIMIT + 5; ``` ### Disk space for the target TiKV cluster diff --git a/tidb-lightning/tidb-lightning-requirements.md b/tidb-lightning/tidb-lightning-requirements.md index 86c9bb53af71f..a866870437871 100644 --- a/tidb-lightning/tidb-lightning-requirements.md +++ b/tidb-lightning/tidb-lightning-requirements.md @@ -86,16 +86,31 @@ The target TiKV cluster must have enough disk space to store the imported data. It is difficult to calculate the exact data volume exported by Dumpling from MySQL. However, you can estimate the data volume by using the following SQL statement to summarize the `DATA_LENGTH` field in the information_schema.tables table: -Calculate the size of all schemas, in MiB. Replace ${schema_name} with your schema name. - ```sql -SELECT table_schema, SUM(data_length)/1024/1024 AS data_length, SUM(index_length)/1024/1024 AS index_length, SUM(data_length+index_length)/1024/1024 AS sum FROM information_schema.tables WHERE table_schema = "${schema_name}" GROUP BY table_schema; -``` - -Calculate the size of the largest table, in MiB. 
Replace ${schema_name} with your schema name. +-- Calculate the data and index size of each schema. FORMAT_BYTES() requires MySQL 8.0.16 or later; on earlier versions, use sys.format_bytes(). +SELECT + TABLE_SCHEMA, + FORMAT_BYTES(SUM(DATA_LENGTH)) AS 'Data Size', + FORMAT_BYTES(SUM(INDEX_LENGTH)) AS 'Index Size' +FROM + information_schema.tables +GROUP BY + TABLE_SCHEMA; -{{< copyable "sql" >}} - -```sql -SELECT table_name, table_schema, SUM(data_length)/1024/1024 AS data_length, SUM(index_length)/1024/1024 AS index_length,sum(data_length+index_length)/1024/1024 AS sum FROM information_schema.tables WHERE table_schema = "${schema_name}" GROUP BY table_name,table_schema ORDER BY sum DESC LIMIT 5; +-- Calculate the 5 largest tables, ranked by combined data and index size +SELECT + TABLE_NAME, + TABLE_SCHEMA, + FORMAT_BYTES(SUM(DATA_LENGTH)) AS 'Data Size', + FORMAT_BYTES(SUM(INDEX_LENGTH)) AS 'Index Size', + FORMAT_BYTES(SUM(DATA_LENGTH+INDEX_LENGTH)) AS 'Total Size' +FROM + information_schema.tables +GROUP BY + TABLE_NAME, + TABLE_SCHEMA +ORDER BY + SUM(DATA_LENGTH+INDEX_LENGTH) DESC +LIMIT + 5; ```