diff --git a/best-practices/massive-regions-best-practices.md b/best-practices/massive-regions-best-practices.md index 6e041c02094c2..9b70a9986c0eb 100644 --- a/best-practices/massive-regions-best-practices.md +++ b/best-practices/massive-regions-best-practices.md @@ -95,8 +95,8 @@ Enable `Region Merge` by configuring the following parameters: {{< copyable "" >}} ``` -config set max-merge-region-size 20 -config set max-merge-region-keys 200000 +config set max-merge-region-size 54 +config set max-merge-region-keys 540000 config set merge-schedule-limit 8 ``` @@ -138,7 +138,11 @@ If Region followers have not received the heartbeat from the leader within the ` ### Method 6: Adjust Region size -The default size of a Region is 96 MiB, and you can reduce the number of Regions by setting Regions to a larger size. For more information, see [Tune Region Performance](/tune-region-performance.md). +The default size of a Region is 256 MiB, and you can reduce the number of Regions by setting Regions to a larger size. For more information, see [Tune Region Performance](/tune-region-performance.md). + +> **Note:** +> +> Starting from v8.4.0, the default Region size is increased from 96 MiB to 256 MiB. If you have not modified the Region size manually, when you upgrade a TiKV cluster to v8.4.0 or later, the TiKV cluster's default Region size will automatically be updated to 256 MiB. > **Warning:** > diff --git a/best-practices/pd-scheduling-best-practices.md b/best-practices/pd-scheduling-best-practices.md index a86e17f0974d8..d014a4794d164 100644 --- a/best-practices/pd-scheduling-best-practices.md +++ b/best-practices/pd-scheduling-best-practices.md @@ -104,9 +104,9 @@ Region merge refers to the process of merging adjacent small regions. 
It serves Specifically, when a newly split Region exists for more than the value of [`split-merge-interval`](/pd-configuration-file.md#split-merge-interval) (`1h` by default), if the following conditions occur at the same time, this Region triggers the Region merge scheduling: -- The size of this Region is smaller than the value of the [`max-merge-region-size`](/pd-configuration-file.md#max-merge-region-size) (20 MiB by default) +- The size of this Region is smaller than the value of the [`max-merge-region-size`](/pd-configuration-file.md#max-merge-region-size). Starting from v8.4.0, the default value is changed from 20 MiB to 54 MiB. The new default value is automatically applied only to newly created clusters. Existing clusters are not affected. -- The number of keys in this Region is smaller than the value of [`max-merge-region-keys`](/pd-configuration-file.md#max-merge-region-keys) (200,000 by default). +- The number of keys in this Region is smaller than the value of [`max-merge-region-keys`](/pd-configuration-file.md#max-merge-region-keys). Starting from v8.4.0, the default value is changed from 200000 to 540000. The new default value is automatically applied only to newly created clusters. Existing clusters are not affected. ## Query scheduling status diff --git a/glossary.md b/glossary.md index 39b96d20f7288..5b50d37f33de4 100644 --- a/glossary.md +++ b/glossary.md @@ -137,7 +137,7 @@ Raft Engine is an embedded persistent storage engine with a log-structured desig ### Region/peer/Raft group -Region is the minimal piece of data storage in TiKV, each representing a range of data (96 MiB by default). Each Region has three replicas by default. A replica of a Region is called a peer. Multiple peers of the same Region replicate data via the Raft consensus algorithm, so peers are also members of a Raft instance. TiKV uses Multi-Raft to manage data. That is, for each Region, there is a corresponding, isolated Raft group. 
+Region is the minimal piece of data storage in TiKV, each representing a range of data (256 MiB by default). Each Region has three replicas by default. A replica of a Region is called a peer. Multiple peers of the same Region replicate data via the Raft consensus algorithm, so peers are also members of a Raft instance. TiKV uses Multi-Raft to manage data. That is, for each Region, there is a corresponding, isolated Raft group. ### Region split diff --git a/information-schema/information-schema-cluster-config.md b/information-schema/information-schema-cluster-config.md index c5e1bfa1db225..cb3cc065e3b2e 100644 --- a/information-schema/information-schema-cluster-config.md +++ b/information-schema/information-schema-cluster-config.md @@ -50,10 +50,10 @@ SELECT * FROM cluster_config WHERE type='tikv' AND `key` LIKE 'coprocessor%'; | TYPE | INSTANCE | KEY | VALUE | +------+-----------------+-----------------------------------+---------+ | tikv | 127.0.0.1:20165 | coprocessor.batch-split-limit | 10 | -| tikv | 127.0.0.1:20165 | coprocessor.region-max-keys | 1440000 | -| tikv | 127.0.0.1:20165 | coprocessor.region-max-size | 144MiB | -| tikv | 127.0.0.1:20165 | coprocessor.region-split-keys | 960000 | -| tikv | 127.0.0.1:20165 | coprocessor.region-split-size | 96MiB | +| tikv | 127.0.0.1:20165 | coprocessor.region-max-keys | 3840000 | +| tikv | 127.0.0.1:20165 | coprocessor.region-max-size | 384MiB | +| tikv | 127.0.0.1:20165 | coprocessor.region-split-keys | 2560000 | +| tikv | 127.0.0.1:20165 | coprocessor.region-split-size | 256MiB | | tikv | 127.0.0.1:20165 | coprocessor.split-region-on-table | false | +------+-----------------+-----------------------------------+---------+ 6 rows in set (0.00 sec) diff --git a/pd-configuration-file.md b/pd-configuration-file.md index f727ce33ba9b5..02c08549ec151 100644 --- a/pd-configuration-file.md +++ b/pd-configuration-file.md @@ -261,13 +261,13 @@ Configuration items related to scheduling ### `max-merge-region-size` + Controls 
the size limit of `Region Merge`. When the Region size is greater than the specified value, PD does not merge the Region with the adjacent Regions. -+ Default value: `20` ++ Default value: `54`. Before v8.4.0, the default value is `20`. + Unit: MiB ### `max-merge-region-keys` + Specifies the upper limit of the `Region Merge` key. When the Region key is greater than the specified value, the PD does not merge the Region with its adjacent Regions. -+ Default value: `200000` ++ Default value: `540000`. Before v8.4.0, the default value is `200000`. ### `patrol-region-interval` diff --git a/pd-control.md b/pd-control.md index daf4bcbd6e2d7..c9cafeac22108 100644 --- a/pd-control.md +++ b/pd-control.md @@ -146,8 +146,8 @@ Usage: "leader-schedule-limit": 4, "leader-schedule-policy": "count", "low-space-ratio": 0.8, - "max-merge-region-keys": 200000, - "max-merge-region-size": 20, + "max-merge-region-keys": 540000, + "max-merge-region-size": 54, "max-pending-peer-count": 64, "max-snapshot-count": 64, "max-store-down-time": "30m0s", diff --git a/tidb-storage.md b/tidb-storage.md index 8c801eb559988..7526ab2a341b8 100644 --- a/tidb-storage.md +++ b/tidb-storage.md @@ -49,7 +49,7 @@ To make it easy to understand, let's assume that all data only has one replica. * Hash: Create Hash by Key and select the corresponding storage node according to the Hash value. * Range: Divide ranges by Key, where a segment of serial Key is stored on a node. -TiKV chooses the second solution that divides the whole Key-Value space into a series of consecutive Key segments. Each segment is called a Region. Each Region can be described by `[StartKey, EndKey)`, a left-closed and right-open interval. The default size limit for each Region is 96 MiB and the size can be configured. +TiKV chooses the second solution that divides the whole Key-Value space into a series of consecutive Key segments. 
Each segment is called a Region. Each Region can be described by `[StartKey, EndKey)`, a left-closed and right-open interval. The default size limit for each Region is 256 MiB and the size can be configured. ![Region in TiDB](/media/tidb-storage-2.png) diff --git a/tikv-configuration-file.md b/tikv-configuration-file.md index 8c96e8528b37c..ba79d683c5213 100644 --- a/tikv-configuration-file.md +++ b/tikv-configuration-file.md @@ -1082,7 +1082,7 @@ Configuration items related to Coprocessor. ### `region-split-size` + The size of the newly split Region. This value is an estimate. -+ Default value: `"96MiB"` ++ Default value: `"256MiB"`. Before v8.4.0, the default value is `"96MiB"`. + Unit: KiB|MiB|GiB ### `region-max-keys` @@ -1093,7 +1093,7 @@ Configuration items related to Coprocessor. ### `region-split-keys` + The number of keys in the newly split Region. This value is an estimate. -+ Default value: `960000` ++ Default value: `2560000`. Before v8.4.0, the default value is `960000`. ### `consistency-check-method` @@ -2147,7 +2147,7 @@ Configuration items related to BR backup. + The threshold of the backup SST file size. If the size of a backup file in a TiKV Region exceeds this threshold, the file is backed up to several files with the TiKV Region split into multiple Region ranges. Each of the files in the split Regions is the same size as `sst-max-size` (or slightly larger). + For example, when the size of a backup file in the Region of `[a,e)` is larger than `sst-max-size`, the file is backed up to several files with regions `[a,b)`, `[b,c)`, `[c,d)` and `[d,e)`, and the size of `[a,b)`, `[b,c)`, `[c,d)` is the same as that of `sst-max-size` (or slightly larger). -+ Default value: `"144MiB"` ++ Default value: `"384MiB"`. Before v8.4.0, the default value is `"144MiB"`. 
### `enable-auto-tune` New in v5.4.0 diff --git a/tikv-overview.md b/tikv-overview.md index 5934f8d8cd1fb..077ae60234b57 100644 --- a/tikv-overview.md +++ b/tikv-overview.md @@ -21,7 +21,7 @@ There is a RocksDB database within each Store and it stores data into the local Data consistency between replicas of a Region is guaranteed by the Raft Consensus Algorithm. Only the leader of the Region can provide the writing service, and only when the data is written to the majority of replicas of a Region, the write operation succeeds. -TiKV tries to keep an appropriate size for each Region in the cluster. The Region size is currently 96 MiB by default. This mechanism helps the PD component to balance Regions among nodes in a TiKV cluster. When the size of a Region exceeds a threshold (144 MiB by default), TiKV splits it into two or more Regions. When the size of a Region is smaller than the threshold (20 MiB by default), TiKV merges the two smaller adjacent Regions into one Region. +TiKV tries to keep an appropriate size for each Region in the cluster. The Region size is currently 256 MiB by default. This mechanism helps the PD component to balance Regions among nodes in a TiKV cluster. When the size of a Region exceeds a threshold (384 MiB by default), TiKV splits it into two or more Regions. When the size of a Region is smaller than the threshold (54 MiB by default), TiKV merges the two smaller adjacent Regions into one Region. When PD moves a replica from one TiKV node to another, it firstly adds a Learner replica on the target node, after the data in the Learner replica is nearly the same as that in the Leader replica, PD changes it to a Follower replica and removes the Follower replica on the source node. 
diff --git a/tune-region-performance.md b/tune-region-performance.md index 869771f6ea47a..d7ff0a7ac1eb3 100644 --- a/tune-region-performance.md +++ b/tune-region-performance.md @@ -11,7 +11,13 @@ This document introduces how to tune Region performance by adjusting the Region TiKV automatically [shards bottom-layered data](/best-practices/tidb-best-practices.md#data-sharding). Data is split into multiple Regions based on the key ranges. When the size of a Region exceeds a threshold, TiKV splits it into two or more Regions. -In scenarios involving large datasets, if the Region size is relatively small, TiKV might have too many Regions, which causes more resource consumption and [performance regression](/best-practices/massive-regions-best-practices.md#performance-problem). Since v6.1.0, TiDB supports customizing Region size. The default size of a Region is 96 MiB. To reduce the number of Regions, you can adjust Regions to a larger size. +In scenarios involving large datasets, if the Region size is relatively small, TiKV might have too many Regions, which causes more resource consumption and [performance regression](/best-practices/massive-regions-best-practices.md#performance-problem). + +> **Note:** +> +> - In v6.1.0, TiDB supports customizing Region size as an experimental feature. +> - Starting from v6.5.0, this feature becomes generally available (GA). +> - Starting from v8.4.0, the default Region size is increased from 96 MiB to 256 MiB. Increasing the Region size can reduce the number of Regions. To reduce the performance overhead of many Regions, you can also enable [Hibernate Region](/best-practices/massive-regions-best-practices.md#method-4-increase-the-number-of-tikv-instances) or [`Region Merge`](/best-practices/massive-regions-best-practices.md#method-5-adjust-raft-base-tick-interval). 
diff --git a/tune-tikv-memory-performance.md b/tune-tikv-memory-performance.md index 7c1c45fa60c81..acaa8c4946dbc 100644 --- a/tune-tikv-memory-performance.md +++ b/tune-tikv-memory-performance.md @@ -86,7 +86,7 @@ log-level = "info" ## ## To deploy multiple TiKV nodes on a single physical machine, configure this parameter explicitly. ## Otherwise, the OOM problem might occur in TiKV. -# capacity = "1GB" +# capacity = "1GiB" [pd] # PD address @@ -105,14 +105,14 @@ job = "tikv" # raftdb-path = "/tmp/tikv/store/raft" # When the data size change in a Region is larger than the threshold value, TiKV checks whether this Region needs split. -# To reduce the costs of scanning data in the checking process, set the value to 32 MB during the data import process. In the normal operation status, set it to the default value. -region-split-check-diff = "32MB" +# To reduce the costs of scanning data in the checking process, set the value to 32 MiB during the data import process. In the normal operation status, set it to the default value. +region-split-check-diff = "32MiB" [coprocessor] ## If the size of a Region with the range of [a,e) is larger than the value of `region_max_size`, TiKV tries to split the Region to several Regions, for example, the Regions with the ranges of [a,b), [b,c), [c,d), and [d,e). ## After the Region split, the size of the split Regions is equal to the value of `region_split_size` (or slightly larger than the value of `region_split_size`). -# region-max-size = "144MB" -# region-split-size = "96MB" +# region-max-size = "384MiB" +# region-split-size = "256MiB" [rocksdb] # The maximum number of threads of RocksDB background tasks. The background tasks include compaction and flush. @@ -126,7 +126,7 @@ region-split-check-diff = "32MB" # max-open-files = 40960 # The file size limit of RocksDB MANIFEST. 
For more details, see https://github.com/facebook/rocksdb/wiki/MANIFEST -max-manifest-file-size = "20MB" +max-manifest-file-size = "20MiB" # The directory of RocksDB write-ahead logs. If there are two disks on the machine, store the RocksDB data and WAL logs # on different disks to improve TiKV performance. @@ -138,10 +138,10 @@ max-manifest-file-size = "20MB" # wal-size-limit = 0 # In most cases, set the maximum total size of RocksDB WAL logs to the default value. -# max-total-wal-size = "4GB" +# max-total-wal-size = "4GiB" -# Use this parameter to enable the readahead feature during RocksDB compaction. If you are using mechanical disks, it is recommended to set the value to 2MB at least. -# compaction-readahead-size = "2MB" +# Use this parameter to enable the readahead feature during RocksDB compaction. If you are using mechanical disks, it is recommended to set the value to 2MiB at least. +# compaction-readahead-size = "2MiB" [rocksdb.defaultcf] # The data block size. RocksDB compresses data based on the unit of block. @@ -167,7 +167,7 @@ block-size = "64KB" compression-per-level = ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"] # The RocksDB memtable size -write-buffer-size = "128MB" +write-buffer-size = "128MiB" # The maximum number of the memtables. The data written into RocksDB is first recorded in the WAL log, and then inserted # into memtables. When the memtable reaches the size limit of `write-buffer-size`, it turns into read only and generates @@ -198,25 +198,25 @@ level0-stop-writes-trigger = 36 # compaction of level0 and level1 and the trigger condition of compaction for level0 is that the number of the # sst files reaches 4 (the default value). When both level0 and level1 adopt compaction, it is necessary to analyze # RocksDB logs to know the size of an sst file compressed from an mentable. For example, if the file size is 32MB, -# the proposed value of `max-bytes-for-level-base` is 32MB * 4 = 128MB. 
-max-bytes-for-level-base = "512MB" +# the proposed value of `max-bytes-for-level-base` is 32MiB * 4 = 128MiB. +max-bytes-for-level-base = "512MiB" # The sst file size. The sst file size of level0 is influenced by the compaction algorithm of `write-buffer-size` # and level0. `target-file-size-base` is used to control the size of a single sst file of level1-level6. -target-file-size-base = "32MB" +target-file-size-base = "32MiB" [rocksdb.writecf] # Set it the same as `rocksdb.defaultcf.compression-per-level`. compression-per-level = ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"] # Set it the same as `rocksdb.defaultcf.write-buffer-size`. -write-buffer-size = "128MB" +write-buffer-size = "128MiB" max-write-buffer-number = 5 min-write-buffer-number-to-merge = 1 # Set it the same as `rocksdb.defaultcf.max-bytes-for-level-base`. -max-bytes-for-level-base = "512MB" -target-file-size-base = "32MB" +max-bytes-for-level-base = "512MiB" +target-file-size-base = "32MiB" [raftdb] # The maximum number of the file handles RaftDB can open @@ -224,20 +224,20 @@ target-file-size-base = "32MB" # Enable the readahead feature in RaftDB compaction. If you are using mechanical disks, it is recommended to set # this value to 2MB at least. -# compaction-readahead-size = "2MB" +# compaction-readahead-size = "2MiB" [raftdb.defaultcf] # Set it the same as `rocksdb.defaultcf.compression-per-level`. compression-per-level = ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"] # Set it the same as `rocksdb.defaultcf.write-buffer-size`. -write-buffer-size = "128MB" +write-buffer-size = "128MiB" max-write-buffer-number = 5 min-write-buffer-number-to-merge = 1 # Set it the same as `rocksdb.defaultcf.max-bytes-for-level-base`. 
-max-bytes-for-level-base = "512MB" -target-file-size-base = "32MB" +max-bytes-for-level-base = "512MiB" +target-file-size-base = "32MiB" ``` ## TiKV memory usage @@ -250,7 +250,7 @@ Besides `block cache` and `write buffer` which occupy the system memory, the sys ## Recommended configuration of TiKV -+ In production environments, it is not recommended to deploy TiKV on the machine whose CPU cores are less than 8 or the memory is less than 32GB. ++ In production environments, it is not recommended to deploy TiKV on a machine with fewer than 8 CPU cores or less than 32 GiB of memory. + If you demand a high write throughput, it is recommended to use a disk with good throughput capacity.