From b67c23f933c9ef4ae7098288f647b6b113fccdba Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Thu, 22 Feb 2024 17:46:58 +0800 Subject: [PATCH] =?UTF-8?q?adjust=20the=20recommend=20value=20of=20raft=20?= =?UTF-8?q?election-timeout=20in=20multi=20dc=20deplo=E2=80=A6=20(#16561)?= =?UTF-8?q?=20(#16576)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config-templates/geo-redundancy-deployment.yaml | 4 ++-- dr-multi-replica.md | 4 ++-- geo-distributed-deployment-topology.md | 8 ++++++-- three-data-centers-in-two-cities-deployment.md | 14 +++++++++----- 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/config-templates/geo-redundancy-deployment.yaml b/config-templates/geo-redundancy-deployment.yaml index 74ad7ecddca3a..4f839c7752cc8 100644 --- a/config-templates/geo-redundancy-deployment.yaml +++ b/config-templates/geo-redundancy-deployment.yaml @@ -107,8 +107,8 @@ tikv_servers: host: host1 readpool.storage.use-unified-pool: true readpool.storage.low-concurrency: 10 - raftstore.raft-min-election-timeout-ticks: 1000 - raftstore.raft-max-election-timeout-ticks: 1020 + raftstore.raft-min-election-timeout-ticks: 50 + raftstore.raft-max-election-timeout-ticks: 60 monitoring_servers: - host: 10.0.1.16 grafana_servers: diff --git a/dr-multi-replica.md b/dr-multi-replica.md index fb8d2ed065cd5..fa85092ede9fe 100644 --- a/dr-multi-replica.md +++ b/dr-multi-replica.md @@ -74,8 +74,8 @@ In this example, TiDB contains five replicas and three regions. 
Region 1 is the config: server.labels: { Region: "Region3", AZ: "AZ5" } - raftstore.raft-min-election-timeout-ticks: 1000 - raftstore.raft-max-election-timeout-ticks: 1200 + raftstore.raft-min-election-timeout-ticks: 50 + raftstore.raft-max-election-timeout-ticks: 60 monitoring_servers: - host: tidb-dr-test2 diff --git a/geo-distributed-deployment-topology.md b/geo-distributed-deployment-topology.md index 4fef9d2a38fe1..be56da57a605c 100644 --- a/geo-distributed-deployment-topology.md +++ b/geo-distributed-deployment-topology.md @@ -56,10 +56,14 @@ This section describes the key parameter configuration of the TiDB geo-distribut - To prevent remote TiKV nodes from launching unnecessary Raft elections, it is required to increase the minimum and maximum number of ticks that the remote TiKV nodes need to launch an election. The two parameters are set to `0` by default. ```yaml - raftstore.raft-min-election-timeout-ticks: 1000 - raftstore.raft-max-election-timeout-ticks: 1020 + raftstore.raft-min-election-timeout-ticks: 50 + raftstore.raft-max-election-timeout-ticks: 60 ``` +> **Note:** +> +> Using `raftstore.raft-min-election-timeout-ticks` and `raftstore.raft-max-election-timeout-ticks` to configure larger election timeout ticks for a TiKV node can significantly decrease the likelihood of Regions on that node becoming Leaders. However, in a disaster scenario where some TiKV nodes are offline and the remaining active TiKV nodes lag behind in Raft logs, only Regions on this TiKV node with large election timeout ticks can become Leaders. Because Regions on this TiKV node must wait for at least the duration set by `raftstore.raft-min-election-timeout-ticks` before initiating an election, it is recommended to avoid setting these values excessively large to prevent potential impact on the cluster availability in such scenarios. + #### PD parameters - The PD metadata information records the topology of the TiKV cluster. 
PD schedules the Raft Group replicas on the following four dimensions: diff --git a/three-data-centers-in-two-cities-deployment.md b/three-data-centers-in-two-cities-deployment.md index 3d82d054aee35..1b08e5d76df41 100644 --- a/three-data-centers-in-two-cities-deployment.md +++ b/three-data-centers-in-two-cities-deployment.md @@ -113,8 +113,8 @@ tikv_servers: - host: 10.63.10.34 config: server.labels: { az: "3", replication zone: "5", rack: "5", host: "34" } - raftstore.raft-min-election-timeout-ticks: 1000 - raftstore.raft-max-election-timeout-ticks: 1200 + raftstore.raft-min-election-timeout-ticks: 50 + raftstore.raft-max-election-timeout-ticks: 60 monitoring_servers: - host: 10.63.10.60 @@ -174,11 +174,15 @@ In the deployment of three AZs in two regions, to optimize performance, you need - Optimize the network configuration of the TiKV node in another region (San Francisco). Modify the following TiKV parameters for AZ3 in San Francisco and try to prevent the replica in this TiKV node from participating in the Raft election. ```yaml - raftstore.raft-min-election-timeout-ticks: 1000 - raftstore.raft-max-election-timeout-ticks: 1200 + raftstore.raft-min-election-timeout-ticks: 50 + raftstore.raft-max-election-timeout-ticks: 60 ``` -- Configure scheduling. After the cluster is enabled, use the `tiup ctl:v pd` tool to modify the scheduling policy. Modify the number of TiKV Raft replicas. Configure this number as planned. In this example, the number of replicas is five. +> **Note:** +> +> Using `raftstore.raft-min-election-timeout-ticks` and `raftstore.raft-max-election-timeout-ticks` to configure larger election timeout ticks for a TiKV node can significantly decrease the likelihood of Regions on that node becoming Leaders. However, in a disaster scenario where some TiKV nodes are offline and the remaining active TiKV nodes lag behind in Raft logs, only Regions on this TiKV node with large election timeout ticks can become Leaders. 
Because Regions on this TiKV node must wait for at least the duration set by `raftstore.raft-min-election-timeout-ticks` before initiating an election, it is recommended to avoid setting these values excessively large to prevent potential impact on the cluster availability in such scenarios. + +- Configure scheduling. After the cluster is enabled, use the `tiup ctl:v{CLUSTER_VERSION} pd` tool to modify the scheduling policy. Modify the number of TiKV Raft replicas. Configure this number as planned. In this example, the number of replicas is five. ```bash config set max-replicas 5