diff --git a/.gitignore b/.gitignore index 6345f4f4e30..4c2ce9d9eba 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,4 @@ default.pd *.swp .DS_Store tags -/_tools/ +/.retools/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 962af605e91..bf8a5b1df10 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,20 @@ # PD Change Log +## v2.1.0-rc2 +### Features +* Support the `GetAllStores` interface +* Add the statistics of scheduling estimation in Simulator +### Improvements +* Optimize the handling process of down stores to make up replicas as soon as possible +* Optimize the start of Coordinator to reduce the unnecessary scheduling caused by restarting PD +* Optimize the memory usage to reduce the overhead caused by heartbeats +* Optimize error handling and improve the log information +* Support querying the Region information of a specific store in pd-ctl +* Support querying the topN Region information based on version +* Support more accurate TSO decoding in pd-ctl +### Bug fix +* Fix the issue that pd-ctl uses the `hot store` command to exit wrongly + ## v2.1.0-rc1 ### Features * Introduce the version control mechanism and support rolling update of the cluster with compatibility diff --git a/Makefile b/Makefile index d57e3b4f415..672386e1391 100644 --- a/Makefile +++ b/Makefile @@ -7,9 +7,10 @@ BASIC_TEST_PKGS := $(filter-out github.com/pingcap/pd/pkg/integration_test,$(TES PACKAGES := go list ./... 
PACKAGE_DIRECTORIES := $(PACKAGES) | sed 's|github.com/pingcap/pd/||' GOCHECKER := awk '{ print } END { if (NR > 0) { exit 1 } }' +RETOOL:= ./hack/retool -GOFAIL_ENABLE := $$(find $$PWD/ -type d | grep -vE "(\.git|vendor)" | xargs retool do gofail enable) -GOFAIL_DISABLE := $$(find $$PWD/ -type d | grep -vE "(\.git|vendor)" | xargs retool do gofail disable) +GOFAIL_ENABLE := $$(find $$PWD/ -type d | grep -vE "(\.git|vendor)" | xargs ./hack/retool do gofail enable) +GOFAIL_DISABLE := $$(find $$PWD/ -type d | grep -vE "(\.git|vendor)" | xargs ./hack/retool do gofail disable) LDFLAGS += -X "$(PD_PKG)/server.PDReleaseVersion=$(shell git describe --tags --dirty)" LDFLAGS += -X "$(PD_PKG)/server.PDBuildTS=$(shell date -u '+%Y-%m-%d %I:%M:%S')" @@ -35,9 +36,9 @@ ifeq ("$(WITH_RACE)", "1") else CGO_ENABLED=0 go build -ldflags '$(LDFLAGS)' -o bin/pd-server cmd/pd-server/main.go endif - CGO_ENABLED=0 go build -ldflags '$(LDFLAGS)' -o bin/pd-ctl cmd/pd-ctl/main.go - CGO_ENABLED=0 go build -o bin/pd-tso-bench cmd/pd-tso-bench/main.go - CGO_ENABLED=0 go build -o bin/pd-recover cmd/pd-recover/main.go + CGO_ENABLED=0 go build -ldflags '$(LDFLAGS)' -o bin/pd-ctl tools/pd-ctl/main.go + CGO_ENABLED=0 go build -o bin/pd-tso-bench tools/pd-tso-bench/main.go + CGO_ENABLED=0 go build -o bin/pd-recover tools/pd-recover/main.go test: retool-setup # testing.. 
@@ -52,26 +53,26 @@ basic_test: # These need to be fixed before they can be ran regularly check-fail: - CGO_ENABLED=0 retool do gometalinter.v2 --disable-all \ + CGO_ENABLED=0 ./hack/retool do gometalinter.v2 --disable-all \ --enable errcheck \ $$($(PACKAGE_DIRECTORIES)) - CGO_ENABLED=0 retool do gosec $$($(PACKAGE_DIRECTORIES)) + CGO_ENABLED=0 ./hack/retool do gosec $$($(PACKAGE_DIRECTORIES)) check-all: static lint @echo "checking" retool-setup: @which retool >/dev/null 2>&1 || go get github.com/twitchtv/retool - @retool sync + @./hack/retool sync check: retool-setup check-all static: @ # Not running vet and fmt through metalinter becauase it ends up looking at vendor gofmt -s -l $$($(PACKAGE_DIRECTORIES)) 2>&1 | $(GOCHECKER) - retool do govet --shadow $$($(PACKAGE_DIRECTORIES)) 2>&1 | $(GOCHECKER) + ./hack/retool do govet --shadow $$($(PACKAGE_DIRECTORIES)) 2>&1 | $(GOCHECKER) - CGO_ENABLED=0 retool do gometalinter.v2 --disable-all --deadline 120s \ + CGO_ENABLED=0 ./hack/retool do gometalinter.v2 --disable-all --deadline 120s \ --enable misspell \ --enable megacheck \ --enable ineffassign \ @@ -79,7 +80,7 @@ static: lint: @echo "linting" - CGO_ENABLED=0 retool do revive -formatter friendly -config revive.toml $$($(PACKAGES)) + CGO_ENABLED=0 ./hack/retool do revive -formatter friendly -config revive.toml $$($(PACKAGES)) travis_coverage: ifeq ("$(TRAVIS_COVERAGE)", "1") @@ -100,8 +101,7 @@ endif bash ./hack/clean_vendor.sh simulator: - CGO_ENABLED=0 go build -o bin/simulator cmd/simulator/main.go - bin/simulator + CGO_ENABLED=0 go build -o bin/pd-simulator tools/pd-simulator/main.go gofail-enable: # Converting gofail failpoints... 
diff --git a/pd-client/client.go b/client/client.go similarity index 100% rename from pd-client/client.go rename to client/client.go diff --git a/pd-client/client_test.go b/client/client_test.go similarity index 100% rename from pd-client/client_test.go rename to client/client_test.go diff --git a/pd-client/metrics.go b/client/metrics.go similarity index 100% rename from pd-client/metrics.go rename to client/metrics.go diff --git a/cmd/README.md b/cmd/README.md deleted file mode 100644 index 47c2fecd124..00000000000 --- a/cmd/README.md +++ /dev/null @@ -1,4 +0,0 @@ -## cmd - -This directory is meant to enforce vendoring for pd binaries without polluting -the pd client libraries with vendored dependencies. diff --git a/conf/simconfig.toml b/conf/simconfig.toml new file mode 100644 index 00000000000..7b91e3916f4 --- /dev/null +++ b/conf/simconfig.toml @@ -0,0 +1,17 @@ +# PD Simulator Configuration + +[tick] +# the tick interval when starting PD inside (default: "100ms") +sim-tick-interval = "100ms" +# the tick interval when connecting with an external PD (default: "1s") +norm-tick-interval = "1s" + +[store] +# the capacity size of a new store in GB (default: 1024) +store-capacity = 1024 +# the available size of a new store in GB (default: 1024) +store-available = 1024 +# the io rate of a new store in MB/s (default: 40) +store-io-per-second = 40 +# the version of a new store (default: "2.1.0") +store-version = "2.1.0" diff --git a/hack/retool b/hack/retool new file mode 100755 index 00000000000..de9950a7004 --- /dev/null +++ b/hack/retool @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +# +# Add standard retool options +set -euo pipefail + + +cd $(dirname "$0")/.. 
+exec retool -tool-dir "$PWD/.retools" "$@" diff --git a/hack/retool-install.sh b/hack/retool-install.sh index ba3fbca887b..0ca3f0ca97c 100755 --- a/hack/retool-install.sh +++ b/hack/retool-install.sh @@ -3,21 +3,23 @@ set -euo pipefail # This script generates tools.json # It helps record what releases/branches are being used + +cd $(dirname "$0")/.. which retool >/dev/null || go get github.com/twitchtv/retool # tool environment # check runner -retool add gopkg.in/alecthomas/gometalinter.v2 v2.0.5 +./hack/retool add gopkg.in/alecthomas/gometalinter.v2 v2.0.5 # check spelling -retool add github.com/client9/misspell/cmd/misspell v0.3.4 +./hack/retool add github.com/client9/misspell/cmd/misspell v0.3.4 # checks correctness -retool add github.com/gordonklaus/ineffassign 7bae11eba15a3285c75e388f77eb6357a2d73ee2 -retool add honnef.co/go/tools/cmd/megacheck master -retool add github.com/dnephin/govet 4a96d43e39d340b63daa8bc5576985aa599885f6 +./hack/retool add github.com/gordonklaus/ineffassign 7bae11eba15a3285c75e388f77eb6357a2d73ee2 +./hack/retool add honnef.co/go/tools/cmd/megacheck master +./hack/retool add github.com/dnephin/govet 4a96d43e39d340b63daa8bc5576985aa599885f6 # slow checks -retool add github.com/kisielk/errcheck v1.1.0 +./hack/retool add github.com/kisielk/errcheck v1.1.0 # linter -retool add github.com/mgechev/revive 7773f47324c2bf1c8f7a5500aff2b6c01d3ed73b -retool add github.com/securego/gosec/cmd/gosec 1.0.0 +./hack/retool add github.com/mgechev/revive 7773f47324c2bf1c8f7a5500aff2b6c01d3ed73b +./hack/retool add github.com/securego/gosec/cmd/gosec 1.0.0 # go fail -retool add github.com/etcd-io/gofail master +./hack/retool add github.com/etcd-io/gofail master diff --git a/pdctl/README.md b/pdctl/README.md deleted file mode 100644 index 7d5e7f7aaba..00000000000 --- a/pdctl/README.md +++ /dev/null @@ -1,103 +0,0 @@ -pdctl -======== - -pdctl is a command line tool for pd - -## Build -1. Make sure [*Go*](https://golang.org/) (version 1.5+) is installed. -2. 
Use `make` in pd root path. `pdctl` will build in `bin` directory. - -## Usage - -### Example -run: - - ./pd-ctl store -d -u 127.0.0.1:2379 -show all stores status. '-u' specify the pd address, it can be overwritten by setting the environment variable PD_ADDR. Such as `export PD_ADDR=127.0.0.1:2379` - -### Flags -#### --pd,-u -+ The pd address -+ default: http://127.0.0.1:2379 -+ env variable: PD_ADDR - -#### --detach,-d -+ Run pdctl without readline -+ default: false - -### Command -#### store [delete] -show the store status or delete a store - -##### example -``` ->> store -{ - "count": 3, - "stores": [...] -} ->> store 1 - ...... ->> store delete 1 - ...... -``` - -#### config [show | set \ \] -show or set the balance config -##### example -``` ->> config show -{ - "min-region-count": 10, - "min-leader-count": 10, - "max-snapshot-count": 3, - "min-balance-diff-ratio": 0.01, - "max-store-down-duration": "30m0s", - "leader-schedule-limit": 8, - "leader-schedule-interval": "10s", - "storage-schedule-limit": 4, - "storage-schedule-interval": "30s" -} ->> config set leader-schedule-interval 20s -Success! -``` - -#### Member [leader | delete] -show the pd members status -##### example -``` ->> member -{ - "members": [......] -} ->> member leader -{ - "name": "pd", - "addr": "http://192.168.199.229:2379", - "id": 9724873857558226554 -} ->> member delete name pd2 -Success! -``` - -#### Region -show one or all regions status -##### Example -``` ->> region -{ - "count": 1, - "regions": [......] -} - ->> region 2 -{ - "region": { - "id": 2, - ...... - } - "leader": { - ...... - } -} -``` diff --git a/pkg/faketikv/cluster.go b/pkg/faketikv/cluster.go deleted file mode 100644 index 87a4428872f..00000000000 --- a/pkg/faketikv/cluster.go +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2017 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// See the License for the specific language governing permissions and -// limitations under the License. - -package faketikv - -import ( - "context" - - "github.com/pingcap/kvproto/pkg/metapb" - "github.com/pingcap/pd/pkg/faketikv/cases" - "github.com/pingcap/pd/server/core" - "github.com/pkg/errors" -) - -// ClusterInfo records all cluster information. -type ClusterInfo struct { - conf *cases.Conf - Nodes map[uint64]*Node -} - -// NewClusterInfo creates the initialized cluster with config. -func NewClusterInfo(pdAddr string, conf *cases.Conf) (*ClusterInfo, error) { - cluster := &ClusterInfo{ - conf: conf, - Nodes: make(map[uint64]*Node), - } - - for _, store := range conf.Stores { - node, err := NewNode(store, pdAddr) - if err != nil { - return nil, err - } - cluster.Nodes[store.ID] = node - } - - return cluster, nil -} - -// GetBootstrapInfo returns a valid bootstrap store and region. 
-func (c *ClusterInfo) GetBootstrapInfo(r *RaftEngine) (*metapb.Store, *metapb.Region, error) { - origin := r.RandRegion() - if origin == nil { - return nil, nil, errors.New("no region found for bootstrap") - } - region := origin.Clone( - core.WithStartKey([]byte("")), - core.WithEndKey([]byte("")), - core.SetRegionConfVer(1), - core.SetRegionVersion(1), - core.SetPeers([]*metapb.Peer{origin.GetLeader()}), - ) - if region.GetLeader() == nil { - return nil, nil, errors.New("bootstrap region has no leader") - } - store := c.Nodes[region.GetLeader().GetStoreId()] - if store == nil { - return nil, nil, errors.Errorf("bootstrap store %v not found", region.GetLeader().GetStoreId()) - } - return store.Store, region.GetMeta(), nil -} - -func (c *ClusterInfo) allocID(storeID uint64) (uint64, error) { - node, ok := c.Nodes[storeID] - if !ok { - return 0, errors.Errorf("node %d not found", storeID) - } - id, err := node.client.AllocID(context.Background()) - return id, errors.WithStack(err) -} diff --git a/pkg/faketikv/config.go b/pkg/faketikv/config.go deleted file mode 100644 index 2a76c787747..00000000000 --- a/pkg/faketikv/config.go +++ /dev/null @@ -1,5 +0,0 @@ -package faketikv - -// Config is the faketikv configuration. -type Config struct { -} diff --git a/pkg/faketikv/conn.go b/pkg/faketikv/conn.go deleted file mode 100644 index cbdbd240672..00000000000 --- a/pkg/faketikv/conn.go +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2018 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package faketikv - -// Conn records the informations of connection among nodes. -type Conn struct { - Nodes map[uint64]*Node -} - -// NewConn returns a conn. -func NewConn(nodes map[uint64]*Node) (*Conn, error) { - conn := &Conn{ - Nodes: nodes, - } - return conn, nil -} - -func (c *Conn) nodeHealth(storeID uint64) bool { - n, ok := c.Nodes[storeID] - if !ok { - return false - } - - return n.GetState() == Up -} diff --git a/pkg/faketikv/drive.go b/pkg/faketikv/drive.go deleted file mode 100644 index f49efce4774..00000000000 --- a/pkg/faketikv/drive.go +++ /dev/null @@ -1,193 +0,0 @@ -// Copyright 2017 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// See the License for the specific language governing permissions and -// limitations under the License. - -package faketikv - -import ( - "context" - - "github.com/pingcap/kvproto/pkg/metapb" - "github.com/pingcap/kvproto/pkg/pdpb" - "github.com/pingcap/pd/pkg/faketikv/cases" - "github.com/pingcap/pd/pkg/faketikv/simutil" - "github.com/pingcap/pd/server/core" - "github.com/pkg/errors" -) - -// Driver promotes the cluster status change. -type Driver struct { - addr string - confName string - conf *cases.Conf - clusterInfo *ClusterInfo - client Client - tickCount int64 - eventRunner *EventRunner - raftEngine *RaftEngine -} - -// NewDriver returns a driver. -func NewDriver(addr string, confName string) *Driver { - return &Driver{ - addr: addr, - confName: confName, - } -} - -// Prepare initializes cluster information, bootstraps cluster and starts nodes. 
-func (d *Driver) Prepare() error { - d.conf = cases.NewConf(d.confName) - if d.conf == nil { - return errors.Errorf("failed to create conf %s", d.confName) - } - - clusterInfo, err := NewClusterInfo(d.addr, d.conf) - if err != nil { - return err - } - d.clusterInfo = clusterInfo - - conn, err := NewConn(d.clusterInfo.Nodes) - if err != nil { - return err - } - - raftEngine, err := NewRaftEngine(d.conf, conn) - if err != nil { - return err - } - d.raftEngine = raftEngine - - for _, node := range d.clusterInfo.Nodes { - node.raftEngine = raftEngine - } - - // Bootstrap. - store, region, err := clusterInfo.GetBootstrapInfo(d.raftEngine) - if err != nil { - return err - } - d.client = clusterInfo.Nodes[store.GetId()].client - - ctx, cancel := context.WithTimeout(context.Background(), pdTimeout) - err = d.client.Bootstrap(ctx, store, region) - cancel() - if err != nil { - simutil.Logger.Fatal("bootstrapped error: ", err) - } else { - simutil.Logger.Debug("Bootstrap success") - } - - // Setup alloc id. - for { - id, err := d.client.AllocID(context.Background()) - if err != nil { - return errors.WithStack(err) - } - if id > d.conf.MaxID { - break - } - } - - for _, n := range d.clusterInfo.Nodes { - err := n.Start() - if err != nil { - return err - } - } - d.eventRunner = NewEventRunner(d.conf.Events) - return nil -} - -// Tick invokes nodes' Tick. -func (d *Driver) Tick() { - d.tickCount++ - d.raftEngine.stepRegions(d.clusterInfo) - d.eventRunner.Tick(d) - for _, n := range d.clusterInfo.Nodes { - n.reportRegionChange() - n.Tick() - } -} - -// Check checks if the simulation is completed. -func (d *Driver) Check() bool { - return d.conf.Checker(d.raftEngine.regionsInfo) -} - -// PrintStatistics prints the statistics of the scheduler. -func (d *Driver) PrintStatistics() { - d.raftEngine.schedulerStats.PrintStatistics() -} - -// Stop stops all nodes. 
-func (d *Driver) Stop() { - for _, n := range d.clusterInfo.Nodes { - n.Stop() - } -} - -// TickCount returns the simulation's tick count. -func (d *Driver) TickCount() int64 { - return d.tickCount -} - -// AddNode adds a new node. -func (d *Driver) AddNode(id uint64) { - if _, ok := d.clusterInfo.Nodes[id]; ok { - simutil.Logger.Infof("Node %d already existed", id) - return - } - s := &cases.Store{ - ID: id, - Status: metapb.StoreState_Up, - Capacity: 1 * cases.TB, - Available: 1 * cases.TB, - Version: "2.1.0", - } - n, err := NewNode(s, d.addr) - if err != nil { - simutil.Logger.Errorf("Add node %d failed: %v", id, err) - return - } - d.clusterInfo.Nodes[id] = n - n.raftEngine = d.raftEngine - err = n.Start() - if err != nil { - simutil.Logger.Errorf("Start node %d failed: %v", id, err) - } -} - -// DeleteNode deletes a node. -func (d *Driver) DeleteNode(id uint64) { - node := d.clusterInfo.Nodes[id] - if node == nil { - simutil.Logger.Errorf("Node %d not existed", id) - return - } - delete(d.clusterInfo.Nodes, id) - node.Stop() - - regions := d.raftEngine.GetRegions() - for _, region := range regions { - storeIDs := region.GetStoreIds() - if _, ok := storeIDs[id]; ok { - downPeer := &pdpb.PeerStats{ - Peer: region.GetStorePeer(id), - DownSeconds: 24 * 60 * 60, - } - region = region.Clone(core.WithDownPeers(append(region.GetDownPeers(), downPeer))) - d.raftEngine.SetRegion(region) - } - } -} diff --git a/pkg/faketikv/event.go b/pkg/faketikv/event.go deleted file mode 100644 index 250cf7587e6..00000000000 --- a/pkg/faketikv/event.go +++ /dev/null @@ -1,152 +0,0 @@ -// Copyright 2018 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// See the License for the specific language governing permissions and -// limitations under the License. - -package faketikv - -import ( - "github.com/pingcap/pd/pkg/faketikv/cases" - "github.com/pingcap/pd/pkg/faketikv/simutil" -) - -// Event that affect the status of the cluster -type Event interface { - Run(driver *Driver) bool -} - -// EventRunner includes all events -type EventRunner struct { - events []Event -} - -// NewEventRunner news a event runner -func NewEventRunner(events []cases.EventInner) *EventRunner { - er := &EventRunner{events: make([]Event, 0, len(events))} - for _, e := range events { - event := parserEvent(e) - if event != nil { - er.events = append(er.events, event) - } - } - return er -} - -func parserEvent(e cases.EventInner) Event { - switch v := e.(type) { - case *cases.WriteFlowOnSpotInner: - return &WriteFlowOnSpot{in: v} - case *cases.WriteFlowOnRegionInner: - return &WriteFlowOnRegion{in: v} - case *cases.ReadFlowOnRegionInner: - return &ReadFlowOnRegion{in: v} - case *cases.AddNodesDynamicInner: - return &AddNodesDynamic{in: v} - case *cases.DeleteNodesInner: - return &DeleteNodes{in: v} - } - return nil -} - -// Tick ticks the event run -func (er *EventRunner) Tick(driver *Driver) { - var finishedIndex int - for i, e := range er.events { - isFinished := e.Run(driver) - if isFinished { - er.events[i], er.events[finishedIndex] = er.events[finishedIndex], er.events[i] - finishedIndex++ - } - } - er.events = er.events[finishedIndex:] -} - -// WriteFlowOnSpot writes bytes in some range -type WriteFlowOnSpot struct { - in *cases.WriteFlowOnSpotInner -} - -// Run implements the event interface -func (w *WriteFlowOnSpot) Run(driver *Driver) bool { - raft := driver.raftEngine - res := 
w.in.Step(driver.tickCount) - for key, size := range res { - region := raft.SearchRegion([]byte(key)) - if region == nil { - simutil.Logger.Errorf("region not found for key %s", key) - continue - } - raft.updateRegionStore(region, size) - } - return false -} - -// WriteFlowOnRegion writes bytes in some region -type WriteFlowOnRegion struct { - in *cases.WriteFlowOnRegionInner -} - -// Run implements the event interface -func (w *WriteFlowOnRegion) Run(driver *Driver) bool { - raft := driver.raftEngine - res := w.in.Step(driver.tickCount) - for id, bytes := range res { - region := raft.GetRegion(id) - if region == nil { - simutil.Logger.Errorf("region %d not found", id) - continue - } - raft.updateRegionStore(region, bytes) - } - return false -} - -// ReadFlowOnRegion reads bytes in some region -type ReadFlowOnRegion struct { - in *cases.ReadFlowOnRegionInner -} - -// Run implements the event interface -func (w *ReadFlowOnRegion) Run(driver *Driver) bool { - res := w.in.Step(driver.tickCount) - driver.raftEngine.updateRegionReadBytes(res) - return false -} - -// AddNodesDynamic adds nodes dynamically. -type AddNodesDynamic struct { - in *cases.AddNodesDynamicInner -} - -// Run implements the event interface. -func (w *AddNodesDynamic) Run(driver *Driver) bool { - res := w.in.Step(driver.tickCount) - if res == 0 { - return false - } - driver.AddNode(res) - return false -} - -// DeleteNodes deletes nodes randomly -type DeleteNodes struct { - in *cases.DeleteNodesInner -} - -// Run implements the event interface -func (w *DeleteNodes) Run(driver *Driver) bool { - res := w.in.Step(driver.tickCount) - if res == 0 { - return false - } - driver.DeleteNode(res) - return false -} diff --git a/pkg/integration_test/client_test.go b/pkg/integration_test/client_test.go index 3e4ffe12a81..a40e4316e24 100644 --- a/pkg/integration_test/client_test.go +++ b/pkg/integration_test/client_test.go @@ -23,7 +23,7 @@ import ( "github.com/coreos/etcd/clientv3" . 
"github.com/pingcap/check" - pd "github.com/pingcap/pd/pd-client" + pd "github.com/pingcap/pd/client" "github.com/pingcap/pd/pkg/testutil" ) diff --git a/server/config.go b/server/config.go index a2d3c19fda1..b36d966fe26 100644 --- a/server/config.go +++ b/server/config.go @@ -463,6 +463,7 @@ func (c *ScheduleConfig) clone() *ScheduleConfig { DisableMakeUpReplica: c.DisableMakeUpReplica, DisableRemoveExtraReplica: c.DisableRemoveExtraReplica, DisableLocationReplacement: c.DisableLocationReplacement, + DisableNamespaceRelocation: c.DisableNamespaceRelocation, Schedulers: schedulers, } } diff --git a/server/handler.go b/server/handler.go index eeea095f92a..b65d1f0d32c 100644 --- a/server/handler.go +++ b/server/handler.go @@ -115,22 +115,38 @@ func (h *Handler) GetHotReadRegions() *core.StoreHotRegionInfos { // GetHotBytesWriteStores gets all hot write stores stats. func (h *Handler) GetHotBytesWriteStores() map[uint64]uint64 { - return h.s.cluster.cachedCluster.getStoresBytesWriteStat() + cluster := h.s.GetRaftCluster() + if cluster == nil { + return nil + } + return cluster.cachedCluster.getStoresBytesWriteStat() } // GetHotBytesReadStores gets all hot write stores stats. func (h *Handler) GetHotBytesReadStores() map[uint64]uint64 { - return h.s.cluster.cachedCluster.getStoresBytesReadStat() + cluster := h.s.GetRaftCluster() + if cluster == nil { + return nil + } + return cluster.cachedCluster.getStoresBytesReadStat() } // GetHotKeysWriteStores gets all hot write stores stats. func (h *Handler) GetHotKeysWriteStores() map[uint64]uint64 { - return h.s.cluster.cachedCluster.getStoresKeysWriteStat() + cluster := h.s.GetRaftCluster() + if cluster == nil { + return nil + } + return cluster.cachedCluster.getStoresKeysWriteStat() } // GetHotKeysReadStores gets all hot write stores stats. 
func (h *Handler) GetHotKeysReadStores() map[uint64]uint64 { - return h.s.cluster.cachedCluster.getStoresKeysReadStat() + cluster := h.s.GetRaftCluster() + if cluster == nil { + return nil + } + return cluster.cachedCluster.getStoresKeysReadStat() } // AddScheduler adds a scheduler. diff --git a/server/schedulers/adjacent_region.go b/server/schedulers/adjacent_region.go index 2137b71972d..6148e9388d2 100644 --- a/server/schedulers/adjacent_region.go +++ b/server/schedulers/adjacent_region.go @@ -144,8 +144,8 @@ func (l *balanceAdjacentRegionScheduler) Schedule(cluster schedule.Cluster, opIn regions := cluster.ScanRegions(l.lastKey, scanLimit) // scan to the end if len(regions) <= 1 { - l.adjacentRegionsCount = 0 schedulerStatus.WithLabelValues(l.GetName(), "adjacent_count").Set(float64(l.adjacentRegionsCount)) + l.adjacentRegionsCount = 0 l.lastKey = []byte("") return nil } @@ -199,7 +199,9 @@ func (l *balanceAdjacentRegionScheduler) process(cluster schedule.Cluster) []*sc l.cacheRegions.head = head + 1 l.lastKey = r2.GetStartKey() }() - if l.unsafeToBalance(cluster, r1) { + // after the cluster is prepared, there is a gap that some regions heartbeats are not received. + // Leader of those region is nil, and we should skip them. + if r1.GetLeader() == nil || r2.GetLeader() == nil || l.unsafeToBalance(cluster, r1) { schedulerCounter.WithLabelValues(l.GetName(), "skip").Inc() return nil } @@ -221,6 +223,9 @@ func (l *balanceAdjacentRegionScheduler) unsafeToBalance(cluster schedule.Cluste return true } store := cluster.GetStore(region.GetLeader().GetStoreId()) + if store == nil { + return true + } s := l.selector.SelectSource(cluster, []*core.StoreInfo{store}) if s == nil { return true diff --git a/server/testutil.go b/server/testutil.go index 1b4cc7af41d..8297ec0dc4b 100644 --- a/server/testutil.go +++ b/server/testutil.go @@ -57,7 +57,7 @@ func NewTestServer() (*Config, *Server, CleanupFunc, error) { } // NewTestSingleConfig is only for test to create one pd. 
-// Because pd-client also needs this, so export here. +// Because PD client also needs this, so export here. func NewTestSingleConfig() *Config { cfg := &Config{ Name: "pd", @@ -93,7 +93,7 @@ func NewTestSingleConfig() *Config { } // NewTestMultiConfig is only for test to create multiple pd configurations. -// Because pd-client also needs this, so export here. +// Because PD client also needs this, so export here. func NewTestMultiConfig(count int) []*Config { cfgs := make([]*Config, count) diff --git a/tools/pd-ctl/README.md b/tools/pd-ctl/README.md new file mode 100644 index 00000000000..690d8c9f017 --- /dev/null +++ b/tools/pd-ctl/README.md @@ -0,0 +1,686 @@ +pd-ctl +======== + +pd-ctl is a command line tool for PD, pd-ctl obtains the state information of the cluster and tunes the cluster. + +## Build +1. [Go](https://golang.org/) Version 1.9 or later +2. In the root directory of the [PD project](https://github.com/pingcap/pd), use the `make` command to compile and generate `bin/pd-ctl` + +> **Note:** Generally, you don't need to compile source code as the PD Control tool already exists in the released Binary or Docker. However, dev users can refer to the above instruction for compiling source code. 
+ +## Usage + +Single-command mode: + + ./pd-ctl store -d -u http://127.0.0.1:2379 + +Interactive mode: + + ./pd-ctl -u http://127.0.0.1:2379 + +Use environment variables: + +```bash +export PD_ADDR=http://127.0.0.1:2379 +./pd-ctl +``` + +Use TLS to encrypt: + +```bash +./pd-ctl -u https://127.0.0.1:2379 --cacert="path/to/ca" --cert="path/to/cert" --key="path/to/key" +``` + +## Command line flags + +### \-\-pd,-u + ++ PD address ++ Default address: http://127.0.0.1:2379 ++ Environment variable: PD_ADDR + +### \-\-detach,-d + ++ Use single command line mode (not entering readline) ++ Default: false + +### --cacert + ++ Specify the path to the certificate file of the trusted CA in PEM format ++ Default: "" + +### --cert + ++ Specify the path to the certificate of SSL in PEM format ++ Default: "" + +### --key + ++ Specify the path to the certificate key file of SSL in PEM format, which is the private key of the certificate specified by `--cert` ++ Default: "" + +### --version,-V + ++ Print the version information and exit ++ Default: false + +## Command + +### `cluster` + +Use this command to view the basic information of the cluster. + +Usage: + +```bash +>> cluster // To show the cluster information +{ + "id": 6493707687106161130, + "max_peer_count": 3 +} +``` + +### `config [show | set