From 65dc1d1e4e3845639481236eac289a7aa02c17f9 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Wed, 12 Sep 2018 15:05:47 +0800 Subject: [PATCH 1/7] simulator: small refactor about the current simulator (#1239) --- Makefile | 1 - cmd/simulator/main.go | 8 +- pkg/faketikv/cases/add_nodes_dynamic.go | 2 +- pkg/faketikv/cases/cases.go | 14 +-- pkg/faketikv/cases/event_inner.go | 28 ++--- pkg/faketikv/cluster.go | 79 ------------- pkg/faketikv/conn.go | 34 ++++-- pkg/faketikv/drive.go | 148 ++++++++++-------------- pkg/faketikv/event.go | 120 +++++++++++++------ pkg/faketikv/node.go | 31 ++--- pkg/faketikv/raft.go | 63 +++++++--- 11 files changed, 244 insertions(+), 284 deletions(-) delete mode 100644 pkg/faketikv/cluster.go diff --git a/Makefile b/Makefile index d57e3b4f415..823be966b1e 100644 --- a/Makefile +++ b/Makefile @@ -101,7 +101,6 @@ endif simulator: CGO_ENABLED=0 go build -o bin/simulator cmd/simulator/main.go - bin/simulator gofail-enable: # Converting gofail failpoints... diff --git a/cmd/simulator/main.go b/cmd/simulator/main.go index d2806d39676..517a57985d3 100644 --- a/cmd/simulator/main.go +++ b/cmd/simulator/main.go @@ -138,8 +138,12 @@ func initRaftLogger() { func simStart(pdAddr string, confName string, tickInterval time.Duration, clean ...server.CleanupFunc) { start := time.Now() - driver := faketikv.NewDriver(pdAddr, confName) - err := driver.Prepare() + driver, err := faketikv.NewDriver(pdAddr, confName) + if err != nil { + simutil.Logger.Fatal("create driver error:", err) + } + + err = driver.Prepare() if err != nil { simutil.Logger.Fatal("simulator prepare error:", err) } diff --git a/pkg/faketikv/cases/add_nodes_dynamic.go b/pkg/faketikv/cases/add_nodes_dynamic.go index 0831b777c7f..a79bfe2b643 100644 --- a/pkg/faketikv/cases/add_nodes_dynamic.go +++ b/pkg/faketikv/cases/add_nodes_dynamic.go @@ -54,7 +54,7 @@ func newAddNodesDynamic() *Conf { conf.MaxID = id.maxID numNodes := 8 - e := &AddNodesDynamicInner{} + e := &AddNodesInner{} e.Step = func(tick int64) uint64 { if tick%100 == 0 && numNodes < 16 { numNodes++ diff --git a/pkg/faketikv/cases/cases.go b/pkg/faketikv/cases/cases.go index aad331d2f73..e179081bd5c 100644 --- a/pkg/faketikv/cases/cases.go +++ b/pkg/faketikv/cases/cases.go @@ -22,7 +22,7 @@ import ( type Store struct { ID uint64 Status metapb.StoreState - Labels []metapb.StoreLabel + Labels []*metapb.StoreLabel Capacity uint64 Available uint64 LeaderWeight float32 @@ -92,15 +92,3 @@ func NewConf(name string) *Conf { } return nil } - -// NeedSplit checks whether the region need to split according it's size -// and number of keys. -func (c *Conf) NeedSplit(size, rows int64) bool { - if c.RegionSplitSize != 0 && size >= c.RegionSplitSize { - return true - } - if c.RegionSplitKeys != 0 && rows >= c.RegionSplitKeys { - return true - } - return false -} diff --git a/pkg/faketikv/cases/event_inner.go b/pkg/faketikv/cases/event_inner.go index 7eee4af31e2..5005023563e 100644 --- a/pkg/faketikv/cases/event_inner.go +++ b/pkg/faketikv/cases/event_inner.go @@ -13,57 +13,57 @@ package cases -// EventInner is a detail template for custom events +// EventInner is a detail template for custom events. type EventInner interface { Type() string } -// WriteFlowOnSpotInner writes bytes in some range +// WriteFlowOnSpotInner writes bytes in some range. type WriteFlowOnSpotInner struct { Step func(tick int64) map[string]int64 } -// Type implements the EventInner interface +// Type implements the EventInner interface. 
func (w *WriteFlowOnSpotInner) Type() string { return "write-flow-on-spot" } -// WriteFlowOnRegionInner writes bytes in some region +// WriteFlowOnRegionInner writes bytes in some region. type WriteFlowOnRegionInner struct { Step func(tick int64) map[uint64]int64 } -// Type implements the EventInner interface +// Type implements the EventInner interface. func (w *WriteFlowOnRegionInner) Type() string { return "write-flow-on-region" } -// ReadFlowOnRegionInner reads bytes in some region +// ReadFlowOnRegionInner reads bytes in some region. type ReadFlowOnRegionInner struct { Step func(tick int64) map[uint64]int64 } -// Type implements the EventInner interface +// Type implements the EventInner interface. func (w *ReadFlowOnRegionInner) Type() string { return "read-flow-on-region" } -// AddNodesDynamicInner adds nodes dynamically -type AddNodesDynamicInner struct { +// AddNodesInner adds nodes. +type AddNodesInner struct { Step func(tick int64) uint64 } -// Type implements the EventInner interface -func (w *AddNodesDynamicInner) Type() string { - return "add-nodes-dynamic" +// Type implements the EventInner interface. +func (w *AddNodesInner) Type() string { + return "add-nodes" } -// DeleteNodesInner removes nodes randomly. +// DeleteNodesInner removes nodes. type DeleteNodesInner struct { Step func(tick int64) uint64 } -// Type implements the EventInner interface +// Type implements the EventInner interface. func (w *DeleteNodesInner) Type() string { return "delete-nodes" } diff --git a/pkg/faketikv/cluster.go b/pkg/faketikv/cluster.go deleted file mode 100644 index 87a4428872f..00000000000 --- a/pkg/faketikv/cluster.go +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2017 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// See the License for the specific language governing permissions and -// limitations under the License. - -package faketikv - -import ( - "context" - - "github.com/pingcap/kvproto/pkg/metapb" - "github.com/pingcap/pd/pkg/faketikv/cases" - "github.com/pingcap/pd/server/core" - "github.com/pkg/errors" -) - -// ClusterInfo records all cluster information. -type ClusterInfo struct { - conf *cases.Conf - Nodes map[uint64]*Node -} - -// NewClusterInfo creates the initialized cluster with config. -func NewClusterInfo(pdAddr string, conf *cases.Conf) (*ClusterInfo, error) { - cluster := &ClusterInfo{ - conf: conf, - Nodes: make(map[uint64]*Node), - } - - for _, store := range conf.Stores { - node, err := NewNode(store, pdAddr) - if err != nil { - return nil, err - } - cluster.Nodes[store.ID] = node - } - - return cluster, nil -} - -// GetBootstrapInfo returns a valid bootstrap store and region. 
-func (c *ClusterInfo) GetBootstrapInfo(r *RaftEngine) (*metapb.Store, *metapb.Region, error) { - origin := r.RandRegion() - if origin == nil { - return nil, nil, errors.New("no region found for bootstrap") - } - region := origin.Clone( - core.WithStartKey([]byte("")), - core.WithEndKey([]byte("")), - core.SetRegionConfVer(1), - core.SetRegionVersion(1), - core.SetPeers([]*metapb.Peer{origin.GetLeader()}), - ) - if region.GetLeader() == nil { - return nil, nil, errors.New("bootstrap region has no leader") - } - store := c.Nodes[region.GetLeader().GetStoreId()] - if store == nil { - return nil, nil, errors.Errorf("bootstrap store %v not found", region.GetLeader().GetStoreId()) - } - return store.Store, region.GetMeta(), nil -} - -func (c *ClusterInfo) allocID(storeID uint64) (uint64, error) { - node, ok := c.Nodes[storeID] - if !ok { - return 0, errors.Errorf("node %d not found", storeID) - } - id, err := node.client.AllocID(context.Background()) - return id, errors.WithStack(err) -} diff --git a/pkg/faketikv/conn.go b/pkg/faketikv/conn.go index cbdbd240672..8afab055388 100644 --- a/pkg/faketikv/conn.go +++ b/pkg/faketikv/conn.go @@ -13,24 +13,40 @@ package faketikv -// Conn records the informations of connection among nodes. -type Conn struct { - Nodes map[uint64]*Node +import ( + "github.com/pingcap/kvproto/pkg/metapb" + "github.com/pingcap/pd/pkg/faketikv/cases" +) + +// Connection records the informations of connection among nodes. +type Connection struct { + pdAddr string + Nodes map[uint64]*Node } -// NewConn returns a conn. -func NewConn(nodes map[uint64]*Node) (*Conn, error) { - conn := &Conn{ - Nodes: nodes, +// NewConnection creates nodes according to the configuration and returns the connection among nodes. +func NewConnection(conf *cases.Conf, pdAddr string) (*Connection, error) { + conn := &Connection{ + pdAddr: pdAddr, + Nodes: make(map[uint64]*Node), + } + + for _, store := range conf.Stores { + node, err := NewNode(store, pdAddr) + if err != nil { + return nil, err + } + conn.Nodes[store.ID] = node } + return conn, nil } -func (c *Conn) nodeHealth(storeID uint64) bool { +func (c *Connection) nodeHealth(storeID uint64) bool { n, ok := c.Nodes[storeID] if !ok { return false } - return n.GetState() == Up + return n.GetState() == metapb.StoreState_Up } diff --git a/pkg/faketikv/drive.go b/pkg/faketikv/drive.go index f49efce4774..7d42e29c7be 100644 --- a/pkg/faketikv/drive.go +++ b/pkg/faketikv/drive.go @@ -17,7 +17,6 @@ import ( "context" "github.com/pingcap/kvproto/pkg/metapb" - "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/pd/pkg/faketikv/cases" "github.com/pingcap/pd/pkg/faketikv/simutil" "github.com/pingcap/pd/server/core" @@ -26,58 +25,44 @@ import ( // Driver promotes the cluster status change. type Driver struct { - addr string - confName string + pdAddr string conf *cases.Conf - clusterInfo *ClusterInfo client Client tickCount int64 eventRunner *EventRunner raftEngine *RaftEngine + conn *Connection } // NewDriver returns a driver. -func NewDriver(addr string, confName string) *Driver { - return &Driver{ - addr: addr, - confName: confName, +func NewDriver(pdAddr string, confName string) (*Driver, error) { + conf := cases.NewConf(confName) + if conf == nil { + return nil, errors.Errorf("failed to create conf %s", confName) } + return &Driver{ + pdAddr: pdAddr, + conf: conf, + }, nil } // Prepare initializes cluster information, bootstraps cluster and starts nodes. 
func (d *Driver) Prepare() error { - d.conf = cases.NewConf(d.confName) - if d.conf == nil { - return errors.Errorf("failed to create conf %s", d.confName) - } - - clusterInfo, err := NewClusterInfo(d.addr, d.conf) - if err != nil { - return err - } - d.clusterInfo = clusterInfo - - conn, err := NewConn(d.clusterInfo.Nodes) - if err != nil { - return err - } - - raftEngine, err := NewRaftEngine(d.conf, conn) + conn, err := NewConnection(d.conf, d.pdAddr) if err != nil { return err } - d.raftEngine = raftEngine + d.conn = conn - for _, node := range d.clusterInfo.Nodes { - node.raftEngine = raftEngine - } + d.raftEngine = NewRaftEngine(d.conf, d.conn) + d.eventRunner = NewEventRunner(d.conf.Events, d.raftEngine) // Bootstrap. - store, region, err := clusterInfo.GetBootstrapInfo(d.raftEngine) + store, region, err := d.GetBootstrapInfo(d.raftEngine) if err != nil { return err } - d.client = clusterInfo.Nodes[store.GetId()].client + d.client = d.conn.Nodes[store.GetId()].client ctx, cancel := context.WithTimeout(context.Background(), pdTimeout) err = d.client.Bootstrap(ctx, store, region) @@ -90,7 +75,8 @@ func (d *Driver) Prepare() error { // Setup alloc id. for { - id, err := d.client.AllocID(context.Background()) + var id uint64 + id, err = d.client.AllocID(context.Background()) if err != nil { return errors.WithStack(err) } @@ -99,22 +85,20 @@ func (d *Driver) Prepare() error { } } - for _, n := range d.clusterInfo.Nodes { - err := n.Start() - if err != nil { - return err - } + err = d.Start() + if err != nil { + return err } - d.eventRunner = NewEventRunner(d.conf.Events) + return nil } // Tick invokes nodes' Tick. func (d *Driver) Tick() { d.tickCount++ - d.raftEngine.stepRegions(d.clusterInfo) - d.eventRunner.Tick(d) - for _, n := range d.clusterInfo.Nodes { + d.raftEngine.stepRegions() + d.eventRunner.Tick(d.tickCount) + for _, n := range d.conn.Nodes { n.reportRegionChange() n.Tick() } @@ -130,9 +114,20 @@ func (d *Driver) PrintStatistics() { d.raftEngine.schedulerStats.PrintStatistics() } +// Start starts all nodes. +func (d *Driver) Start() error { + for _, n := range d.conn.Nodes { + err := n.Start() + if err != nil { + return err + } + } + return nil +} + // Stop stops all nodes. func (d *Driver) Stop() { - for _, n := range d.clusterInfo.Nodes { + for _, n := range d.conn.Nodes { n.Stop() } } @@ -142,52 +137,25 @@ func (d *Driver) TickCount() int64 { return d.tickCount } -// AddNode adds a new node. -func (d *Driver) AddNode(id uint64) { - if _, ok := d.clusterInfo.Nodes[id]; ok { - simutil.Logger.Infof("Node %d already existed", id) - return - } - s := &cases.Store{ - ID: id, - Status: metapb.StoreState_Up, - Capacity: 1 * cases.TB, - Available: 1 * cases.TB, - Version: "2.1.0", - } - n, err := NewNode(s, d.addr) - if err != nil { - simutil.Logger.Errorf("Add node %d failed: %v", id, err) - return - } - d.clusterInfo.Nodes[id] = n - n.raftEngine = d.raftEngine - err = n.Start() - if err != nil { - simutil.Logger.Errorf("Start node %d failed: %v", id, err) - } -} - -// DeleteNode deletes a node. 
-func (d *Driver) DeleteNode(id uint64) { - node := d.clusterInfo.Nodes[id] - if node == nil { - simutil.Logger.Errorf("Node %d not existed", id) - return - } - delete(d.clusterInfo.Nodes, id) - node.Stop() - - regions := d.raftEngine.GetRegions() - for _, region := range regions { - storeIDs := region.GetStoreIds() - if _, ok := storeIDs[id]; ok { - downPeer := &pdpb.PeerStats{ - Peer: region.GetStorePeer(id), - DownSeconds: 24 * 60 * 60, - } - region = region.Clone(core.WithDownPeers(append(region.GetDownPeers(), downPeer))) - d.raftEngine.SetRegion(region) - } - } +// GetBootstrapInfo returns a valid bootstrap store and region. +func (d *Driver) GetBootstrapInfo(r *RaftEngine) (*metapb.Store, *metapb.Region, error) { + origin := r.RandRegion() + if origin == nil { + return nil, nil, errors.New("no region found for bootstrap") + } + region := origin.Clone( + core.WithStartKey([]byte("")), + core.WithEndKey([]byte("")), + core.SetRegionConfVer(1), + core.SetRegionVersion(1), + core.SetPeers([]*metapb.Peer{origin.GetLeader()}), + ) + if region.GetLeader() == nil { + return nil, nil, errors.New("bootstrap region has no leader") + } + store := d.conn.Nodes[region.GetLeader().GetStoreId()] + if store == nil { + return nil, nil, errors.Errorf("bootstrap store %v not found", region.GetLeader().GetStoreId()) + } + return store.Store, region.GetMeta(), nil } diff --git a/pkg/faketikv/event.go b/pkg/faketikv/event.go index 250cf7587e6..2349e1139f8 100644 --- a/pkg/faketikv/event.go +++ b/pkg/faketikv/event.go @@ -14,23 +14,27 @@ package faketikv import ( + "github.com/pingcap/kvproto/pkg/metapb" + "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/pd/pkg/faketikv/cases" "github.com/pingcap/pd/pkg/faketikv/simutil" + "github.com/pingcap/pd/server/core" ) -// Event that affect the status of the cluster +// Event affects the status of the cluster. type Event interface { - Run(driver *Driver) bool + Run(raft *RaftEngine, tickCount int64) bool } -// EventRunner includes all events +// EventRunner includes all events. type EventRunner struct { - events []Event + events []Event + raftEngine *RaftEngine } -// NewEventRunner news a event runner -func NewEventRunner(events []cases.EventInner) *EventRunner { - er := &EventRunner{events: make([]Event, 0, len(events))} +// NewEventRunner creates an event runner. 
+func NewEventRunner(events []cases.EventInner, raftEngine *RaftEngine) *EventRunner { + er := &EventRunner{events: make([]Event, 0, len(events)), raftEngine: raftEngine} for _, e := range events { event := parserEvent(e) if event != nil { @@ -48,8 +52,8 @@ func parserEvent(e cases.EventInner) Event { return &WriteFlowOnRegion{in: v} case *cases.ReadFlowOnRegionInner: return &ReadFlowOnRegion{in: v} - case *cases.AddNodesDynamicInner: - return &AddNodesDynamic{in: v} + case *cases.AddNodesInner: + return &AddNodes{in: v} case *cases.DeleteNodesInner: return &DeleteNodes{in: v} } @@ -57,10 +61,10 @@ func parserEvent(e cases.EventInner) Event { } // Tick ticks the event run -func (er *EventRunner) Tick(driver *Driver) { +func (er *EventRunner) Tick(tickCount int64) { var finishedIndex int for i, e := range er.events { - isFinished := e.Run(driver) + isFinished := e.Run(er.raftEngine, tickCount) if isFinished { er.events[i], er.events[finishedIndex] = er.events[finishedIndex], er.events[i] finishedIndex++ @@ -69,15 +73,14 @@ func (er *EventRunner) Tick(driver *Driver) { er.events = er.events[finishedIndex:] } -// WriteFlowOnSpot writes bytes in some range +// WriteFlowOnSpot writes bytes in some range. type WriteFlowOnSpot struct { in *cases.WriteFlowOnSpotInner } -// Run implements the event interface -func (w *WriteFlowOnSpot) Run(driver *Driver) bool { - raft := driver.raftEngine - res := w.in.Step(driver.tickCount) +// Run implements the event interface. +func (w *WriteFlowOnSpot) Run(raft *RaftEngine, tickCount int64) bool { + res := w.in.Step(tickCount) for key, size := range res { region := raft.SearchRegion([]byte(key)) if region == nil { @@ -89,15 +92,14 @@ func (w *WriteFlowOnSpot) Run(driver *Driver) bool { return false } -// WriteFlowOnRegion writes bytes in some region +// WriteFlowOnRegion writes bytes in some region. type WriteFlowOnRegion struct { in *cases.WriteFlowOnRegionInner } -// Run implements the event interface -func (w *WriteFlowOnRegion) Run(driver *Driver) bool { - raft := driver.raftEngine - res := w.in.Step(driver.tickCount) +// Run implements the event interface. +func (w *WriteFlowOnRegion) Run(raft *RaftEngine, tickCount int64) bool { + res := w.in.Step(tickCount) for id, bytes := range res { region := raft.GetRegion(id) if region == nil { @@ -114,39 +116,81 @@ type ReadFlowOnRegion struct { in *cases.ReadFlowOnRegionInner } -// Run implements the event interface -func (w *ReadFlowOnRegion) Run(driver *Driver) bool { - res := w.in.Step(driver.tickCount) - driver.raftEngine.updateRegionReadBytes(res) +// Run implements the event interface. +func (w *ReadFlowOnRegion) Run(raft *RaftEngine, tickCount int64) bool { + res := w.in.Step(tickCount) + raft.updateRegionReadBytes(res) return false } -// AddNodesDynamic adds nodes dynamically. -type AddNodesDynamic struct { - in *cases.AddNodesDynamicInner +// AddNodes adds nodes. +type AddNodes struct { + in *cases.AddNodesInner } // Run implements the event interface. 
-func (w *AddNodesDynamic) Run(driver *Driver) bool { - res := w.in.Step(driver.tickCount) - if res == 0 { +func (w *AddNodes) Run(raft *RaftEngine, tickCount int64) bool { + id := w.in.Step(tickCount) + if id == 0 { + return false + } + + if _, ok := raft.conn.Nodes[id]; ok { + simutil.Logger.Infof("Node %d already existed", id) + return false + } + s := &cases.Store{ + ID: id, + Status: metapb.StoreState_Up, + Capacity: 1 * cases.TB, + Available: 1 * cases.TB, + Version: "2.1.0", + } + n, err := NewNode(s, raft.conn.pdAddr) + if err != nil { + simutil.Logger.Errorf("Add node %d failed: %v", id, err) return false } - driver.AddNode(res) + raft.conn.Nodes[id] = n + n.raftEngine = raft + err = n.Start() + if err != nil { + simutil.Logger.Errorf("Start node %d failed: %v", id, err) + } return false } -// DeleteNodes deletes nodes randomly +// DeleteNodes deletes nodes. type DeleteNodes struct { in *cases.DeleteNodesInner } -// Run implements the event interface -func (w *DeleteNodes) Run(driver *Driver) bool { - res := w.in.Step(driver.tickCount) - if res == 0 { +// Run implements the event interface. +func (w *DeleteNodes) Run(raft *RaftEngine, tickCount int64) bool { + id := w.in.Step(tickCount) + if id == 0 { + return false + } + + node := raft.conn.Nodes[id] + if node == nil { + simutil.Logger.Errorf("Node %d not existed", id) return false } - driver.DeleteNode(res) + delete(raft.conn.Nodes, id) + node.Stop() + + regions := raft.GetRegions() + for _, region := range regions { + storeIDs := region.GetStoreIds() + if _, ok := storeIDs[id]; ok { + downPeer := &pdpb.PeerStats{ + Peer: region.GetStorePeer(id), + DownSeconds: 24 * 60 * 60, + } + region = region.Clone(core.WithDownPeers(append(region.GetDownPeers(), downPeer))) + raft.SetRegion(region) + } + } return false } diff --git a/pkg/faketikv/node.go b/pkg/faketikv/node.go index 2d362d57491..750c43de233 100644 --- a/pkg/faketikv/node.go +++ b/pkg/faketikv/node.go @@ -25,17 +25,6 @@ import ( "github.com/pingcap/pd/pkg/faketikv/simutil" ) -// NodeState node's state. -type NodeState int - -// some state -const ( - Up NodeState = iota - Down - LossConnect - Block -) - const ( storeHeartBeatPeriod = 10 regionHeartBeatPeriod = 60 @@ -53,7 +42,6 @@ type Node struct { receiveRegionHeartbeatCh <-chan *pdpb.RegionHeartbeatResponse ctx context.Context cancel context.CancelFunc - state NodeState raftEngine *RaftEngine ioRate int64 } @@ -65,6 +53,8 @@ func NewNode(s *cases.Store, pdAddr string) (*Node, error) { Id: s.ID, Address: fmt.Sprintf("mock:://tikv-%d", s.ID), Version: s.Version, + Labels: s.Labels, + State: s.Status, } stats := &pdpb.StoreStats{ StoreId: s.ID, @@ -85,7 +75,6 @@ func NewNode(s *cases.Store, pdAddr string) (*Node, error) { ctx: ctx, cancel: cancel, tasks: make(map[uint64]Task), - state: Down, receiveRegionHeartbeatCh: receiveRegionHeartbeatCh, // FIXME: This value should be adjusted to a appropriate one. ioRate: 40 * 1000 * 1000, @@ -102,7 +91,7 @@ func (n *Node) Start() error { } n.wg.Add(1) go n.receiveRegionHeartbeat() - n.state = Up + n.Store.State = metapb.StoreState_Up return nil } @@ -123,7 +112,7 @@ func (n *Node) receiveRegionHeartbeat() { // Tick steps node status change. func (n *Node) Tick() { - if n.state != Up { + if n.GetState() != metapb.StoreState_Up { return } n.stepHeartBeat() @@ -132,8 +121,8 @@ func (n *Node) Tick() { } // GetState returns current node state. 
-func (n *Node) GetState() NodeState { - return n.state +func (n *Node) GetState() metapb.StoreState { + return n.Store.State } func (n *Node) stepTask() { @@ -158,7 +147,7 @@ func (n *Node) stepHeartBeat() { } func (n *Node) storeHeartBeat() { - if n.state != Up { + if n.GetState() != metapb.StoreState_Up { return } ctx, cancel := context.WithTimeout(n.ctx, pdTimeout) @@ -170,7 +159,7 @@ func (n *Node) storeHeartBeat() { } func (n *Node) regionHeartBeat() { - if n.state != Up { + if n.GetState() != metapb.StoreState_Up { return } regions := n.raftEngine.GetRegions() @@ -187,7 +176,7 @@ func (n *Node) regionHeartBeat() { } func (n *Node) reportRegionChange() { - for _, regionID := range n.raftEngine.regionchange[n.Id] { + for _, regionID := range n.raftEngine.regionChange[n.Id] { region := n.raftEngine.GetRegion(regionID) ctx, cancel := context.WithTimeout(n.ctx, pdTimeout) err := n.client.RegionHeartbeat(ctx, region) @@ -196,7 +185,7 @@ func (n *Node) reportRegionChange() { } cancel() } - delete(n.raftEngine.regionchange, n.Id) + delete(n.raftEngine.regionChange, n.Id) } // AddTask adds task in this node. diff --git a/pkg/faketikv/raft.go b/pkg/faketikv/raft.go index 250d07b0c39..970ed3ad4e3 100644 --- a/pkg/faketikv/raft.go +++ b/pkg/faketikv/raft.go @@ -14,6 +14,7 @@ package faketikv import ( + "context" "math/rand" "sort" "sync" @@ -22,24 +23,29 @@ import ( "github.com/pingcap/pd/pkg/faketikv/cases" "github.com/pingcap/pd/pkg/faketikv/simutil" "github.com/pingcap/pd/server/core" + "github.com/pkg/errors" ) // RaftEngine records all raft infomations. type RaftEngine struct { sync.RWMutex - regionsInfo *core.RegionsInfo - conn *Conn - regionchange map[uint64][]uint64 - schedulerStats *schedulerStatistics + regionsInfo *core.RegionsInfo + conn *Connection + regionChange map[uint64][]uint64 + schedulerStats *schedulerStatistics + regionSplitSize int64 + regionSplitKeys int64 } // NewRaftEngine creates the initialized raft with the configuration. 
-func NewRaftEngine(conf *cases.Conf, conn *Conn) (*RaftEngine, error) { +func NewRaftEngine(conf *cases.Conf, conn *Connection) *RaftEngine { r := &RaftEngine{ - regionsInfo: core.NewRegionsInfo(), - conn: conn, - regionchange: make(map[uint64][]uint64), - schedulerStats: newSchedulerStatistics(), + regionsInfo: core.NewRegionsInfo(), + conn: conn, + regionChange: make(map[uint64][]uint64), + schedulerStats: newSchedulerStatistics(), + regionSplitSize: conf.RegionSplitSize, + regionSplitKeys: conf.RegionSplitKeys, } splitKeys := generateKeys(len(conf.Regions) - 1) @@ -69,14 +75,18 @@ func NewRaftEngine(conf *cases.Conf, conn *Conn) (*RaftEngine, error) { } } - return r, nil + for _, node := range conn.Nodes { + node.raftEngine = r + } + + return r } -func (r *RaftEngine) stepRegions(c *ClusterInfo) { +func (r *RaftEngine) stepRegions() { regions := r.GetRegions() for _, region := range regions { r.stepLeader(region) - r.stepSplit(region, c) + r.stepSplit(region) } } @@ -96,17 +106,17 @@ func (r *RaftEngine) stepLeader(region *core.RegionInfo) { r.recordRegionChange(newRegion) } -func (r *RaftEngine) stepSplit(region *core.RegionInfo, c *ClusterInfo) { +func (r *RaftEngine) stepSplit(region *core.RegionInfo) { if region.GetLeader() == nil { return } - if !c.conf.NeedSplit(region.GetApproximateSize(), region.GetApproximateKeys()) { + if !r.NeedSplit(region.GetApproximateSize(), region.GetApproximateKeys()) { return } ids := make([]uint64, 1+len(region.GetPeers())) for i := range ids { var err error - ids[i], err = c.allocID(region.GetLeader().GetStoreId()) + ids[i], err = r.allocID(region.GetLeader().GetStoreId()) if err != nil { simutil.Logger.Infof("alloc id failed: %s", err) return @@ -137,9 +147,21 @@ func (r *RaftEngine) stepSplit(region *core.RegionInfo, c *ClusterInfo) { r.recordRegionChange(right) } +// NeedSplit checks whether the region needs to split according its size +// and number of keys. +func (r *RaftEngine) NeedSplit(size, rows int64) bool { + if r.regionSplitSize != 0 && size >= r.regionSplitSize { + return true + } + if r.regionSplitKeys != 0 && rows >= r.regionSplitKeys { + return true + } + return false +} + func (r *RaftEngine) recordRegionChange(region *core.RegionInfo) { n := region.GetLeader().GetStoreId() - r.regionchange[n] = append(r.regionchange[n], region.GetID()) + r.regionChange[n] = append(r.regionChange[n], region.GetID()) } func (r *RaftEngine) updateRegionStore(region *core.RegionInfo, size int64) { @@ -225,6 +247,15 @@ func (r *RaftEngine) RandRegion() *core.RegionInfo { return r.regionsInfo.RandRegion() } +func (r *RaftEngine) allocID(storeID uint64) (uint64, error) { + node, ok := r.conn.Nodes[storeID] + if !ok { + return 0, errors.Errorf("node %d not found", storeID) + } + id, err := node.client.AllocID(context.Background()) + return id, errors.WithStack(err) +} + const ( // 26^10 ~= 1.4e+14, should be enough. keyChars = "abcdefghijklmnopqrstuvwxyz" From 4a874031746f2c94fbe455a8c933c83ce1d7005e Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Wed, 12 Sep 2018 15:31:15 +0800 Subject: [PATCH 2/7] fix hot store command (#1244) --- server/handler.go | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/server/handler.go b/server/handler.go index eeea095f92a..b65d1f0d32c 100644 --- a/server/handler.go +++ b/server/handler.go @@ -115,22 +115,38 @@ func (h *Handler) GetHotReadRegions() *core.StoreHotRegionInfos { // GetHotBytesWriteStores gets all hot write stores stats. 
func (h *Handler) GetHotBytesWriteStores() map[uint64]uint64 { - return h.s.cluster.cachedCluster.getStoresBytesWriteStat() + cluster := h.s.GetRaftCluster() + if cluster == nil { + return nil + } + return cluster.cachedCluster.getStoresBytesWriteStat() } // GetHotBytesReadStores gets all hot write stores stats. func (h *Handler) GetHotBytesReadStores() map[uint64]uint64 { - return h.s.cluster.cachedCluster.getStoresBytesReadStat() + cluster := h.s.GetRaftCluster() + if cluster == nil { + return nil + } + return cluster.cachedCluster.getStoresBytesReadStat() } // GetHotKeysWriteStores gets all hot write stores stats. func (h *Handler) GetHotKeysWriteStores() map[uint64]uint64 { - return h.s.cluster.cachedCluster.getStoresKeysWriteStat() + cluster := h.s.GetRaftCluster() + if cluster == nil { + return nil + } + return cluster.cachedCluster.getStoresKeysWriteStat() } // GetHotKeysReadStores gets all hot write stores stats. func (h *Handler) GetHotKeysReadStores() map[uint64]uint64 { - return h.s.cluster.cachedCluster.getStoresKeysReadStat() + cluster := h.s.GetRaftCluster() + if cluster == nil { + return nil + } + return cluster.cachedCluster.getStoresKeysReadStat() } // AddScheduler adds a scheduler. From 6a7832d2d6e5b2923c79683183e63d030f954563 Mon Sep 17 00:00:00 2001 From: ShuNing Date: Fri, 14 Sep 2018 18:29:57 +0800 Subject: [PATCH 3/7] CHANGLOG: update changelog for v2.1.0-rc2 (#1249) * CHANGLOG: update change log for v2.1.0-rc2 --- CHANGELOG.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 962af605e91..bf8a5b1df10 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,20 @@ # PD Change Log +## v2.1.0-rc2 +### Features +* Support the `GetAllStores` interface +* Add the statistics of scheduling estimation in Simulator +### Improvements +* Optimize the handling process of down stores to make up replicas as soon as possible +* Optimize the start of Coordinator to reduce the unnecessary scheduling caused by restarting PD +* Optimize the memory usage to reduce the overhead caused by heartbeats +* Optimize error handling and improve the log information +* Support querying the Region information of a specific store in pd-ctl +* Support querying the topN Region information based on version +* Support more accurate TSO decoding in pd-ctl +### Bug fix +* Fix the issue that pd-ctl uses the `hot store` command to exit wrongly + ## v2.1.0-rc1 ### Features * Introduce the version control mechanism and support rolling update of the cluster with compatibility From fa5f08c252a4c5025cdef5bf0f2bdab78d7c0a36 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Mon, 17 Sep 2018 18:04:18 +0800 Subject: [PATCH 4/7] simulator: make some parameters configurable (#1248) --- cmd/simulator/main.go | 49 ++++++++++++----- conf/simconfig.toml | 17 ++++++ pkg/faketikv/cases/add_nodes.go | 14 ++--- pkg/faketikv/cases/add_nodes_dynamic.go | 16 +++--- pkg/faketikv/cases/balance_leader.go | 14 ++--- pkg/faketikv/cases/cases.go | 18 +++---- pkg/faketikv/cases/delete_nodes.go | 18 +++---- pkg/faketikv/cases/hot_read.go | 18 +++---- pkg/faketikv/cases/hot_write.go | 18 +++---- pkg/faketikv/cases/makeup_down_replica.go | 16 +++--- pkg/faketikv/cases/region_merge.go | 14 ++--- pkg/faketikv/cases/region_split.go | 20 +++---- pkg/faketikv/config.go | 65 ++++++++++++++++++++++- pkg/faketikv/conn.go | 6 +-- pkg/faketikv/drive.go | 26 ++++----- pkg/faketikv/event.go | 30 ++++++----- pkg/faketikv/node.go | 5 +- pkg/faketikv/raft.go | 4 +- 18 files changed, 236 insertions(+), 132 
deletions(-) create mode 100644 conf/simconfig.toml diff --git a/cmd/simulator/main.go b/cmd/simulator/main.go index 517a57985d3..23d248f833f 100644 --- a/cmd/simulator/main.go +++ b/cmd/simulator/main.go @@ -22,6 +22,7 @@ import ( "syscall" "time" + "github.com/BurntSushi/toml" etcdlogutil "github.com/coreos/etcd/pkg/logutil" "github.com/coreos/etcd/raft" "github.com/pingcap/pd/pkg/faketikv" @@ -42,7 +43,8 @@ import ( var ( pdAddr = flag.String("pd", "", "pd address") - confName = flag.String("conf", "", "config name") + configFile = flag.String("config", "", "config file") + caseName = flag.String("case", "", "case name") serverLogLevel = flag.String("serverLog", "fatal", "pd server log level.") simLogLevel = flag.String("simLog", "fatal", "simulator log level.") ) @@ -54,22 +56,29 @@ func main() { simutil.InitLogger(*simLogLevel) schedule.Simulating = true - if *confName == "" { + if *caseName == "" { if *pdAddr != "" { simutil.Logger.Fatal("need to specify one config name") } - for conf := range cases.ConfMap { - run(conf) + for simCase := range cases.CaseMap { + run(simCase) } } else { - run(*confName) + run(*caseName) } } -func run(confName string) { +func run(simCase string) { + simConfig := faketikv.NewSimConfig() + if *configFile != "" { + if _, err := toml.DecodeFile(*configFile, simConfig); err != nil { + simutil.Logger.Fatal(err) + } + } + simConfig.Adjust() + if *pdAddr != "" { - tickInterval := 1 * time.Second - simStart(*pdAddr, confName, tickInterval) + simStart(*pdAddr, simCase, simConfig) } else { _, local, clean := NewSingleServer() err := local.Run(context.Background()) @@ -82,8 +91,7 @@ func run(confName string) { } time.Sleep(100 * time.Millisecond) } - tickInterval := 100 * time.Millisecond - simStart(local.GetAddr(), confName, tickInterval, clean) + simStart(local.GetAddr(), simCase, simConfig, clean) } } @@ -136,9 +144,9 @@ func initRaftLogger() { raft.SetLogger(lg) } -func simStart(pdAddr string, confName string, tickInterval time.Duration, clean ...server.CleanupFunc) { +func simStart(pdAddr string, simCase string, simConfig *faketikv.SimConfig, clean ...server.CleanupFunc) { start := time.Now() - driver, err := faketikv.NewDriver(pdAddr, confName) + driver, err := faketikv.NewDriver(pdAddr, simCase, simConfig) if err != nil { simutil.Logger.Fatal("create driver error:", err) } @@ -147,6 +155,19 @@ func simStart(pdAddr string, confName string, tickInterval time.Duration, clean if err != nil { simutil.Logger.Fatal("simulator prepare error:", err) } + + var runInternal bool + if len(clean) != 0 { + runInternal = true + } + + var tickInterval time.Duration + if runInternal { + tickInterval = simConfig.SimTickInterval.Duration + } else { + tickInterval = simConfig.NormTickInterval.Duration + } + tick := time.NewTicker(tickInterval) defer tick.Stop() sc := make(chan os.Signal, 1) @@ -173,11 +194,11 @@ EXIT: } driver.Stop() - if len(clean) != 0 { + if runInternal { clean[0]() } - fmt.Printf("%s [%s] total iteration: %d, time cost: %v\n", simResult, confName, driver.TickCount(), time.Since(start)) + fmt.Printf("%s [%s] total iteration: %d, time cost: %v\n", simResult, simCase, driver.TickCount(), time.Since(start)) driver.PrintStatistics() if simResult != "OK" { diff --git a/conf/simconfig.toml b/conf/simconfig.toml new file mode 100644 index 00000000000..7b91e3916f4 --- /dev/null +++ b/conf/simconfig.toml @@ -0,0 +1,17 @@ +# PD Simulator Configuration + +[tick] +# the tick interval when starting PD inside (default: "100ms") +sim-tick-interval = "100ms" +# the tick 
interval when connecting with an external PD (default: "1s") +norm-tick-interval = "1s" + +[store] +# the capacity size of a new store in GB (default: 1024) +store-capacity = 1024 +# the available size of a new store in GB (default: 1024) +store-available = 1024 +# the io rate of a new store in MB/s (default: 40) +store-io-per-second = 40 +# the version of a new store (default: "2.1.0") +store-version = "2.1.0" diff --git a/pkg/faketikv/cases/add_nodes.go b/pkg/faketikv/cases/add_nodes.go index eb003252e19..c873993a5b8 100644 --- a/pkg/faketikv/cases/add_nodes.go +++ b/pkg/faketikv/cases/add_nodes.go @@ -18,12 +18,12 @@ import ( "github.com/pingcap/pd/server/core" ) -func newAddNodes() *Conf { - var conf Conf +func newAddNodes() *Case { + var simCase Case var id idAllocator for i := 1; i <= 8; i++ { - conf.Stores = append(conf.Stores, &Store{ + simCase.Stores = append(simCase.Stores, &Store{ ID: id.nextID(), Status: metapb.StoreState_Up, Capacity: 1 * TB, @@ -38,7 +38,7 @@ func newAddNodes() *Conf { {Id: id.nextID(), StoreId: uint64(i+1)%4 + 1}, {Id: id.nextID(), StoreId: uint64(i+2)%4 + 1}, } - conf.Regions = append(conf.Regions, Region{ + simCase.Regions = append(simCase.Regions, Region{ ID: id.nextID(), Peers: peers, Leader: peers[0], @@ -46,9 +46,9 @@ func newAddNodes() *Conf { Keys: 960000, }) } - conf.MaxID = id.maxID + simCase.MaxID = id.maxID - conf.Checker = func(regions *core.RegionsInfo) bool { + simCase.Checker = func(regions *core.RegionsInfo) bool { res := true leaderCounts := make([]int, 0, 8) regionCounts := make([]int, 0, 8) @@ -69,5 +69,5 @@ func newAddNodes() *Conf { simutil.Logger.Infof("region counts: %v", regionCounts) return res } - return &conf + return &simCase } diff --git a/pkg/faketikv/cases/add_nodes_dynamic.go b/pkg/faketikv/cases/add_nodes_dynamic.go index a79bfe2b643..410b3c35696 100644 --- a/pkg/faketikv/cases/add_nodes_dynamic.go +++ b/pkg/faketikv/cases/add_nodes_dynamic.go @@ -18,12 +18,12 @@ import ( "github.com/pingcap/pd/server/core" ) -func newAddNodesDynamic() *Conf { - var conf Conf +func newAddNodesDynamic() *Case { + var simCase Case var id idAllocator for i := 1; i <= 8; i++ { - conf.Stores = append(conf.Stores, &Store{ + simCase.Stores = append(simCase.Stores, &Store{ ID: id.nextID(), Status: metapb.StoreState_Up, Capacity: 1 * TB, @@ -43,7 +43,7 @@ func newAddNodesDynamic() *Conf { {Id: id.nextID(), StoreId: uint64(i+1)%8 + 1}, {Id: id.nextID(), StoreId: uint64(i+2)%8 + 1}, } - conf.Regions = append(conf.Regions, Region{ + simCase.Regions = append(simCase.Regions, Region{ ID: id.nextID(), Peers: peers, Leader: peers[0], @@ -51,7 +51,7 @@ func newAddNodesDynamic() *Conf { Keys: 960000, }) } - conf.MaxID = id.maxID + simCase.MaxID = id.maxID numNodes := 8 e := &AddNodesInner{} @@ -64,9 +64,9 @@ func newAddNodesDynamic() *Conf { } return 0 } - conf.Events = []EventInner{e} + simCase.Events = []EventInner{e} - conf.Checker = func(regions *core.RegionsInfo) bool { + simCase.Checker = func(regions *core.RegionsInfo) bool { res := true leaderCounts := make([]int, 0, numNodes) regionCounts := make([]int, 0, numNodes) @@ -87,5 +87,5 @@ func newAddNodesDynamic() *Conf { simutil.Logger.Infof("region counts: %v", regionCounts) return res } - return &conf + return &simCase } diff --git a/pkg/faketikv/cases/balance_leader.go b/pkg/faketikv/cases/balance_leader.go index 01eb18f674f..9c21d437177 100644 --- a/pkg/faketikv/cases/balance_leader.go +++ b/pkg/faketikv/cases/balance_leader.go @@ -19,12 +19,12 @@ import ( "github.com/pingcap/pd/server/core" ) -func 
newBalanceLeader() *Conf { - var conf Conf +func newBalanceLeader() *Case { + var simCase Case var id idAllocator for i := 1; i <= 3; i++ { - conf.Stores = append(conf.Stores, &Store{ + simCase.Stores = append(simCase.Stores, &Store{ ID: id.nextID(), Status: metapb.StoreState_Up, Capacity: 1 * TB, @@ -39,7 +39,7 @@ func newBalanceLeader() *Conf { {Id: id.nextID(), StoreId: 2}, {Id: id.nextID(), StoreId: 3}, } - conf.Regions = append(conf.Regions, Region{ + simCase.Regions = append(simCase.Regions, Region{ ID: id.nextID(), Peers: peers, Leader: peers[0], @@ -47,8 +47,8 @@ func newBalanceLeader() *Conf { Keys: 960000, }) } - conf.MaxID = id.maxID - conf.Checker = func(regions *core.RegionsInfo) bool { + simCase.MaxID = id.maxID + simCase.Checker = func(regions *core.RegionsInfo) bool { count1 := regions.GetStoreLeaderCount(1) count2 := regions.GetStoreLeaderCount(2) count3 := regions.GetStoreLeaderCount(3) @@ -58,5 +58,5 @@ func newBalanceLeader() *Conf { count2 >= 300 && count3 >= 300 } - return &conf + return &simCase } diff --git a/pkg/faketikv/cases/cases.go b/pkg/faketikv/cases/cases.go index e179081bd5c..b354f0a42c0 100644 --- a/pkg/faketikv/cases/cases.go +++ b/pkg/faketikv/cases/cases.go @@ -18,7 +18,7 @@ import ( "github.com/pingcap/pd/server/core" ) -// Store is the config to simulate tikv. +// Store is used to simulate tikv. type Store struct { ID uint64 Status metapb.StoreState @@ -30,7 +30,7 @@ type Store struct { Version string } -// Region is the config to simulate a region. +// Region is used to simulate a region. type Region struct { ID uint64 Peers []*metapb.Peer @@ -42,8 +42,8 @@ type Region struct { // CheckerFunc checks if the scheduler is finished. type CheckerFunc func(*core.RegionsInfo) bool -// Conf represents a test suite for simulator. -type Conf struct { +// Case represents a test suite for simulator. +type Case struct { Stores []*Store Regions []Region MaxID uint64 @@ -72,8 +72,8 @@ func (a *idAllocator) nextID() uint64 { return a.maxID } -// ConfMap is a mapping of the cases to the their corresponding initialize functions. -var ConfMap = map[string]func() *Conf{ +// CaseMap is a mapping of the cases to the their corresponding initialize functions. +var CaseMap = map[string]func() *Case{ "balance-leader": newBalanceLeader, "add-nodes": newAddNodes, "add-nodes-dynamic": newAddNodesDynamic, @@ -85,9 +85,9 @@ var ConfMap = map[string]func() *Conf{ "makeup-down-replicas": newMakeupDownReplicas, } -// NewConf creates a config to initialize simulator cluster. -func NewConf(name string) *Conf { - if f, ok := ConfMap[name]; ok { +// NewCase creates a new case. 
+func NewCase(name string) *Case { + if f, ok := CaseMap[name]; ok { return f() } return nil diff --git a/pkg/faketikv/cases/delete_nodes.go b/pkg/faketikv/cases/delete_nodes.go index 6bacc9c6a84..05e5d2e396d 100644 --- a/pkg/faketikv/cases/delete_nodes.go +++ b/pkg/faketikv/cases/delete_nodes.go @@ -20,12 +20,12 @@ import ( "github.com/pingcap/pd/server/core" ) -func newDeleteNodes() *Conf { - var conf Conf +func newDeleteNodes() *Case { + var simCase Case var id idAllocator for i := 1; i <= 8; i++ { - conf.Stores = append(conf.Stores, &Store{ + simCase.Stores = append(simCase.Stores, &Store{ ID: id.nextID(), Status: metapb.StoreState_Up, Capacity: 1 * TB, @@ -40,7 +40,7 @@ func newDeleteNodes() *Conf { {Id: id.nextID(), StoreId: uint64(i+1)%8 + 1}, {Id: id.nextID(), StoreId: uint64(i+2)%8 + 1}, } - conf.Regions = append(conf.Regions, Region{ + simCase.Regions = append(simCase.Regions, Region{ ID: id.nextID(), Peers: peers, Leader: peers[0], @@ -48,10 +48,10 @@ func newDeleteNodes() *Conf { Keys: 960000, }) } - conf.MaxID = id.maxID + simCase.MaxID = id.maxID var ids []uint64 - for _, store := range conf.Stores { + for _, store := range simCase.Stores { ids = append(ids, store.ID) } @@ -67,9 +67,9 @@ func newDeleteNodes() *Conf { } return 0 } - conf.Events = []EventInner{e} + simCase.Events = []EventInner{e} - conf.Checker = func(regions *core.RegionsInfo) bool { + simCase.Checker = func(regions *core.RegionsInfo) bool { res := true leaderCounts := make([]int, 0, numNodes) regionCounts := make([]int, 0, numNodes) @@ -90,5 +90,5 @@ func newDeleteNodes() *Conf { simutil.Logger.Infof("region counts: %v", regionCounts) return res } - return &conf + return &simCase } diff --git a/pkg/faketikv/cases/hot_read.go b/pkg/faketikv/cases/hot_read.go index 7d1dfaca4a5..77ca1891bbb 100644 --- a/pkg/faketikv/cases/hot_read.go +++ b/pkg/faketikv/cases/hot_read.go @@ -21,12 +21,12 @@ import ( "github.com/pingcap/pd/server/core" ) -func newHotRead() *Conf { - var conf Conf +func newHotRead() *Case { + var simCase Case var id idAllocator // Initialize the cluster for i := 1; i <= 5; i++ { - conf.Stores = append(conf.Stores, &Store{ + simCase.Stores = append(simCase.Stores, &Store{ ID: id.nextID(), Status: metapb.StoreState_Up, Capacity: 1 * TB, @@ -42,7 +42,7 @@ func newHotRead() *Conf { {Id: id.nextID(), StoreId: uint64(storeIDs[1] + 1)}, {Id: id.nextID(), StoreId: uint64(storeIDs[2] + 1)}, } - conf.Regions = append(conf.Regions, Region{ + simCase.Regions = append(simCase.Regions, Region{ ID: id.nextID(), Peers: peers, Leader: peers[0], @@ -50,12 +50,12 @@ func newHotRead() *Conf { Keys: 960000, }) } - conf.MaxID = id.maxID + simCase.MaxID = id.maxID // Events description // select 20 regions on store 1 as hot read regions. 
readFlow := make(map[uint64]int64, 20) - for _, r := range conf.Regions { + for _, r := range simCase.Regions { if r.Leader.GetStoreId() == 1 { readFlow[r.ID] = 128 * MB if len(readFlow) == 20 { @@ -67,9 +67,9 @@ func newHotRead() *Conf { e.Step = func(tick int64) map[uint64]int64 { return readFlow } - conf.Events = []EventInner{e} + simCase.Events = []EventInner{e} // Checker description - conf.Checker = func(regions *core.RegionsInfo) bool { + simCase.Checker = func(regions *core.RegionsInfo) bool { var leaderCount [5]int for id := range readFlow { leaderStore := regions.GetRegion(id).GetLeader().GetStoreId() @@ -90,5 +90,5 @@ func newHotRead() *Conf { return leaderCount[max]-leaderCount[min] < 2 } - return &conf + return &simCase } diff --git a/pkg/faketikv/cases/hot_write.go b/pkg/faketikv/cases/hot_write.go index 7a97633ce6e..88ad30cdb55 100644 --- a/pkg/faketikv/cases/hot_write.go +++ b/pkg/faketikv/cases/hot_write.go @@ -21,12 +21,12 @@ import ( "github.com/pingcap/pd/server/core" ) -func newHotWrite() *Conf { - var conf Conf +func newHotWrite() *Case { + var simCase Case var id idAllocator // Initialize the cluster for i := 1; i <= 10; i++ { - conf.Stores = append(conf.Stores, &Store{ + simCase.Stores = append(simCase.Stores, &Store{ ID: id.nextID(), Status: metapb.StoreState_Up, Capacity: 1 * TB, @@ -42,7 +42,7 @@ func newHotWrite() *Conf { {Id: id.nextID(), StoreId: uint64(storeIDs[1] + 1)}, {Id: id.nextID(), StoreId: uint64(storeIDs[2] + 1)}, } - conf.Regions = append(conf.Regions, Region{ + simCase.Regions = append(simCase.Regions, Region{ ID: id.nextID(), Peers: peers, Leader: peers[0], @@ -50,12 +50,12 @@ func newHotWrite() *Conf { Keys: 960000, }) } - conf.MaxID = id.maxID + simCase.MaxID = id.maxID // Events description // select 5 regions on store 1 as hot write regions. 
writeFlow := make(map[uint64]int64, 5) - for _, r := range conf.Regions { + for _, r := range simCase.Regions { if r.Leader.GetStoreId() == 1 { writeFlow[r.ID] = 2 * MB if len(writeFlow) == 5 { @@ -68,10 +68,10 @@ func newHotWrite() *Conf { return writeFlow } - conf.Events = []EventInner{e} + simCase.Events = []EventInner{e} // Checker description - conf.Checker = func(regions *core.RegionsInfo) bool { + simCase.Checker = func(regions *core.RegionsInfo) bool { var leaderCount, peerCount [10]int for id := range writeFlow { region := regions.GetRegion(id) @@ -101,5 +101,5 @@ func newHotWrite() *Conf { return leaderCount[maxLeader]-leaderCount[minLeader] <= 2 && peerCount[maxPeer]-peerCount[minPeer] <= 2 } - return &conf + return &simCase } diff --git a/pkg/faketikv/cases/makeup_down_replica.go b/pkg/faketikv/cases/makeup_down_replica.go index 3d45ca8f578..17a583a661b 100644 --- a/pkg/faketikv/cases/makeup_down_replica.go +++ b/pkg/faketikv/cases/makeup_down_replica.go @@ -18,12 +18,12 @@ import ( "github.com/pingcap/pd/server/core" ) -func newMakeupDownReplicas() *Conf { - var conf Conf +func newMakeupDownReplicas() *Case { + var simCase Case var id idAllocator for i := 1; i <= 4; i++ { - conf.Stores = append(conf.Stores, &Store{ + simCase.Stores = append(simCase.Stores, &Store{ ID: id.nextID(), Status: metapb.StoreState_Up, Capacity: 1 * TB, @@ -38,7 +38,7 @@ func newMakeupDownReplicas() *Conf { {Id: id.nextID(), StoreId: uint64(i+1)%4 + 1}, {Id: id.nextID(), StoreId: uint64(i+2)%4 + 1}, } - conf.Regions = append(conf.Regions, Region{ + simCase.Regions = append(simCase.Regions, Region{ ID: id.nextID(), Peers: peers, Leader: peers[0], @@ -46,7 +46,7 @@ func newMakeupDownReplicas() *Conf { Keys: 960000, }) } - conf.MaxID = id.maxID + simCase.MaxID = id.maxID numNodes := 4 down := false @@ -61,9 +61,9 @@ func newMakeupDownReplicas() *Conf { } return 0 } - conf.Events = []EventInner{e} + simCase.Events = []EventInner{e} - conf.Checker = func(regions *core.RegionsInfo) bool { + simCase.Checker = func(regions *core.RegionsInfo) bool { sum := 0 regionCounts := make([]int, 0, 3) for i := 1; i <= 4; i++ { @@ -89,5 +89,5 @@ func newMakeupDownReplicas() *Conf { } return true } - return &conf + return &simCase } diff --git a/pkg/faketikv/cases/region_merge.go b/pkg/faketikv/cases/region_merge.go index 6a2f399b3ba..5a93ba68d5f 100644 --- a/pkg/faketikv/cases/region_merge.go +++ b/pkg/faketikv/cases/region_merge.go @@ -21,12 +21,12 @@ import ( "github.com/pingcap/pd/server/core" ) -func newRegionMerge() *Conf { - var conf Conf +func newRegionMerge() *Case { + var simCase Case var id idAllocator // Initialize the cluster for i := 1; i <= 4; i++ { - conf.Stores = append(conf.Stores, &Store{ + simCase.Stores = append(simCase.Stores, &Store{ ID: id.nextID(), Status: metapb.StoreState_Up, Capacity: 1 * TB, @@ -42,7 +42,7 @@ func newRegionMerge() *Conf { {Id: id.nextID(), StoreId: uint64(storeIDs[1] + 1)}, {Id: id.nextID(), StoreId: uint64(storeIDs[2] + 1)}, } - conf.Regions = append(conf.Regions, Region{ + simCase.Regions = append(simCase.Regions, Region{ ID: id.nextID(), Peers: peers, Leader: peers[0], @@ -50,10 +50,10 @@ func newRegionMerge() *Conf { Keys: 100000, }) } - conf.MaxID = id.maxID + simCase.MaxID = id.maxID // Checker description - conf.Checker = func(regions *core.RegionsInfo) bool { + simCase.Checker = func(regions *core.RegionsInfo) bool { count1 := regions.GetStoreRegionCount(1) count2 := regions.GetStoreRegionCount(2) count3 := regions.GetStoreRegionCount(3) @@ -63,5 +63,5 @@ func 
newRegionMerge() *Conf { simutil.Logger.Infof("region counts: %v %v %v %v", count1, count2, count3, count4) return sum == 30 } - return &conf + return &simCase } diff --git a/pkg/faketikv/cases/region_split.go b/pkg/faketikv/cases/region_split.go index 965c9b3c9e3..26eb8e13dd2 100644 --- a/pkg/faketikv/cases/region_split.go +++ b/pkg/faketikv/cases/region_split.go @@ -19,11 +19,11 @@ import ( "github.com/pingcap/pd/server/core" ) -func newRegionSplit() *Conf { - var conf Conf +func newRegionSplit() *Case { + var simCase Case // Initialize the cluster for i := 1; i <= 3; i++ { - conf.Stores = append(conf.Stores, &Store{ + simCase.Stores = append(simCase.Stores, &Store{ ID: uint64(i), Status: metapb.StoreState_Up, Capacity: 1 * TB, @@ -34,16 +34,16 @@ func newRegionSplit() *Conf { peers := []*metapb.Peer{ {Id: 4, StoreId: 1}, } - conf.Regions = append(conf.Regions, Region{ + simCase.Regions = append(simCase.Regions, Region{ ID: 5, Peers: peers, Leader: peers[0], Size: 1 * MB, Keys: 10000, }) - conf.MaxID = 5 - conf.RegionSplitSize = 128 * MB - conf.RegionSplitKeys = 10000 + simCase.MaxID = 5 + simCase.RegionSplitSize = 128 * MB + simCase.RegionSplitKeys = 10000 // Events description e := &WriteFlowOnSpotInner{} e.Step = func(tick int64) map[string]int64 { @@ -51,15 +51,15 @@ func newRegionSplit() *Conf { "foobar": 8 * MB, } } - conf.Events = []EventInner{e} + simCase.Events = []EventInner{e} // Checker description - conf.Checker = func(regions *core.RegionsInfo) bool { + simCase.Checker = func(regions *core.RegionsInfo) bool { count1 := regions.GetStoreRegionCount(1) count2 := regions.GetStoreRegionCount(2) count3 := regions.GetStoreRegionCount(3) simutil.Logger.Infof("region counts: %v %v %v", count1, count2, count3) return count1 > 5 && count2 > 5 && count3 > 5 } - return &conf + return &simCase } diff --git a/pkg/faketikv/config.go b/pkg/faketikv/config.go index 2a76c787747..e7c8055de47 100644 --- a/pkg/faketikv/config.go +++ b/pkg/faketikv/config.go @@ -1,5 +1,66 @@ package faketikv -// Config is the faketikv configuration. -type Config struct { +import ( + "time" + + "github.com/pingcap/pd/pkg/typeutil" +) + +const ( + defaultSimTickInterval = 100 * time.Millisecond + defaultNormTickInterval = 1 * time.Second + defaultStoreCapacityGB = 1024 + defaultStoreAvailableGB = 1024 + defaultStoreIOMBPerSecond = 40 + defaultStoreVersion = "2.1.0" +) + +// SimConfig is the simulator configuration. +type SimConfig struct { + SimTickInterval typeutil.Duration `toml:"sim-tick-interval"` + NormTickInterval typeutil.Duration `toml:"norm-tick-interval"` + + StoreCapacityGB uint64 `toml:"store-capacity"` + StoreAvailableGB uint64 `toml:"store-available"` + StoreIOMBPerSecond int64 `toml:"store-io-per-second"` + StoreVersion string `toml:"store-version"` +} + +// NewSimConfig create a new configuration of the simulator. 
+func NewSimConfig() *SimConfig { + return &SimConfig{} +} + +func adjustDuration(v *typeutil.Duration, defValue time.Duration) { + if v.Duration == 0 { + v.Duration = defValue + } +} + +func adjustString(v *string, defValue string) { + if len(*v) == 0 { + *v = defValue + } +} + +func adjustUint64(v *uint64, defValue uint64) { + if *v == 0 { + *v = defValue + } +} + +func adjustInt64(v *int64, defValue int64) { + if *v == 0 { + *v = defValue + } +} + +// Adjust is used to adjust configurations +func (sc *SimConfig) Adjust() { + adjustDuration(&sc.SimTickInterval, defaultSimTickInterval) + adjustDuration(&sc.NormTickInterval, defaultNormTickInterval) + adjustUint64(&sc.StoreCapacityGB, defaultStoreCapacityGB) + adjustUint64(&sc.StoreAvailableGB, defaultStoreAvailableGB) + adjustInt64(&sc.StoreIOMBPerSecond, defaultStoreIOMBPerSecond) + adjustString(&sc.StoreVersion, defaultStoreVersion) } diff --git a/pkg/faketikv/conn.go b/pkg/faketikv/conn.go index 8afab055388..517afa330e1 100644 --- a/pkg/faketikv/conn.go +++ b/pkg/faketikv/conn.go @@ -25,14 +25,14 @@ type Connection struct { } // NewConnection creates nodes according to the configuration and returns the connection among nodes. -func NewConnection(conf *cases.Conf, pdAddr string) (*Connection, error) { +func NewConnection(simCase *cases.Case, pdAddr string, storeConfig *SimConfig) (*Connection, error) { conn := &Connection{ pdAddr: pdAddr, Nodes: make(map[uint64]*Node), } - for _, store := range conf.Stores { - node, err := NewNode(store, pdAddr) + for _, store := range simCase.Stores { + node, err := NewNode(store, pdAddr, storeConfig.StoreIOMBPerSecond) if err != nil { return nil, err } diff --git a/pkg/faketikv/drive.go b/pkg/faketikv/drive.go index 7d42e29c7be..f04f6bece8e 100644 --- a/pkg/faketikv/drive.go +++ b/pkg/faketikv/drive.go @@ -26,36 +26,38 @@ import ( // Driver promotes the cluster status change. type Driver struct { pdAddr string - conf *cases.Conf + simCase *cases.Case client Client tickCount int64 eventRunner *EventRunner raftEngine *RaftEngine conn *Connection + simConfig *SimConfig } // NewDriver returns a driver. -func NewDriver(pdAddr string, confName string) (*Driver, error) { - conf := cases.NewConf(confName) - if conf == nil { - return nil, errors.Errorf("failed to create conf %s", confName) +func NewDriver(pdAddr string, caseName string, simConfig *SimConfig) (*Driver, error) { + simCase := cases.NewCase(caseName) + if simCase == nil { + return nil, errors.Errorf("failed to create case %s", caseName) } return &Driver{ - pdAddr: pdAddr, - conf: conf, + pdAddr: pdAddr, + simCase: simCase, + simConfig: simConfig, }, nil } // Prepare initializes cluster information, bootstraps cluster and starts nodes. func (d *Driver) Prepare() error { - conn, err := NewConnection(d.conf, d.pdAddr) + conn, err := NewConnection(d.simCase, d.pdAddr, d.simConfig) if err != nil { return err } d.conn = conn - d.raftEngine = NewRaftEngine(d.conf, d.conn) - d.eventRunner = NewEventRunner(d.conf.Events, d.raftEngine) + d.raftEngine = NewRaftEngine(d.simCase, d.conn, d.simConfig) + d.eventRunner = NewEventRunner(d.simCase.Events, d.raftEngine) // Bootstrap. store, region, err := d.GetBootstrapInfo(d.raftEngine) @@ -80,7 +82,7 @@ func (d *Driver) Prepare() error { if err != nil { return errors.WithStack(err) } - if id > d.conf.MaxID { + if id > d.simCase.MaxID { break } } @@ -106,7 +108,7 @@ func (d *Driver) Tick() { // Check checks if the simulation is completed. 
func (d *Driver) Check() bool { - return d.conf.Checker(d.raftEngine.regionsInfo) + return d.simCase.Checker(d.raftEngine.regionsInfo) } // PrintStatistics prints the statistics of the scheduler. diff --git a/pkg/faketikv/event.go b/pkg/faketikv/event.go index 2349e1139f8..87cbc3c0eba 100644 --- a/pkg/faketikv/event.go +++ b/pkg/faketikv/event.go @@ -79,8 +79,8 @@ type WriteFlowOnSpot struct { } // Run implements the event interface. -func (w *WriteFlowOnSpot) Run(raft *RaftEngine, tickCount int64) bool { - res := w.in.Step(tickCount) +func (e *WriteFlowOnSpot) Run(raft *RaftEngine, tickCount int64) bool { + res := e.in.Step(tickCount) for key, size := range res { region := raft.SearchRegion([]byte(key)) if region == nil { @@ -98,8 +98,8 @@ type WriteFlowOnRegion struct { } // Run implements the event interface. -func (w *WriteFlowOnRegion) Run(raft *RaftEngine, tickCount int64) bool { - res := w.in.Step(tickCount) +func (e *WriteFlowOnRegion) Run(raft *RaftEngine, tickCount int64) bool { + res := e.in.Step(tickCount) for id, bytes := range res { region := raft.GetRegion(id) if region == nil { @@ -117,8 +117,8 @@ type ReadFlowOnRegion struct { } // Run implements the event interface. -func (w *ReadFlowOnRegion) Run(raft *RaftEngine, tickCount int64) bool { - res := w.in.Step(tickCount) +func (e *ReadFlowOnRegion) Run(raft *RaftEngine, tickCount int64) bool { + res := e.in.Step(tickCount) raft.updateRegionReadBytes(res) return false } @@ -129,8 +129,8 @@ type AddNodes struct { } // Run implements the event interface. -func (w *AddNodes) Run(raft *RaftEngine, tickCount int64) bool { - id := w.in.Step(tickCount) +func (e *AddNodes) Run(raft *RaftEngine, tickCount int64) bool { + id := e.in.Step(tickCount) if id == 0 { return false } @@ -139,14 +139,16 @@ func (w *AddNodes) Run(raft *RaftEngine, tickCount int64) bool { simutil.Logger.Infof("Node %d already existed", id) return false } + + config := raft.storeConfig s := &cases.Store{ ID: id, Status: metapb.StoreState_Up, - Capacity: 1 * cases.TB, - Available: 1 * cases.TB, - Version: "2.1.0", + Capacity: config.StoreCapacityGB * cases.GB, + Available: config.StoreAvailableGB * cases.GB, + Version: config.StoreVersion, } - n, err := NewNode(s, raft.conn.pdAddr) + n, err := NewNode(s, raft.conn.pdAddr, config.StoreIOMBPerSecond) if err != nil { simutil.Logger.Errorf("Add node %d failed: %v", id, err) return false @@ -166,8 +168,8 @@ type DeleteNodes struct { } // Run implements the event interface. -func (w *DeleteNodes) Run(raft *RaftEngine, tickCount int64) bool { - id := w.in.Step(tickCount) +func (e *DeleteNodes) Run(raft *RaftEngine, tickCount int64) bool { + id := e.in.Step(tickCount) if id == 0 { return false } diff --git a/pkg/faketikv/node.go b/pkg/faketikv/node.go index 750c43de233..d283a71618f 100644 --- a/pkg/faketikv/node.go +++ b/pkg/faketikv/node.go @@ -47,7 +47,7 @@ type Node struct { } // NewNode returns a Node. -func NewNode(s *cases.Store, pdAddr string) (*Node, error) { +func NewNode(s *cases.Store, pdAddr string, ioRate int64) (*Node, error) { ctx, cancel := context.WithCancel(context.Background()) store := &metapb.Store{ Id: s.ID, @@ -76,8 +76,7 @@ func NewNode(s *cases.Store, pdAddr string) (*Node, error) { cancel: cancel, tasks: make(map[uint64]Task), receiveRegionHeartbeatCh: receiveRegionHeartbeatCh, - // FIXME: This value should be adjusted to a appropriate one. 
-		ioRate: 40 * 1000 * 1000,
+		ioRate: ioRate * cases.MB,
 	}, nil
 }
diff --git a/pkg/faketikv/raft.go b/pkg/faketikv/raft.go
index 970ed3ad4e3..c40354d48c3 100644
--- a/pkg/faketikv/raft.go
+++ b/pkg/faketikv/raft.go
@@ -35,10 +35,11 @@ type RaftEngine struct {
 	schedulerStats  *schedulerStatistics
 	regionSplitSize int64
 	regionSplitKeys int64
+	storeConfig     *SimConfig
 }
 
 // NewRaftEngine creates the initialized raft with the configuration.
-func NewRaftEngine(conf *cases.Conf, conn *Connection) *RaftEngine {
+func NewRaftEngine(conf *cases.Case, conn *Connection, storeConfig *SimConfig) *RaftEngine {
 	r := &RaftEngine{
 		regionsInfo:     core.NewRegionsInfo(),
 		conn:            conn,
@@ -46,6 +47,7 @@ func NewRaftEngine(conf *cases.Conf, conn *Connection) *RaftEngine {
 		schedulerStats:  newSchedulerStatistics(),
 		regionSplitSize: conf.RegionSplitSize,
 		regionSplitKeys: conf.RegionSplitKeys,
+		storeConfig:     storeConfig,
 	}
 
 	splitKeys := generateKeys(len(conf.Regions) - 1)

From de4e6d5b7e1a9fac0d66608f99b290a438780c56 Mon Sep 17 00:00:00 2001
From: caojiafeng
Date: Thu, 20 Sep 2018 10:08:15 +0800
Subject: [PATCH 5/7] Fix panic of adjacent-region-scheduler after pd transfer leader (#1250)

* scheduler: fix panic of adjacent-region-scheduler after pd leader transferred
---
 server/schedulers/adjacent_region.go | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/server/schedulers/adjacent_region.go b/server/schedulers/adjacent_region.go
index 2137b71972d..6148e9388d2 100644
--- a/server/schedulers/adjacent_region.go
+++ b/server/schedulers/adjacent_region.go
@@ -144,8 +144,8 @@ func (l *balanceAdjacentRegionScheduler) Schedule(cluster schedule.Cluster, opIn
 	regions := cluster.ScanRegions(l.lastKey, scanLimit)
 	// scan to the end
 	if len(regions) <= 1 {
-		l.adjacentRegionsCount = 0
 		schedulerStatus.WithLabelValues(l.GetName(), "adjacent_count").Set(float64(l.adjacentRegionsCount))
+		l.adjacentRegionsCount = 0
 		l.lastKey = []byte("")
 		return nil
 	}
@@ -199,7 +199,9 @@ func (l *balanceAdjacentRegionScheduler) process(cluster schedule.Cluster) []*sc
 		l.cacheRegions.head = head + 1
 		l.lastKey = r2.GetStartKey()
 	}()
-	if l.unsafeToBalance(cluster, r1) {
+	// After the cluster is prepared, there is a gap before some regions' heartbeats are received.
+	// The leaders of those regions are nil, and we should skip them.
+	if r1.GetLeader() == nil || r2.GetLeader() == nil || l.unsafeToBalance(cluster, r1) {
 		schedulerCounter.WithLabelValues(l.GetName(), "skip").Inc()
 		return nil
 	}
@@ -221,6 +223,9 @@ func (l *balanceAdjacentRegionScheduler) unsafeToBalance(cluster schedule.Cluste
 		return true
 	}
 	store := cluster.GetStore(region.GetLeader().GetStoreId())
+	if store == nil {
+		return true
+	}
 	s := l.selector.SelectSource(cluster, []*core.StoreInfo{store})
 	if s == nil {
 		return true

From c51ce7ce68748fe5dfd76164162e901194654cb2 Mon Sep 17 00:00:00 2001
From: caojiafeng
Date: Thu, 20 Sep 2018 10:35:14 +0800
Subject: [PATCH 6/7] config: fix DisableNamespaceRelocation option clone behaviour (#1251)

---
 server/config.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/server/config.go b/server/config.go
index a2d3c19fda1..b36d966fe26 100644
--- a/server/config.go
+++ b/server/config.go
@@ -463,6 +463,7 @@ func (c *ScheduleConfig) clone() *ScheduleConfig {
 		DisableMakeUpReplica:       c.DisableMakeUpReplica,
 		DisableRemoveExtraReplica:  c.DisableRemoveExtraReplica,
 		DisableLocationReplacement: c.DisableLocationReplacement,
+		DisableNamespaceRelocation: c.DisableNamespaceRelocation,
 		Schedulers:                 schedulers,
 	}
 }

From 55db505e8f35e8ab4e00efd202beb27a8ecc40fb Mon Sep 17 00:00:00 2001
From: Ryan Leung
Date: Thu, 20 Sep 2018 16:50:19 +0800
Subject: [PATCH 7/7] *: adjust the project layout (#1245)

---
 .gitignore                                    |   2 +-
 Makefile                                      |  25 +-
 {pd-client => client}/client.go               |   0
 {pd-client => client}/client_test.go          |   0
 {pd-client => client}/metrics.go              |   0
 cmd/README.md                                 |   4 -
 hack/retool                                   |   8 +
 hack/retool-install.sh                        |  20 +-
 pdctl/README.md                               | 103 ---
 pkg/integration_test/client_test.go           |   2 +-
 server/testutil.go                            |   4 +-
 tools/pd-ctl/README.md                        | 686 ++++++++++++++++++
 {cmd => tools}/pd-ctl/main.go                 |   2 +-
 .../pd-ctl/pdctl}/command/cluster_command.go  |   0
 .../pd-ctl/pdctl}/command/config_command.go   |   0
 .../pd-ctl/pdctl}/command/exit_command.go     |   0
 .../pd-ctl/pdctl}/command/global.go           |   0
 .../pd-ctl/pdctl}/command/health_command.go   |   0
 .../pd-ctl/pdctl}/command/hot_command.go      |   0
 .../pd-ctl/pdctl}/command/label_command.go    |   0
 .../pd-ctl/pdctl}/command/log_command.go      |   0
 .../pd-ctl/pdctl}/command/member_command.go   |   0
 .../pd-ctl/pdctl}/command/operator.go         |   0
 .../pd-ctl/pdctl}/command/ping_command.go     |   0
 .../pd-ctl/pdctl}/command/region_command.go   |   0
 .../pd-ctl/pdctl}/command/scheduler.go        |   0
 .../pd-ctl/pdctl}/command/store_command.go    |   0
 .../pdctl}/command/table_namespace_command.go |   0
 .../pd-ctl/pdctl}/command/tso_command.go      |   0
 {pdctl => tools/pd-ctl/pdctl}/ctl.go          |   2 +-
 tools/pd-recover/README.md                    |  42 ++
 {cmd => tools}/pd-recover/main.go             |   0
 tools/pd-simulator/README.md                  |  38 +
 {cmd/simulator => tools/pd-simulator}/main.go |  12 +-
 .../simulator}/cases/add_nodes.go             |   2 +-
 .../simulator}/cases/add_nodes_dynamic.go     |   2 +-
 .../simulator}/cases/balance_leader.go        |   2 +-
 .../pd-simulator/simulator}/cases/cases.go    |   0
 .../simulator}/cases/delete_nodes.go          |   2 +-
 .../simulator}/cases/event_inner.go           |   0
 .../pd-simulator/simulator}/cases/hot_read.go |   2 +-
 .../simulator}/cases/hot_write.go             |   2 +-
 .../simulator}/cases/makeup_down_replica.go   |   2 +-
 .../simulator}/cases/region_merge.go          |   2 +-
 .../simulator}/cases/region_split.go          |   2 +-
 .../pd-simulator/simulator}/client.go         |   4 +-
 .../pd-simulator/simulator}/config.go         |   2 +-
 .../pd-simulator/simulator}/conn.go           |   4 +-
 .../pd-simulator/simulator}/drive.go          |   6 +-
 .../pd-simulator/simulator}/event.go          |   6 +-
 .../pd-simulator/simulator}/node.go           |   6 +-
 .../pd-simulator/simulator}/raft.go           |   6 +-
 .../pd-simulator/simulator}/simutil/logger.go |   0
 .../pd-simulator/simulator}/statistics.go     |   2 +-
 .../pd-simulator/simulator}/task.go           |   4 +-
 tools/pd-tso-bench/README.md                  |  46 ++
 {cmd => tools}/pd-tso-bench/main.go           |   2 +-
 57 files changed, 886 insertions(+), 170 deletions(-)
 rename {pd-client => client}/client.go (100%)
 rename {pd-client => client}/client_test.go (100%)
 rename {pd-client => client}/metrics.go (100%)
 delete mode 100644 cmd/README.md
 create mode 100755 hack/retool
 delete mode 100644 pdctl/README.md
 create mode 100644 tools/pd-ctl/README.md
 rename {cmd => tools}/pd-ctl/main.go (98%)
 rename {pdctl => tools/pd-ctl/pdctl}/command/cluster_command.go (100%)
 rename {pdctl => tools/pd-ctl/pdctl}/command/config_command.go (100%)
 rename {pdctl => tools/pd-ctl/pdctl}/command/exit_command.go (100%)
 rename {pdctl => tools/pd-ctl/pdctl}/command/global.go (100%)
 rename {pdctl => tools/pd-ctl/pdctl}/command/health_command.go (100%)
 rename {pdctl => tools/pd-ctl/pdctl}/command/hot_command.go (100%)
 rename {pdctl => tools/pd-ctl/pdctl}/command/label_command.go (100%)
 rename {pdctl => tools/pd-ctl/pdctl}/command/log_command.go (100%)
 rename {pdctl => tools/pd-ctl/pdctl}/command/member_command.go (100%)
 rename {pdctl => tools/pd-ctl/pdctl}/command/operator.go (100%)
 rename {pdctl => tools/pd-ctl/pdctl}/command/ping_command.go (100%)
 rename {pdctl => tools/pd-ctl/pdctl}/command/region_command.go (100%)
 rename {pdctl => tools/pd-ctl/pdctl}/command/scheduler.go (100%)
 rename {pdctl => tools/pd-ctl/pdctl}/command/store_command.go (100%)
 rename {pdctl => tools/pd-ctl/pdctl}/command/table_namespace_command.go (100%)
 rename {pdctl => tools/pd-ctl/pdctl}/command/tso_command.go (100%)
 rename {pdctl => tools/pd-ctl/pdctl}/ctl.go (97%)
 create mode 100644 tools/pd-recover/README.md
 rename {cmd => tools}/pd-recover/main.go (100%)
 create mode 100644 tools/pd-simulator/README.md
 rename {cmd/simulator => tools/pd-simulator}/main.go (92%)
 rename {pkg/faketikv => tools/pd-simulator/simulator}/cases/add_nodes.go (96%)
 rename {pkg/faketikv => tools/pd-simulator/simulator}/cases/add_nodes_dynamic.go (97%)
 rename {pkg/faketikv => tools/pd-simulator/simulator}/cases/balance_leader.go (96%)
 rename {pkg/faketikv => tools/pd-simulator/simulator}/cases/cases.go (100%)
 rename {pkg/faketikv => tools/pd-simulator/simulator}/cases/delete_nodes.go (97%)
 rename {pkg/faketikv => tools/pd-simulator/simulator}/cases/event_inner.go (100%)
 rename {pkg/faketikv => tools/pd-simulator/simulator}/cases/hot_read.go (97%)
 rename {pkg/faketikv => tools/pd-simulator/simulator}/cases/hot_write.go (97%)
 rename {pkg/faketikv => tools/pd-simulator/simulator}/cases/makeup_down_replica.go (97%)
 rename {pkg/faketikv => tools/pd-simulator/simulator}/cases/region_merge.go (96%)
 rename {pkg/faketikv => tools/pd-simulator/simulator}/cases/region_split.go (96%)
 rename {pkg/faketikv => tools/pd-simulator/simulator}/client.go (99%)
 rename {pkg/faketikv => tools/pd-simulator/simulator}/config.go (98%)
 rename {pkg/faketikv => tools/pd-simulator/simulator}/conn.go (94%)
 rename {pkg/faketikv => tools/pd-simulator/simulator}/drive.go (96%)
 rename {pkg/faketikv => tools/pd-simulator/simulator}/event.go (97%)
 rename {pkg/faketikv => tools/pd-simulator/simulator}/node.go (97%)
 rename {pkg/faketikv => tools/pd-simulator/simulator}/raft.go (98%)
 rename {pkg/faketikv => tools/pd-simulator/simulator}/simutil/logger.go (100%)
 rename {pkg/faketikv => tools/pd-simulator/simulator}/statistics.go (99%)
 rename {pkg/faketikv => tools/pd-simulator/simulator}/task.go (99%)
 create mode 100644 tools/pd-tso-bench/README.md
 rename {cmd => tools}/pd-tso-bench/main.go (99%)

diff --git a/.gitignore b/.gitignore
index 6345f4f4e30..4c2ce9d9eba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,4 +6,4 @@ default.pd
 *.swp
 .DS_Store
 tags
-/_tools/
+/.retools/
diff --git a/Makefile b/Makefile
index 823be966b1e..672386e1391 100644
--- a/Makefile
+++ b/Makefile
@@ -7,9 +7,10 @@ BASIC_TEST_PKGS := $(filter-out github.com/pingcap/pd/pkg/integration_test,$(TES
 PACKAGES := go list ./...
 PACKAGE_DIRECTORIES := $(PACKAGES) | sed 's|github.com/pingcap/pd/||'
 GOCHECKER := awk '{ print } END { if (NR > 0) { exit 1 } }'
+RETOOL:= ./hack/retool
 
-GOFAIL_ENABLE := $$(find $$PWD/ -type d | grep -vE "(\.git|vendor)" | xargs retool do gofail enable)
-GOFAIL_DISABLE := $$(find $$PWD/ -type d | grep -vE "(\.git|vendor)" | xargs retool do gofail disable)
+GOFAIL_ENABLE := $$(find $$PWD/ -type d | grep -vE "(\.git|vendor)" | xargs ./hack/retool do gofail enable)
+GOFAIL_DISABLE := $$(find $$PWD/ -type d | grep -vE "(\.git|vendor)" | xargs ./hack/retool do gofail disable)
 
 LDFLAGS += -X "$(PD_PKG)/server.PDReleaseVersion=$(shell git describe --tags --dirty)"
 LDFLAGS += -X "$(PD_PKG)/server.PDBuildTS=$(shell date -u '+%Y-%m-%d %I:%M:%S')"
@@ -35,9 +36,9 @@ ifeq ("$(WITH_RACE)", "1")
 else
 	CGO_ENABLED=0 go build -ldflags '$(LDFLAGS)' -o bin/pd-server cmd/pd-server/main.go
 endif
-	CGO_ENABLED=0 go build -ldflags '$(LDFLAGS)' -o bin/pd-ctl cmd/pd-ctl/main.go
-	CGO_ENABLED=0 go build -o bin/pd-tso-bench cmd/pd-tso-bench/main.go
-	CGO_ENABLED=0 go build -o bin/pd-recover cmd/pd-recover/main.go
+	CGO_ENABLED=0 go build -ldflags '$(LDFLAGS)' -o bin/pd-ctl tools/pd-ctl/main.go
+	CGO_ENABLED=0 go build -o bin/pd-tso-bench tools/pd-tso-bench/main.go
+	CGO_ENABLED=0 go build -o bin/pd-recover tools/pd-recover/main.go
 
 test: retool-setup
 	# testing..
@@ -52,26 +53,26 @@ basic_test:
 
 # These need to be fixed before they can be ran regularly
 check-fail:
-	CGO_ENABLED=0 retool do gometalinter.v2 --disable-all \
+	CGO_ENABLED=0 ./hack/retool do gometalinter.v2 --disable-all \
 		--enable errcheck \
 		$$($(PACKAGE_DIRECTORIES))
-	CGO_ENABLED=0 retool do gosec $$($(PACKAGE_DIRECTORIES))
+	CGO_ENABLED=0 ./hack/retool do gosec $$($(PACKAGE_DIRECTORIES))
 
 check-all: static lint
 	@echo "checking"
 
 retool-setup:
 	@which retool >/dev/null 2>&1 || go get github.com/twitchtv/retool
-	@retool sync
+	@./hack/retool sync
 
 check: retool-setup check-all
 
 static:
 	@ # Not running vet and fmt through metalinter becauase it ends up looking at vendor
 	gofmt -s -l $$($(PACKAGE_DIRECTORIES)) 2>&1 | $(GOCHECKER)
-	retool do govet --shadow $$($(PACKAGE_DIRECTORIES)) 2>&1 | $(GOCHECKER)
+	./hack/retool do govet --shadow $$($(PACKAGE_DIRECTORIES)) 2>&1 | $(GOCHECKER)
 
-	CGO_ENABLED=0 retool do gometalinter.v2 --disable-all --deadline 120s \
+	CGO_ENABLED=0 ./hack/retool do gometalinter.v2 --disable-all --deadline 120s \
 		--enable misspell \
 		--enable megacheck \
 		--enable ineffassign \
@@ -79,7 +80,7 @@ static:
 
 lint:
 	@echo "linting"
-	CGO_ENABLED=0 retool do revive -formatter friendly -config revive.toml $$($(PACKAGES))
+	CGO_ENABLED=0 ./hack/retool do revive -formatter friendly -config revive.toml $$($(PACKAGES))
 
 travis_coverage:
ifeq ("$(TRAVIS_COVERAGE)", "1")
@@ -100,7 +101,7 @@ endif
 	bash ./hack/clean_vendor.sh
 
 simulator:
-	CGO_ENABLED=0 go build -o bin/simulator cmd/simulator/main.go
+	CGO_ENABLED=0 go build -o bin/pd-simulator tools/pd-simulator/main.go
 
 gofail-enable:
 	# Converting gofail failpoints...
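The renames that follow move the Go client library from `pd-client/` to `client/`; for downstream code only the import path changes, as the `pkg/integration_test/client_test.go` hunk later in this patch shows. A minimal sketch of the update a consumer would make, assuming the client API itself is untouched by the move (the `NewClient` call and `SecurityOption` literal are illustrative, not part of this patch):

```go
package main

import (
	"log"

	// Old path: pd "github.com/pingcap/pd/pd-client"
	pd "github.com/pingcap/pd/client" // new path; the package name is still "pd"
)

func main() {
	// Assumed usage: this layout patch only moves files, so NewClient is
	// expected to keep whatever signature it had before the move.
	client, err := pd.NewClient([]string{"http://127.0.0.1:2379"}, pd.SecurityOption{})
	if err != nil {
		log.Fatal(err)
	}
	defer client.Close()
}
```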
diff --git a/pd-client/client.go b/client/client.go
similarity index 100%
rename from pd-client/client.go
rename to client/client.go
diff --git a/pd-client/client_test.go b/client/client_test.go
similarity index 100%
rename from pd-client/client_test.go
rename to client/client_test.go
diff --git a/pd-client/metrics.go b/client/metrics.go
similarity index 100%
rename from pd-client/metrics.go
rename to client/metrics.go
diff --git a/cmd/README.md b/cmd/README.md
deleted file mode 100644
index 47c2fecd124..00000000000
--- a/cmd/README.md
+++ /dev/null
@@ -1,4 +0,0 @@
-## cmd
-
-This directory is meant to enforce vendoring for pd binaries without polluting
-the pd client libraries with vendored dependencies.
diff --git a/hack/retool b/hack/retool
new file mode 100755
index 00000000000..de9950a7004
--- /dev/null
+++ b/hack/retool
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+#
+# Add standard retool options
+set -euo pipefail
+
+
+cd $(dirname "$0")/..
+exec retool -tool-dir "$PWD/.retools" "$@"
diff --git a/hack/retool-install.sh b/hack/retool-install.sh
index ba3fbca887b..0ca3f0ca97c 100755
--- a/hack/retool-install.sh
+++ b/hack/retool-install.sh
@@ -3,21 +3,23 @@ set -euo pipefail
 
 # This script generates tools.json
 # It helps record what releases/branches are being used
+
+cd $(dirname "$0")/..
 which retool >/dev/null || go get github.com/twitchtv/retool
 
 # tool environment
 # check runner
-retool add gopkg.in/alecthomas/gometalinter.v2 v2.0.5
+./hack/retool add gopkg.in/alecthomas/gometalinter.v2 v2.0.5
 # check spelling
-retool add github.com/client9/misspell/cmd/misspell v0.3.4
+./hack/retool add github.com/client9/misspell/cmd/misspell v0.3.4
 # checks correctness
-retool add github.com/gordonklaus/ineffassign 7bae11eba15a3285c75e388f77eb6357a2d73ee2
-retool add honnef.co/go/tools/cmd/megacheck master
-retool add github.com/dnephin/govet 4a96d43e39d340b63daa8bc5576985aa599885f6
+./hack/retool add github.com/gordonklaus/ineffassign 7bae11eba15a3285c75e388f77eb6357a2d73ee2
+./hack/retool add honnef.co/go/tools/cmd/megacheck master
+./hack/retool add github.com/dnephin/govet 4a96d43e39d340b63daa8bc5576985aa599885f6
 # slow checks
-retool add github.com/kisielk/errcheck v1.1.0
+./hack/retool add github.com/kisielk/errcheck v1.1.0
 # linter
-retool add github.com/mgechev/revive 7773f47324c2bf1c8f7a5500aff2b6c01d3ed73b
-retool add github.com/securego/gosec/cmd/gosec 1.0.0
+./hack/retool add github.com/mgechev/revive 7773f47324c2bf1c8f7a5500aff2b6c01d3ed73b
+./hack/retool add github.com/securego/gosec/cmd/gosec 1.0.0
 # go fail
-retool add github.com/etcd-io/gofail master
+./hack/retool add github.com/etcd-io/gofail master
diff --git a/pdctl/README.md b/pdctl/README.md
deleted file mode 100644
index 7d5e7f7aaba..00000000000
--- a/pdctl/README.md
+++ /dev/null
@@ -1,103 +0,0 @@
-pdctl
-========
-
-pdctl is a command line tool for pd
-
-## Build
-1. Make sure [*Go*](https://golang.org/) (version 1.5+) is installed.
-2. Use `make` in pd root path. `pdctl` will build in `bin` directory.
-
-## Usage
-
-### Example
-run:
-
-    ./pd-ctl store -d -u 127.0.0.1:2379
-
-show all stores status. '-u' specify the pd address, it can be overwritten by setting the environment variable PD_ADDR.
-Such as `export PD_ADDR=127.0.0.1:2379`
-
-### Flags
-#### --pd,-u
-+ The pd address
-+ default: http://127.0.0.1:2379
-+ env variable: PD_ADDR
-
-#### --detach,-d
-+ Run pdctl without readline
-+ default: false
-
-### Command
-#### store [delete]
-show the store status or delete a store
-
-##### example
-```
->> store
-{
-  "count": 3,
-  "stores": [...]
-}
->> store 1
-  ......
->> store delete 1
-  ......
-```
-
-#### config [show | set \<option\> \<value\>]
-show or set the balance config
-##### example
-```
->> config show
-{
-  "min-region-count": 10,
-  "min-leader-count": 10,
-  "max-snapshot-count": 3,
-  "min-balance-diff-ratio": 0.01,
-  "max-store-down-duration": "30m0s",
-  "leader-schedule-limit": 8,
-  "leader-schedule-interval": "10s",
-  "storage-schedule-limit": 4,
-  "storage-schedule-interval": "30s"
-}
->> config set leader-schedule-interval 20s
-Success!
-```
-
-#### Member [leader | delete]
-show the pd members status
-##### example
-```
->> member
-{
-  "members": [......]
-}
->> member leader
-{
-  "name": "pd",
-  "addr": "http://192.168.199.229:2379",
-  "id": 9724873857558226554
-}
->> member delete name pd2
-Success!
-```
-
-#### Region
-show one or all regions status
-##### Example
-```
->> region
-{
-  "count": 1,
-  "regions": [......]
-}
-
->> region 2
-{
-  "region": {
-    "id": 2,
-    ......
-  }
-  "leader": {
-    ......
-  }
-}
-```
diff --git a/pkg/integration_test/client_test.go b/pkg/integration_test/client_test.go
index 3e4ffe12a81..a40e4316e24 100644
--- a/pkg/integration_test/client_test.go
+++ b/pkg/integration_test/client_test.go
@@ -23,7 +23,7 @@ import (
 
 	"github.com/coreos/etcd/clientv3"
 	. "github.com/pingcap/check"
-	pd "github.com/pingcap/pd/pd-client"
+	pd "github.com/pingcap/pd/client"
 	"github.com/pingcap/pd/pkg/testutil"
 )
 
diff --git a/server/testutil.go b/server/testutil.go
index 1b4cc7af41d..8297ec0dc4b 100644
--- a/server/testutil.go
+++ b/server/testutil.go
@@ -57,7 +57,7 @@ func NewTestServer() (*Config, *Server, CleanupFunc, error) {
 }
 
 // NewTestSingleConfig is only for test to create one pd.
-// Because pd-client also needs this, so export here.
+// Because the PD client also needs this, it is exported here.
 func NewTestSingleConfig() *Config {
 	cfg := &Config{
 		Name: "pd",
@@ -93,7 +93,7 @@ func NewTestSingleConfig() *Config {
 }
 
 // NewTestMultiConfig is only for test to create multiple pd configurations.
-// Because pd-client also needs this, so export here.
+// Because the PD client also needs this, it is exported here.
 func NewTestMultiConfig(count int) []*Config {
 	cfgs := make([]*Config, count)
 
diff --git a/tools/pd-ctl/README.md b/tools/pd-ctl/README.md
new file mode 100644
index 00000000000..690d8c9f017
--- /dev/null
+++ b/tools/pd-ctl/README.md
@@ -0,0 +1,686 @@
+pd-ctl
+========
+
+pd-ctl is a command line tool for PD; it obtains the state information of the cluster and can tune the cluster.
+
+## Build
+1. [Go](https://golang.org/) Version 1.9 or later
+2. In the root directory of the [PD project](https://github.com/pingcap/pd), use the `make` command to compile and generate `bin/pd-ctl`
+
+> **Note:** Generally, you don't need to compile source code as the PD Control tool already exists in the released Binary or Docker. However, dev users can refer to the above instruction for compiling source code.
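The `server/testutil.go` hunk above keeps `NewTestServer` and the test config constructors exported because the relocated client package boots real PD servers in its tests. A minimal sketch of that pattern, using only the `NewTestServer` signature visible in the diff; everything else (how the test then dials the server, and `CleanupFunc` being a no-arg function) is an assumption:

```go
package client_test

import (
	"testing"

	"github.com/pingcap/pd/server"
)

// TestWithTestServer boots a throwaway PD server for a client-side test.
// NewTestServer's signature comes from the diff above; the dialing step is
// omitted because it is not shown in this patch.
func TestWithTestServer(t *testing.T) {
	cfg, svr, cleanup, err := server.NewTestServer()
	if err != nil {
		t.Fatal(err)
	}
	defer cleanup() // assumed: CleanupFunc takes no arguments

	_ = cfg // e.g. read the client URL from the config to dial the server
	_ = svr
}
```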
+
+## Usage
+
+Single-command mode:
+
+    ./pd-ctl store -d -u http://127.0.0.1:2379
+
+Interactive mode:
+
+    ./pd-ctl -u http://127.0.0.1:2379
+
+Use environment variables:
+
+```bash
+export PD_ADDR=http://127.0.0.1:2379
+./pd-ctl
+```
+
+Use TLS to encrypt:
+
+```bash
+./pd-ctl -u https://127.0.0.1:2379 --cacert="path/to/ca" --cert="path/to/cert" --key="path/to/key"
+```
+
+## Command line flags
+
+### \-\-pd,-u
+
++ PD address
++ Default address: http://127.0.0.1:2379
++ Environment variable: PD_ADDR
+
+### \-\-detach,-d
+
++ Use single command line mode (not entering readline)
++ Default: false
+
+### --cacert
+
++ Specify the path to the certificate file of the trusted CA in PEM format
++ Default: ""
+
+### --cert
+
++ Specify the path to the certificate of SSL in PEM format
++ Default: ""
+
+### --key
+
++ Specify the path to the certificate key file of SSL in PEM format, which is the private key of the certificate specified by `--cert`
++ Default: ""
+
+### --version,-V
+
++ Print the version information and exit
++ Default: false
+
+## Command
+
+### `cluster`
+
+Use this command to view the basic information of the cluster.
+
+Usage:
+
+```bash
+>> cluster // To show the cluster information
+{
+  "id": 6493707687106161130,
+  "max_peer_count": 3
+}
+```
+
+### `config [show | set