Skip to content

Commit

Permalink
reporting rtt with the ping metric
Browse files Browse the repository at this point in the history
  • Loading branch information
vishalchangrani authored and Kay-Zee committed Feb 17, 2021
1 parent 892f041 commit 5c9c8f7
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 25 deletions.
25 changes: 11 additions & 14 deletions engine/access/ping/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,21 +107,18 @@ func (e *Engine) startPing() {
// pingNode pings the given peer and updates the metrics with the result and the additional node information
func (e *Engine) pingNode(peer *flow.Identity) {
id := peer.ID()
reachable := e.pingAddress(id)
info := e.nodeInfo[id]
e.metrics.NodeReachable(peer, info, reachable)
}

// pingAddress sends a ping request to the given address and blocks until it either receives
// a ping response, in which case it returns true, or hits a timeout, in which case it returns false.
// If any other error occurs, it also returns false.
func (e *Engine) pingAddress(target flow.Identifier) bool {
// ignore the ping duration for now
// ping will timeout in libp2p.PingTimeoutSecs seconds
_, err := e.middleware.Ping(target)
// ping the node
rtt, err := e.middleware.Ping(id) // ping will timeout in libp2p.PingTimeoutSecs seconds
if err != nil {
e.log.Debug().Err(err).Str("target", target.String()).Msg("failed to ping")
return false
e.log.Debug().Err(err).Str("target", id.String()).Msg("failed to ping")
// report the rtt duration as negative to make it easier to distinguish between pingable and non-pingable nodes
rtt = -1
}
return true

// get the additional info about the node
info := e.nodeInfo[id]

// update metric
e.metrics.NodeReachable(peer, info, rtt)
}
6 changes: 3 additions & 3 deletions module/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ type TransactionMetrics interface {
}

type PingMetrics interface {
// NodeReachable tracks the availability of the node and reports it as 1 if the node was successfully pinged, 0
// otherwise. The nodeInfo provides additional information about the node, such as the name of the node operator.
NodeReachable(node *flow.Identity, nodeInfo string, reachable bool)
// NodeReachable tracks the round-trip time in milliseconds taken to ping a node. A negative rtt indicates that the ping failed.
// The nodeInfo provides additional information about the node, such as the name of the node operator.
NodeReachable(node *flow.Identity, nodeInfo string, rtt time.Duration)
}
15 changes: 10 additions & 5 deletions module/metrics/ping.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package metrics

import (
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"

Expand All @@ -23,14 +25,17 @@ func NewPingCollector() *PingCollector {
return pc
}

func (pc *PingCollector) NodeReachable(node *flow.Identity, nodeInfo string, reachable bool) {
var val float64
if reachable {
val = 1
func (pc *PingCollector) NodeReachable(node *flow.Identity, nodeInfo string, rtt time.Duration) {
var rttValue float64
if rtt > 0 {
rttValue = float64(rtt.Milliseconds())
} else {
rttValue = -1
}

pc.reachable.With(prometheus.Labels{
LabelNodeID: node.String(),
LabelNodeRole: node.Role.String(),
LabelNodeInfo: nodeInfo}).
Set(val)
Set(rttValue)
}
8 changes: 5 additions & 3 deletions module/mock/ping_metrics.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 5c9c8f7

Please sign in to comment.