diff --git a/client.go b/client.go index 176b7fdb..7867a013 100644 --- a/client.go +++ b/client.go @@ -495,7 +495,7 @@ func getPingData(uni bool, protoType ProtocolType) []byte { func (c *Client) sendPing() { c.mu.Lock() - c.lastPing = time.Now().Unix() + c.lastPing = time.Now().UnixNano() c.mu.Unlock() unidirectional := c.transport.Unidirectional() _ = c.transportEnqueue(getPingData(unidirectional, c.transport.Protocol()), "", protocol.FrameTypeServerPing) @@ -518,6 +518,7 @@ func (c *Client) checkPong() { } lastSeen := c.lastSeen c.mu.RUnlock() + c.node.metrics.observePingPongDuration(time.Duration(lastSeen-lastPing) * time.Nanosecond) if lastSeen < lastPing { go func() { c.Disconnect(DisconnectNoPong) }() return @@ -1180,7 +1181,7 @@ func (c *Client) dispatchCommand(cmd *protocol.Command, cmdSize int) (*Disconnec // upon receiving pong we change a sign of lastPing value. This way we can handle // unnecessary pongs sent by the client and still use lastPing value in Client.checkPong. c.lastPing = -c.lastPing - c.lastSeen = time.Now().Unix() + c.lastSeen = time.Now().UnixNano() c.mu.Unlock() return nil, true } diff --git a/metrics.go b/metrics.go index a355611d..50ba7453 100644 --- a/metrics.go +++ b/metrics.go @@ -86,6 +86,7 @@ type metrics struct { pubSubLagHistogram prometheus.Histogram broadcastDurationHistogram prometheus.Histogram + pingPongDurationHistogram prometheus.Histogram } func (m *metrics) observeCommandDuration(frameType protocol.FrameType, d time.Duration) { @@ -131,6 +132,10 @@ func (m *metrics) observeBroadcastDuration(started time.Time) { m.broadcastDurationHistogram.Observe(time.Since(started).Seconds()) } +func (m *metrics) observePingPongDuration(duration time.Duration) { + m.pingPongDurationHistogram.Observe(duration.Seconds()) +} + func (m *metrics) setBuildInfo(version string) { m.buildInfoGauge.WithLabelValues(version).Set(1) } @@ -438,6 +443,17 @@ func initMetricsRegistry(registry prometheus.Registerer, metricsNamespace string Help: "Count of recover operations.", }, []string{"recovered"}) + m.pingPongDurationHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: metricsNamespace, + Subsystem: "client", + Name: "ping_pong_duration_seconds", + Help: "Ping/Pong duration in seconds", + Buckets: []float64{ + 0.000100, 0.000250, 0.000500, // Microsecond resolution. + 0.001, 0.005, 0.010, 0.025, 0.050, 0.100, 0.250, 0.500, // Millisecond resolution. + 1.0, 2.5, 5.0, 10.0, // Second resolution. + }}) + m.transportConnectCount = prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: metricsNamespace, Subsystem: "transport", @@ -582,6 +598,9 @@ func initMetricsRegistry(registry prometheus.Registerer, metricsNamespace string if err := registry.Register(m.recoverCount); err != nil && !errors.As(err, &alreadyRegistered) { return nil, err } + if err := registry.Register(m.pingPongDurationHistogram); err != nil && !errors.As(err, &alreadyRegistered) { + return nil, err + } if err := registry.Register(m.transportConnectCount); err != nil && !errors.As(err, &alreadyRegistered) { return nil, err }