Skip to content

Commit

Permalink
metrics: add algod_network_p2p_sent_bytes_{TAG}
Browse files Browse the repository at this point in the history
  • Loading branch information
algorandskiy committed Aug 19, 2024
1 parent 3eb13b0 commit 34f8746
Show file tree
Hide file tree
Showing 6 changed files with 177 additions and 83 deletions.
109 changes: 109 additions & 0 deletions network/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
// Copyright (C) 2019-2024 Algorand, Inc.
// This file is part of go-algorand
//
// go-algorand is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// go-algorand is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with go-algorand. If not, see <https://www.gnu.org/licenses/>.

package network

import (
"github.com/algorand/go-algorand/protocol"
"github.com/algorand/go-algorand/util/metrics"
)

func init() {
// all tags are tracked by ws net
tagStringListWs := make([]string, len(protocol.TagList))
for i, t := range protocol.TagList {
tagStringListWs[i] = string(t)
}
networkSentBytesByTag = metrics.NewTagCounterFiltered("algod_network_sent_bytes_{TAG}", "Number of bytes that were sent over the network for {TAG} messages", tagStringListWs, "UNK")
networkReceivedBytesByTag = metrics.NewTagCounterFiltered("algod_network_received_bytes_{TAG}", "Number of bytes that were received from the network for {TAG} messages", tagStringListWs, "UNK")
networkMessageReceivedByTag = metrics.NewTagCounterFiltered("algod_network_message_received_{TAG}", "Number of complete messages that were received from the network for {TAG} messages", tagStringListWs, "UNK")
networkMessageSentByTag = metrics.NewTagCounterFiltered("algod_network_message_sent_{TAG}", "Number of complete messages that were sent to the network for {TAG} messages", tagStringListWs, "UNK")
networkHandleCountByTag = metrics.NewTagCounterFiltered("algod_network_rx_handle_countbytag_{TAG}", "count of handler calls in the receive thread for {TAG} messages", tagStringListWs, "UNK")
networkHandleMicrosByTag = metrics.NewTagCounterFiltered("algod_network_rx_handle_microsbytag_{TAG}", "microseconds spent by protocol handlers in the receive thread for {TAG} messages", tagStringListWs, "UNK")

// all but gossipSub tags are tracked by p2p net
// the remaining tags are tracked by gossipSub tracer p2p sub-package
tagStringListP2P := make([]string, len(protocol.TagList))
for i, t := range protocol.TagList {
if _, ok := gossipSubTags[t]; !ok {
tagStringListWs[i] = string(t)
}
}
networkP2PSentBytesByTag = metrics.NewTagCounterFiltered("algod_network_p2p_sent_bytes_{TAG}", "Number of bytes that were sent over the network for {TAG} messages", tagStringListP2P, "UNK")
networkP2PReceivedBytesByTag = metrics.NewTagCounterFiltered("algod_network_p2p_received_bytes_{TAG}", "Number of bytes that were received from the network for {TAG} messages", tagStringListP2P, "UNK")
networkP2PMessageReceivedByTag = metrics.NewTagCounterFiltered("algod_network_p2p_message_received_{TAG}", "Number of complete messages that were received from the network for {TAG} messages", tagStringListP2P, "UNK")
networkP2PMessageSentByTag = metrics.NewTagCounterFiltered("algod_network_p2p_message_sent_{TAG}", "Number of complete messages that were sent to the network for {TAG} messages", tagStringListP2P, "UNK")
}

var networkSentBytesTotal = metrics.MakeCounter(metrics.NetworkSentBytesTotal)
var networkP2PSentBytesTotal = metrics.MakeCounter(metrics.NetworkP2PSentBytesTotal)
var networkSentBytesByTag *metrics.TagCounter
var networkP2PSentBytesByTag *metrics.TagCounter
var networkReceivedBytesTotal = metrics.MakeCounter(metrics.NetworkReceivedBytesTotal)
var networkP2PReceivedBytesTotal = metrics.MakeCounter(metrics.NetworkP2PReceivedBytesTotal)
var networkReceivedBytesByTag *metrics.TagCounter
var networkP2PReceivedBytesByTag *metrics.TagCounter

var networkMessageReceivedTotal = metrics.MakeCounter(metrics.NetworkMessageReceivedTotal)
var networkP2PMessageReceivedTotal = metrics.MakeCounter(metrics.NetworkP2PMessageReceivedTotal)
var networkMessageReceivedByTag *metrics.TagCounter
var networkP2PMessageReceivedByTag *metrics.TagCounter
var networkMessageSentTotal = metrics.MakeCounter(metrics.NetworkMessageSentTotal)
var networkP2PMessageSentTotal = metrics.MakeCounter(metrics.NetworkP2PMessageSentTotal)
var networkMessageSentByTag *metrics.TagCounter
var networkP2PMessageSentByTag *metrics.TagCounter

var networkHandleMicrosByTag *metrics.TagCounter
var networkHandleCountByTag *metrics.TagCounter

var networkConnectionsDroppedTotal = metrics.MakeCounter(metrics.NetworkConnectionsDroppedTotal)
var networkMessageQueueMicrosTotal = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_message_sent_queue_micros_total", Description: "Total microseconds message spent waiting in queue to be sent"})

var duplicateNetworkMessageReceivedTotal = metrics.MakeCounter(metrics.DuplicateNetworkMessageReceivedTotal)
var duplicateNetworkMessageReceivedBytesTotal = metrics.MakeCounter(metrics.DuplicateNetworkMessageReceivedBytesTotal)
var duplicateNetworkFilterReceivedTotal = metrics.MakeCounter(metrics.DuplicateNetworkFilterReceivedTotal)
var outgoingNetworkMessageFilteredOutTotal = metrics.MakeCounter(metrics.OutgoingNetworkMessageFilteredOutTotal)
var outgoingNetworkMessageFilteredOutBytesTotal = metrics.MakeCounter(metrics.OutgoingNetworkMessageFilteredOutBytesTotal)
var unknownProtocolTagMessagesTotal = metrics.MakeCounter(metrics.UnknownProtocolTagMessagesTotal)

var networkIncomingConnections = metrics.MakeGauge(metrics.NetworkIncomingConnections)
var networkOutgoingConnections = metrics.MakeGauge(metrics.NetworkOutgoingConnections)

var networkIncomingBufferMicros = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_rx_buffer_micros_total", Description: "microseconds spent by incoming messages on the receive buffer"})
var networkHandleMicros = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_rx_handle_micros_total", Description: "microseconds spent by protocol handlers in the receive thread"})

var networkBroadcasts = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_broadcasts_total", Description: "number of broadcast operations"})
var networkBroadcastQueueFull = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_broadcast_queue_full_total", Description: "number of messages that were drops due to full broadcast queue"})
var networkBroadcastQueueMicros = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_broadcast_queue_micros_total", Description: "microseconds broadcast requests sit on queue"})
var networkBroadcastSendMicros = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_broadcast_send_micros_total", Description: "microseconds spent broadcasting"})
var networkBroadcastsDropped = metrics.MakeCounter(metrics.MetricName{Name: "algod_broadcasts_dropped_total", Description: "number of broadcast messages not sent to any peer"})
var networkPeerBroadcastDropped = metrics.MakeCounter(metrics.MetricName{Name: "algod_peer_broadcast_dropped_total", Description: "number of broadcast messages not sent to some peer"})

var networkPeerIdentityDisconnect = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_identity_duplicate", Description: "number of times identity challenge cause us to disconnect a peer"})
var networkPeerIdentityError = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_identity_error", Description: "number of times an error occurs (besides expected) when processing identity challenges"})
var networkPeerAlreadyClosed = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_peer_already_closed", Description: "number of times a peer would be added but the peer connection is already closed"})

var networkSlowPeerDrops = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_slow_drops_total", Description: "number of peers dropped for being slow to send to"})
var networkIdlePeerDrops = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_idle_drops_total", Description: "number of peers dropped due to idle connection"})

var minPing = metrics.MakeGauge(metrics.MetricName{Name: "algod_network_peer_min_ping_seconds", Description: "Network round trip time to fastest peer in seconds."})

Check failure on line 102 in network/metrics.go

View workflow job for this annotation

GitHub Actions / reviewdog-warnings

[Lint Warnings] reported by reviewdog 🐶 var `minPing` is unused (unused) Raw Output: network/metrics.go:102:5: var `minPing` is unused (unused) var minPing = metrics.MakeGauge(metrics.MetricName{Name: "algod_network_peer_min_ping_seconds", Description: "Network round trip time to fastest peer in seconds."}) ^
var meanPing = metrics.MakeGauge(metrics.MetricName{Name: "algod_network_peer_mean_ping_seconds", Description: "Network round trip time to average peer in seconds."})

Check failure on line 103 in network/metrics.go

View workflow job for this annotation

GitHub Actions / reviewdog-warnings

[Lint Warnings] reported by reviewdog 🐶 var `meanPing` is unused (unused) Raw Output: network/metrics.go:103:5: var `meanPing` is unused (unused) var meanPing = metrics.MakeGauge(metrics.MetricName{Name: "algod_network_peer_mean_ping_seconds", Description: "Network round trip time to average peer in seconds."}) ^
var medianPing = metrics.MakeGauge(metrics.MetricName{Name: "algod_network_peer_median_ping_seconds", Description: "Network round trip time to median peer in seconds."})

Check failure on line 104 in network/metrics.go

View workflow job for this annotation

GitHub Actions / reviewdog-warnings

[Lint Warnings] reported by reviewdog 🐶 var `medianPing` is unused (unused) Raw Output: network/metrics.go:104:5: var `medianPing` is unused (unused) var medianPing = metrics.MakeGauge(metrics.MetricName{Name: "algod_network_peer_median_ping_seconds", Description: "Network round trip time to median peer in seconds."}) ^
var maxPing = metrics.MakeGauge(metrics.MetricName{Name: "algod_network_peer_max_ping_seconds", Description: "Network round trip time to slowest peer in seconds."})

Check failure on line 105 in network/metrics.go

View workflow job for this annotation

GitHub Actions / reviewdog-warnings

[Lint Warnings] reported by reviewdog 🐶 var `maxPing` is unused (unused) Raw Output: network/metrics.go:105:5: var `maxPing` is unused (unused) var maxPing = metrics.MakeGauge(metrics.MetricName{Name: "algod_network_peer_max_ping_seconds", Description: "Network round trip time to slowest peer in seconds."}) ^

var peers = metrics.MakeGauge(metrics.MetricName{Name: "algod_network_peers", Description: "Number of active peers."})
var incomingPeers = metrics.MakeGauge(metrics.MetricName{Name: "algod_network_incoming_peers", Description: "Number of active incoming peers."})
var outgoingPeers = metrics.MakeGauge(metrics.MetricName{Name: "algod_network_outgoing_peers", Description: "Number of active outgoing peers."})
22 changes: 16 additions & 6 deletions network/p2p/pubsubTracer.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/libp2p/go-libp2p/core/peer"
"github.com/libp2p/go-libp2p/core/protocol"

ap "github.com/algorand/go-algorand/protocol"
"github.com/algorand/go-algorand/util/metrics"
)

Expand All @@ -30,9 +31,15 @@ var transactionMessagesP2PRejectMessage = metrics.NewTagCounter(metrics.Transact
var transactionMessagesP2PDuplicateMessage = metrics.MakeCounter(metrics.TransactionMessagesP2PDuplicateMessage)
var transactionMessagesP2PDeliverMessage = metrics.MakeCounter(metrics.TransactionMessagesP2PDeliverMessage)
var transactionMessagesP2PUnderdeliverableMessage = metrics.MakeCounter(metrics.TransactionMessagesP2PUndeliverableMessage)
var transactionMessagesP2PValidateMessage = metrics.MakeCounter(metrics.TransactionMessagesP2PValidateMessage)
var transactionMessagesP2PSentMessages = metrics.MakeCounter(metrics.TransactionMessagesP2PSentMessage)
var transactionMessagesP2PSentBytes = metrics.MakeCounter(metrics.TransactionMessagesP2PSentBytes)

// This list must be exclusive of the gossipSubTags list in ../metrics.go
// TODO: as adding more messages into gossipSub need to maintain a mapping of topic to tag.
// There is a benefic of using const string in a comparison `*rpc.Publish[i].Topic == TXTopicName` below since it most likely optimized to a single instruction.
var tagTXList = []string{string(ap.Transaction)}
var networkP2PSentBytesByTag = metrics.NewTagCounterFiltered("algod_network_p2p_sent_bytes_{TAG}", "Number of bytes that were sent over the network for {TAG} messages", tagTXList, "")
var networkP2PReceivedBytesByTag = metrics.NewTagCounterFiltered("algod_network_p2p_received_bytes_{TAG}", "Number of bytes that were received from the network for {TAG} messages", tagTXList, "")
var networkP2PMessageReceivedByTag = metrics.NewTagCounterFiltered("algod_network_p2p_message_received_{TAG}", "Number of complete messages that were received from the network for {TAG} messages", tagTXList, "")
var networkP2PMessageSentByTag = metrics.NewTagCounterFiltered("algod_network_p2p_message_sent_{TAG}", "Number of complete messages that were sent to the network for {TAG} messages", tagTXList, "")

// pubsubTracer is a tracer for pubsub events used to track metrics.
type pubsubTracer struct{}
Expand All @@ -57,7 +64,10 @@ func (t pubsubTracer) Prune(p peer.ID, topic string) {}

// ValidateMessage is invoked when a message first enters the validation pipeline.
func (t pubsubTracer) ValidateMessage(msg *pubsub.Message) {
transactionMessagesP2PValidateMessage.Inc(nil)
if msg != nil && msg.Topic != nil && *msg.Topic == TXTopicName {
networkP2PReceivedBytesByTag.Add(string(ap.Transaction), uint64(len(msg.Data)))
networkP2PMessageReceivedByTag.Add(string(ap.Transaction), 1)

Check warning on line 69 in network/p2p/pubsubTracer.go

View check run for this annotation

Codecov / codecov/patch

network/p2p/pubsubTracer.go#L67-L69

Added lines #L67 - L69 were not covered by tests
}
}

// DeliverMessage is invoked when a message is delivered
Expand Down Expand Up @@ -92,8 +102,8 @@ func (t pubsubTracer) SendRPC(rpc *pubsub.RPC, p peer.ID) {
if rpc != nil && len(rpc.Publish) > 0 {
for i := range rpc.Publish {
if rpc.Publish[i] != nil && rpc.Publish[i].Topic != nil && *rpc.Publish[i].Topic == TXTopicName {
transactionMessagesP2PSentMessages.Inc(nil)
transactionMessagesP2PSentBytes.AddUint64(uint64(len(rpc.Publish[0].Data)), nil)
networkP2PSentBytesByTag.Add(string(ap.Transaction), uint64(len(rpc.Publish[0].Data)))
networkP2PMessageSentByTag.Add(string(ap.Transaction), 1)

Check warning on line 106 in network/p2p/pubsubTracer.go

View check run for this annotation

Codecov / codecov/patch

network/p2p/pubsubTracer.go#L101-L106

Added lines #L101 - L106 were not covered by tests
}
}
}
Expand Down
8 changes: 7 additions & 1 deletion network/p2pNetwork.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,11 @@ type p2pPeerStats struct {
txReceived atomic.Uint64
}

// gossipSubTags defines protocol messages that are relayed using GossipSub
var gossipSubTags = map[protocol.Tag]string{
protocol.TxnTag: p2p.TXTopicName,
}

// NewP2PNetwork returns an instance of GossipNode that uses the p2p.Service
func NewP2PNetwork(log logging.Logger, cfg config.Local, datadir string, phonebookAddresses []string, genesisID string, networkID protocol.NetworkID, node NodeInfo, identityOpts *identityOpts) (*P2PNetwork, error) {
const readBufferLen = 2048
Expand All @@ -214,7 +219,7 @@ func NewP2PNetwork(log logging.Logger, cfg config.Local, datadir string, phonebo
config: cfg,
genesisID: genesisID,
networkID: networkID,
topicTags: map[protocol.Tag]string{protocol.TxnTag: p2p.TXTopicName},
topicTags: gossipSubTags,
wsPeers: make(map[peer.ID]*wsPeer),
wsPeersToIDs: make(map[*wsPeer]peer.ID),
peerStats: make(map[peer.ID]*p2pPeerStats),
Expand Down Expand Up @@ -791,6 +796,7 @@ func (n *P2PNetwork) wsStreamHandler(ctx context.Context, p2pPeer peer.ID, strea
conn: &wsPeerConnP2P{stream: stream},
outgoing: !incoming,
identity: netIdentPeerID,
peerType: peerTypeP2P,
}
protos, err := n.pstore.GetProtocols(p2pPeer)
if err != nil {
Expand Down
30 changes: 0 additions & 30 deletions network/wsNetwork.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ import (
tools_network "github.com/algorand/go-algorand/tools/network"
"github.com/algorand/go-algorand/tools/network/dnssec"
"github.com/algorand/go-algorand/util"
"github.com/algorand/go-algorand/util/metrics"
)

const incomingThreads = 20
Expand Down Expand Up @@ -117,35 +116,6 @@ const wsMaxHeaderBytes = 4096
// used from the ReservedFDs pool, as this pool is meant for short-lived usage (dns queries, disk i/o, etc.)
const ReservedHealthServiceConnections = 10

var networkIncomingConnections = metrics.MakeGauge(metrics.NetworkIncomingConnections)
var networkOutgoingConnections = metrics.MakeGauge(metrics.NetworkOutgoingConnections)

var networkIncomingBufferMicros = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_rx_buffer_micros_total", Description: "microseconds spent by incoming messages on the receive buffer"})
var networkHandleMicros = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_rx_handle_micros_total", Description: "microseconds spent by protocol handlers in the receive thread"})

var networkBroadcasts = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_broadcasts_total", Description: "number of broadcast operations"})
var networkBroadcastQueueFull = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_broadcast_queue_full_total", Description: "number of messages that were drops due to full broadcast queue"})
var networkBroadcastQueueMicros = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_broadcast_queue_micros_total", Description: "microseconds broadcast requests sit on queue"})
var networkBroadcastSendMicros = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_broadcast_send_micros_total", Description: "microseconds spent broadcasting"})
var networkBroadcastsDropped = metrics.MakeCounter(metrics.MetricName{Name: "algod_broadcasts_dropped_total", Description: "number of broadcast messages not sent to any peer"})
var networkPeerBroadcastDropped = metrics.MakeCounter(metrics.MetricName{Name: "algod_peer_broadcast_dropped_total", Description: "number of broadcast messages not sent to some peer"})

var networkPeerIdentityDisconnect = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_identity_duplicate", Description: "number of times identity challenge cause us to disconnect a peer"})
var networkPeerIdentityError = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_identity_error", Description: "number of times an error occurs (besides expected) when processing identity challenges"})
var networkPeerAlreadyClosed = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_peer_already_closed", Description: "number of times a peer would be added but the peer connection is already closed"})

var networkSlowPeerDrops = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_slow_drops_total", Description: "number of peers dropped for being slow to send to"})
var networkIdlePeerDrops = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_idle_drops_total", Description: "number of peers dropped due to idle connection"})

var minPing = metrics.MakeGauge(metrics.MetricName{Name: "algod_network_peer_min_ping_seconds", Description: "Network round trip time to fastest peer in seconds."})
var meanPing = metrics.MakeGauge(metrics.MetricName{Name: "algod_network_peer_mean_ping_seconds", Description: "Network round trip time to average peer in seconds."})
var medianPing = metrics.MakeGauge(metrics.MetricName{Name: "algod_network_peer_median_ping_seconds", Description: "Network round trip time to median peer in seconds."})
var maxPing = metrics.MakeGauge(metrics.MetricName{Name: "algod_network_peer_max_ping_seconds", Description: "Network round trip time to slowest peer in seconds."})

var peers = metrics.MakeGauge(metrics.MetricName{Name: "algod_network_peers", Description: "Number of active peers."})
var incomingPeers = metrics.MakeGauge(metrics.MetricName{Name: "algod_network_incoming_peers", Description: "Number of active incoming peers."})
var outgoingPeers = metrics.MakeGauge(metrics.MetricName{Name: "algod_network_outgoing_peers", Description: "Number of active outgoing peers."})

// peerDisconnectionAckDuration defines the time we would wait for the peer disconnection to complete.
const peerDisconnectionAckDuration = 5 * time.Second

Expand Down
Loading

0 comments on commit 34f8746

Please sign in to comment.