Skip to content

Commit

Permalink
Merge pull request #324 from jzwlqx/feature/metrics-helper
Browse files Browse the repository at this point in the history
add probe help
  • Loading branch information
jzwlqx authored Dec 10, 2024
2 parents ff82d54 + 1856efc commit 2d9605d
Show file tree
Hide file tree
Showing 16 changed files with 192 additions and 72 deletions.
13 changes: 9 additions & 4 deletions pkg/exporter/probe/legacy.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ func BuildStandardMetricsLabelValues(entity *nettop.Entity) []string {
return append(metaPodLabels, BuildAdditionalLabelsValues(entity.GetLabels())...)
}

// LegacyMetric describes one metric exposed through the legacy batch
// collector: the short metric name and its human-readable help text.
type LegacyMetric struct {
// Name is the short metric name; newLegacyBatchMetrics combines it with the
// module name via newMetricsName to build the descriptor name.
Name string
// Help is the help string passed to prometheus.NewDesc for this metric.
Help string
}

func InitAdditionalLabels(additionalLabels []string) error {
if len(additionalLabels) == 0 {
return nil
Expand Down Expand Up @@ -84,15 +89,15 @@ func newMetricsName(module, name string) string {

// LegacyCollector is the callback a legacy probe supplies to gather its data.
// It returns a two-level map of uint64 values plus an error.
// NOTE(review): in the probes touched by this change the outer key is the
// metric name and the inner key is a network-namespace id — confirm this holds
// for every implementation.
type LegacyCollector func() (map[string]map[uint32]uint64, error)

func NewLegacyBatchMetrics(module string, metrics []string, collector LegacyCollector) prometheus.Collector {
// NewLegacyBatchMetrics returns a prometheus.Collector for the given module,
// built from the supplied metric name/help pairs and the collector callback.
// It is the exported entry point and delegates directly to
// newLegacyBatchMetrics.
func NewLegacyBatchMetrics(module string, metrics []LegacyMetric, collector LegacyCollector) prometheus.Collector {
return newLegacyBatchMetrics(module, metrics, collector)
}

func newLegacyBatchMetrics(module string, metrics []string, collector LegacyCollector) prometheus.Collector {
func newLegacyBatchMetrics(module string, metrics []LegacyMetric, collector LegacyCollector) prometheus.Collector {
descs := make(map[string]*prometheus.Desc)
for _, m := range metrics {
newName := newMetricsName(module, m)
descs[newName] = prometheus.NewDesc(newName, "", StandardMetricsLabels, nil)
newName := newMetricsName(module, m.Name)
descs[newName] = prometheus.NewDesc(newName, m.Help, StandardMetricsLabels, nil)
}
return &legacyBatchMetrics{
module: module,
Expand Down
16 changes: 14 additions & 2 deletions pkg/exporter/probe/nlconntrack/conntrackmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,19 @@ var (
MaxEntries = "maxentries"

// stats of conntrack status summary
conntrackMetrics = []string{Found, Invalid, Ignore, Insert, InsertFailed, Drop, EarlyDrop, Error, SearchRestart, Entries, MaxEntries}
// conntrackMetrics lists the conntrack statistics exported by this probe,
// each paired with its help text. CollectOnce looks up every Name in the
// collected stats. The help strings follow the kernel's conntrack statistics
// semantics: invalid/ignore/drop count packets, and early_drop counts entries
// evicted to make room when the table is full.
conntrackMetrics = []probe.LegacyMetric{
	{Name: Found, Help: "The total number of tracked connections found in the conntrack table."},
	{Name: Invalid, Help: "The total number of packets seen which could not be tracked."},
	{Name: Ignore, Help: "The total number of packets seen which were already connected to a conntrack entry."},
	{Name: Insert, Help: "The total number of connections inserted into the conntrack table."},
	{Name: InsertFailed, Help: "The total number of failed attempts to insert a connection into the conntrack table."},
	{Name: Drop, Help: "The total number of packets dropped due to conntrack failure (new entry allocation failed or a protocol helper dropped the packet)."},
	{Name: EarlyDrop, Help: "The total number of conntrack entries dropped to make room for new ones when the table was full."},
	{Name: Error, Help: "The total number of errors encountered while managing connections in the conntrack table."},
	{Name: SearchRestart, Help: "The total number of times the search for a connection entry was restarted."},
	{Name: Entries, Help: "The current number of connections tracked in the conntrack table."},
	{Name: MaxEntries, Help: "The maximum number of entries allowed in the conntrack table."},
}
)

func metricsProbeCreator() (probe.MetricsProbe, error) {
Expand Down Expand Up @@ -80,7 +92,7 @@ func (c *conntrackMetricsProbe) CollectOnce() (map[string]map[uint32]uint64, err
}

for _, metric := range conntrackMetrics {
resMap[metric] = map[uint32]uint64{uint32(nettop.InitNetns): stats[metric]}
resMap[metric.Name] = map[uint32]uint64{uint32(nettop.InitNetns): stats[metric.Name]}
}

return resMap, nil
Expand Down
11 changes: 9 additions & 2 deletions pkg/exporter/probe/nlqdisc/nlqdiscstats.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,14 @@ var (
Backlog = "backlog"
Overlimits = "overlimits"

qdiscMetrics = []string{Bytes, Packets, Drops, Qlen, Backlog, Overlimits}
// qdiscMetrics lists the queuing-discipline statistics exported by this probe
// together with their help text. CollectOnce creates one result map per entry,
// keyed by the entry's Name.
qdiscMetrics = []probe.LegacyMetric{
{Name: Bytes, Help: "The total number of bytes transmitted through the queuing discipline."},
{Name: Packets, Help: "The total number of packets transmitted through the queuing discipline."},
{Name: Drops, Help: "The total number of packets dropped by the queuing discipline."},
{Name: Qlen, Help: "The current length of the queue (the number of packets queued)."},
{Name: Backlog, Help: "The total amount of data currently in the queue (in bytes)."},
{Name: Overlimits, Help: "The total number of packets that exceeded the configured limits."},
}
)

func init() {
Expand Down Expand Up @@ -80,7 +87,7 @@ func (p *Probe) Stop(_ context.Context) error {
func (p *Probe) CollectOnce() (map[string]map[uint32]uint64, error) {
resMap := make(map[string]map[uint32]uint64)
for _, metric := range qdiscMetrics {
resMap[metric] = make(map[uint32]uint64)
resMap[metric.Name] = make(map[uint32]uint64)
}

ets := nettop.GetAllUniqueNetnsEntity()
Expand Down
4 changes: 2 additions & 2 deletions pkg/exporter/probe/procfd/procfd.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ func fdProbeCreator() (probe.MetricsProbe, error) {
Subsystem: probeName,
VariableLabels: probe.StandardMetricsLabels,
SingleMetricsOpts: []probe.SingleMetricsOpts{
{Name: OpenFD, ValueType: prometheus.GaugeValue},
{Name: OpenSocket, ValueType: prometheus.GaugeValue},
{Name: OpenFD, Help: "The total number of open file descriptors for the process", ValueType: prometheus.GaugeValue},
{Name: OpenSocket, Help: "The total number of open sockets for the process", ValueType: prometheus.GaugeValue},
},
}
metrics := probe.NewBatchMetrics(opts, p.collectOnce)
Expand Down
8 changes: 4 additions & 4 deletions pkg/exporter/probe/procio/procio.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ func ioProbeCreator() (probe.MetricsProbe, error) {
Subsystem: probeName,
VariableLabels: probe.StandardMetricsLabels,
SingleMetricsOpts: []probe.SingleMetricsOpts{
{Name: IOReadSyscall, ValueType: prometheus.CounterValue},
{Name: IOWriteSyscall, ValueType: prometheus.CounterValue},
{Name: IOReadBytes, ValueType: prometheus.CounterValue},
{Name: IOWriteBytes, ValueType: prometheus.CounterValue},
{Name: IOReadSyscall, Help: "The total number of read system calls made by the process", ValueType: prometheus.CounterValue},
{Name: IOWriteSyscall, Help: "The total number of write system calls made by the process", ValueType: prometheus.CounterValue},
{Name: IOReadBytes, Help: "The total number of bytes read by the process", ValueType: prometheus.CounterValue},
{Name: IOWriteBytes, Help: "The total number of bytes written by the process", ValueType: prometheus.CounterValue},
},
}
metrics := probe.NewBatchMetrics(opts, p.collectOnce)
Expand Down
8 changes: 7 additions & 1 deletion pkg/exporter/probe/procipvs/ipvsservicestats.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,13 @@ var (
IncomingBytes = "incomingbytes"
OutgoingBytes = "outgoingbytes"

IPVSMetrics = []string{Connections, IncomingPackets, OutgoingBytes, IncomingBytes, OutgoingPackets}
// IPVSMetrics lists the IPVS statistics known to this probe as name/help
// pairs.
IPVSMetrics = []probe.LegacyMetric{
{Name: Connections, Help: "The total number of connections handled by the IPVS (IP Virtual Server)"},
{Name: IncomingPackets, Help: "The total number of incoming packets processed by the IPVS"},
{Name: OutgoingBytes, Help: "The total number of bytes sent out by the IPVS"},
{Name: IncomingBytes, Help: "The total number of bytes received by the IPVS"},
{Name: OutgoingPackets, Help: "The total number of outgoing packets processed by the IPVS"},
}
)

func init() {
Expand Down
13 changes: 11 additions & 2 deletions pkg/exporter/probe/procnetdev/procnetdev.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,16 @@ const (
)

var (
NetdevMetrics = []string{RxBytes, RxErrors, TxBytes, TxErrors, RxPackets, RxDropped, TxPackets, TxDropped}
// NetdevMetrics lists the network-device statistics exported by this probe as
// name/help pairs. collect pre-creates one result map per entry, keyed by
// Name.
NetdevMetrics = []probe.LegacyMetric{
{Name: RxBytes, Help: "The total number of bytes received on the network interface."},
{Name: RxErrors, Help: "The total number of errors encountered while receiving on the network interface."},
{Name: TxBytes, Help: "The total number of bytes transmitted on the network interface."},
{Name: TxErrors, Help: "The total number of errors encountered while transmitting on the network interface."},
{Name: RxPackets, Help: "The total number of packets received on the network interface."},
{Name: RxDropped, Help: "The total number of received packets that were dropped on the network interface."},
{Name: TxPackets, Help: "The total number of packets transmitted on the network interface."},
{Name: TxDropped, Help: "The total number of transmitted packets that were dropped on the network interface."},
}
)

func init() {
Expand Down Expand Up @@ -60,7 +69,7 @@ func (s *ProcNetdev) CollectOnce() (map[string]map[uint32]uint64, error) {
func collect(nslist []*nettop.Entity) (map[string]map[uint32]uint64, error) {
resMap := make(map[string]map[uint32]uint64)
for _, m := range NetdevMetrics {
resMap[m] = make(map[uint32]uint64)
resMap[m.Name] = make(map[uint32]uint64)
}

netdev := getAllNetdev(nslist)
Expand Down
74 changes: 38 additions & 36 deletions pkg/exporter/probe/procnetstat/procnetstat.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,38 +60,40 @@ const (
)

var (
TCPExtMetrics = []string{TCPListenDrops,
TCPListenOverflows,
TCPSynRetrans,
TCPFastRetrans,
TCPRetransFail,
TCPTimeouts,
TCPAbortOnClose,
TCPAbortOnMemory,
TCPAbortOnTimeout,
TCPAbortOnLinger,
TCPAbortOnData,
TCPAbortFailed,
TCPACKSkippedSynRecv,
TCPACKSkippedPAWS,
TCPACKSkippedSeq,
TCPACKSkippedFinWait2,
TCPACKSkippedTimeWait,
TCPACKSkippedChallenge,
TCPRcvQDrop,
TCPMemoryPressures,
TCPMemoryPressuresChrono,
PAWSActive,
PAWSEstab,
EmbryonicRsts,
TCPWinProbe,
TCPKeepAlive,
TCPMTUPFail,
TCPMTUPSuccess,
TCPZeroWindowDrop,
TCPBacklogDrop,
PFMemallocDrop,
TCPWqueueTooBig}
// TCPExtMetrics lists the TcpExt counters exported by this probe together
// with their help text. collect pre-creates one result map per entry and
// parses the value found under each Name in the TcpExt statistics.
//
// Every entry is a monotonically accumulated kernel value: the PAWS entries
// are rejection counters (not flags), and TCPMemoryPressuresChrono is an
// accumulated duration in milliseconds rather than an event count.
TCPExtMetrics = []probe.LegacyMetric{
	{Name: TCPListenDrops, Help: "The total number of TCP connection requests that were dropped because the listen queue was full."},
	{Name: TCPListenOverflows, Help: "The total number of times the TCP listen queue has overflown."},
	{Name: TCPSynRetrans, Help: "The total number of SYN packets that were retransmitted."},
	{Name: TCPFastRetrans, Help: "The total number of fast retransmissions made by TCP."},
	{Name: TCPRetransFail, Help: "The total number of failed retransmissions in TCP."},
	{Name: TCPTimeouts, Help: "The total number of TCP timeouts."},
	{Name: TCPAbortOnClose, Help: "The number of TCP connections that were aborted on close."},
	{Name: TCPAbortOnMemory, Help: "The number of TCP connections that were aborted due to memory allocation failures."},
	{Name: TCPAbortOnTimeout, Help: "The number of TCP connections that were aborted due to timeouts."},
	{Name: TCPAbortOnLinger, Help: "The number of TCP connections that were aborted due to linger timeouts."},
	{Name: TCPAbortOnData, Help: "The number of TCP connections that were aborted due to data-related issues."},
	{Name: TCPAbortFailed, Help: "The number of attempts to abort TCP connections that failed."},
	{Name: TCPACKSkippedSynRecv, Help: "The number of ACKs skipped while in SYN_RECV state."},
	{Name: TCPACKSkippedPAWS, Help: "The number of ACKs skipped due to PAWS (Protection Against Wrapped Sequence numbers)."},
	{Name: TCPACKSkippedSeq, Help: "The number of ACKs skipped due to sequence number issues."},
	{Name: TCPACKSkippedFinWait2, Help: "The number of ACKs skipped while in FIN_WAIT_2 state."},
	{Name: TCPACKSkippedTimeWait, Help: "The number of ACKs skipped while in TIME_WAIT state."},
	{Name: TCPACKSkippedChallenge, Help: "The number of challenge ACKs that were not sent because of the out-of-window ACK rate limit."},
	{Name: TCPRcvQDrop, Help: "The total number of received packets that were dropped due to queue overflow."},
	{Name: TCPMemoryPressures, Help: "The total number of occasions where the TCP stack experienced memory pressure."},
	{Name: TCPMemoryPressuresChrono, Help: "The cumulative time, in milliseconds, that the TCP stack has spent under memory pressure."},
	{Name: PAWSActive, Help: "The total number of active connection attempts rejected by PAWS because of an invalid timestamp."},
	{Name: PAWSEstab, Help: "The total number of packets rejected by PAWS on established connections because of an old timestamp."},
	{Name: EmbryonicRsts, Help: "The number of embryonic (half-open) connections that were reset."},
	{Name: TCPWinProbe, Help: "The total number of window probes sent to check for window size."},
	{Name: TCPKeepAlive, Help: "The total number of TCP keepalive packets sent."},
	{Name: TCPMTUPFail, Help: "The total number of MTU (Maximum Transmission Unit) probe failures."},
	{Name: TCPMTUPSuccess, Help: "The total number of successful MTU (Maximum Transmission Unit) discoveries."},
	{Name: TCPZeroWindowDrop, Help: "The total number of packets dropped due to a zero window condition."},
	{Name: TCPBacklogDrop, Help: "The total number of packets dropped from the TCP backlog queue."},
	{Name: PFMemallocDrop, Help: "The total number of packets dropped because they were allocated from pfmemalloc reserves but delivered to a socket without SOCK_MEMALLOC."},
	{Name: TCPWqueueTooBig, Help: "The total number of TCP send queue drops due to the queue being too large."},
}
)

func init() {
Expand Down Expand Up @@ -129,7 +131,7 @@ func collect(nslist []*nettop.Entity) (map[string]map[uint32]uint64, error) {
resMap := make(map[string]map[uint32]uint64)

for _, stat := range TCPExtMetrics {
resMap[stat] = make(map[uint32]uint64)
resMap[stat.Name] = make(map[uint32]uint64)
}

for _, et := range nslist {
Expand All @@ -141,13 +143,13 @@ func collect(nslist []*nettop.Entity) (map[string]map[uint32]uint64, error) {

extstats := stats[ProtocolTCPExt]
for _, stat := range TCPExtMetrics {
if _, ok := extstats[stat]; ok {
data, err := strconv.ParseUint(extstats[stat], 10, 64)
if _, ok := extstats[stat.Name]; ok {
data, err := strconv.ParseUint(extstats[stat.Name], 10, 64)
if err != nil {
	// Log only the metric name: stat is a probe.LegacyMetric, and formatting
	// the whole struct with %s would also dump its help text into the log.
	log.Errorf("%s failed parse stat %s, pid: %d err: %v", probeName, stat.Name, et.GetPid(), err)
	continue
}
resMap[stat][uint32(et.GetNetns())] += data
resMap[stat.Name][uint32(et.GetNetns())] += data
}
}
}
Expand Down
52 changes: 47 additions & 5 deletions pkg/exporter/probe/procsnmp/procsnmp.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,53 @@ const (
)

var (
TCPStatMetrcis = []string{TCPActiveOpens, TCPPassiveOpens, TCPRetransSegs, TCPAttemptFails, TCPEstabResets, TCPCurrEstab, TCPInSegs, TCPOutSegs, TCPInErrs, TCPOutRsts}
UDPStatMetrics = []string{UDPInDatagrams, UDPNoPorts, UDPInErrors, UDPOutDatagrams, UDPRcvbufErrors, UDPSndbufErrors, UDPInCsumErrors, UDPIgnoredMulti}
IPMetrics = []string{IPForwarding, IPDefaultTTL, IPInReceives, IPInHdrErrors, IPInAddrErrors, IPForwDatagrams, IPInUnknownProtos, IPInDiscards, IPInDelivers, IPOutRequests, IPOutDiscards, IPOutNoRoutes, IPReasmTimeout, IPReasmReqds, IPReasmOKs, IPReasmFails, IPFragOKs, IPFragFails, IPFragCreates}
// TCPStatMetrcis lists the TCP statistics exported by this probe as name/help
// pairs; it is registered in metricsMap under the TCP key.
// NOTE(review): the identifier is spelled "Metrcis"; it is exported, so
// renaming it would need a coordinated change in all callers.
TCPStatMetrcis = []probe.LegacyMetric{
{Name: TCPActiveOpens, Help: "The number of active TCP connections opened."},
{Name: TCPPassiveOpens, Help: "The number of passive TCP connections opened (i.e., connections established by accepting incoming connections)."},
{Name: TCPRetransSegs, Help: "The total number of segments that have been retransmitted."},
{Name: TCPAttemptFails, Help: "The number of failed attempts to establish a TCP connection."},
{Name: TCPEstabResets, Help: "The number of established TCP connections that were reset."},
{Name: TCPCurrEstab, Help: "The current number of established TCP connections."},
{Name: TCPInSegs, Help: "The total number of TCP segments received."},
{Name: TCPOutSegs, Help: "The total number of TCP segments sent."},
{Name: TCPInErrs, Help: "The total number of erroneous packets received on TCP."},
{Name: TCPOutRsts, Help: "The total number of TCP segments sent with the RST flag set."},
}

// UDPStatMetrics lists the UDP statistics exported by this probe as name/help
// pairs; it is registered in metricsMap under the UDP key.
UDPStatMetrics = []probe.LegacyMetric{
{Name: UDPInDatagrams, Help: "The total number of UDP datagrams received."},
{Name: UDPNoPorts, Help: "The total number of UDP datagrams received for which there was no port at the destination."},
{Name: UDPInErrors, Help: "The total number of erroneous received UDP packets."},
{Name: UDPOutDatagrams, Help: "The total number of UDP datagrams sent."},
{Name: UDPRcvbufErrors, Help: "The total number of UDP datagrams dropped due to socket receive buffer errors."},
{Name: UDPSndbufErrors, Help: "The total number of UDP datagrams dropped due to socket send buffer errors."},
{Name: UDPInCsumErrors, Help: "The total number of UDP datagrams received with a checksum error."},
{Name: UDPIgnoredMulti, Help: "The total number of received UDP multicast packets that were ignored."},
}

// IPMetrics lists the IP statistics exported by this probe as name/help
// pairs; it is registered in metricsMap under the IP key.
//
// The Forwarding field follows the MIB-II ipForwarding encoding used by
// /proc/net/snmp: 1 means forwarding, 2 means not forwarding (never 0).
IPMetrics = []probe.LegacyMetric{
	{Name: IPForwarding, Help: "Indicates whether IP forwarding is enabled (1 for enabled, 2 for disabled)."},
	{Name: IPDefaultTTL, Help: "The default time-to-live (TTL) value for IP packets."},
	{Name: IPInReceives, Help: "The total number of IP packets received."},
	{Name: IPInHdrErrors, Help: "The total number of received IP packets that had a header error."},
	{Name: IPInAddrErrors, Help: "The total number of received IP packets that were discarded due to address errors."},
	{Name: IPForwDatagrams, Help: "The total number of IP packets forwarded by this machine."},
	{Name: IPInUnknownProtos, Help: "The total number of received IP packets for which the protocol is not known."},
	{Name: IPInDiscards, Help: "The total number of received IP packets that were discarded."},
	{Name: IPInDelivers, Help: "The total number of delivered IP packets."},
	{Name: IPOutRequests, Help: "The total number of IP packets sent out."},
	{Name: IPOutDiscards, Help: "The total number of outgoing IP packets that were discarded."},
	{Name: IPOutNoRoutes, Help: "The total number of outgoing IP packets for which no route could be found."},
	{Name: IPReasmTimeout, Help: "The total number of times that IP reassembly timed out."},
	{Name: IPReasmReqds, Help: "The total number of IP reassembly requests made."},
	{Name: IPReasmOKs, Help: "The total number of successful IP reassembly operations."},
	{Name: IPReasmFails, Help: "The total number of failed IP reassembly operations."},
	{Name: IPFragOKs, Help: "The total number of IP packets that were fragmented successfully."},
	{Name: IPFragFails, Help: "The total number of IP packets that failed to fragment."},
	{Name: IPFragCreates, Help: "The total number of IP fragments created."},
}

metricsMap = map[string][]string{
metricsMap = map[string][]probe.LegacyMetric{
TCP: TCPStatMetrcis,
UDP: UDPStatMetrics,
IP: IPMetrics,
Expand Down Expand Up @@ -165,7 +207,7 @@ func collect() (map[string]map[string]map[uint32]uint64, error) {
for proto, metricsList := range metricsMap {
res[proto] = make(map[string]map[uint32]uint64)
for _, metrics := range metricsList {
res[proto][metrics] = make(map[uint32]uint64)
res[proto][metrics.Name] = make(map[uint32]uint64)
}
}

Expand Down
13 changes: 10 additions & 3 deletions pkg/exporter/probe/procsock/procsock.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,15 @@ const (
)

var (
TCPSockStatMetrics = []string{TCPSockInuse, TCPSockOrphan, TCPSockTimewait, TCPSockeAlloc, TCPSockeMem}
probeName = "sock"
// TCPSockStatMetrics lists the TCP socket statistics exported by this probe as
// name/help pairs. collect pre-creates one result map per entry, keyed by
// Name.
TCPSockStatMetrics = []probe.LegacyMetric{
{Name: TCPSockInuse, Help: "The total number of TCP sockets currently in use."},
{Name: TCPSockOrphan, Help: "The total number of orphaned TCP sockets."},
{Name: TCPSockTimewait, Help: "The total number of TCP sockets in the TIME_WAIT state."},
{Name: TCPSockeAlloc, Help: "The total number of TCP sockets allocated."},
{Name: TCPSockeMem, Help: "The total amount of memory allocated for TCP sockets."},
}

probeName = "sock"
)

func init() {
Expand Down Expand Up @@ -70,7 +77,7 @@ type tcpsockstat struct {
func collect() (resMap map[string]map[uint32]uint64, err error) {
resMap = make(map[string]map[uint32]uint64)
for _, stat := range TCPSockStatMetrics {
resMap[stat] = map[uint32]uint64{}
resMap[stat.Name] = map[uint32]uint64{}
}

// for _, nslogic := range nslist {
Expand Down
9 changes: 6 additions & 3 deletions pkg/exporter/probe/procsoftnet/procsoftnet.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ const (
)

var (
softnetMetrics = []string{SNProcessed, SNDropped}
// softnetMetrics lists the softnet statistics exported by this probe as
// name/help pairs. collect creates one result map per entry and fills it with
// the per-namespace value found under the entry's Name.
softnetMetrics = []probe.LegacyMetric{
{Name: SNProcessed, Help: "The total number of packets processed by the softnet layer"},
{Name: SNDropped, Help: "The total number of packets dropped by the softnet layer"},
}
)

func init() {
Expand Down Expand Up @@ -63,7 +66,7 @@ func collect(nslist []*nettop.Entity) (map[string]map[uint32]uint64, error) {
resMap := make(map[string]map[uint32]uint64)

for _, m := range softnetMetrics {
resMap[m] = map[uint32]uint64{}
resMap[m.Name] = map[uint32]uint64{}
}

for _, ns := range nslist {
Expand All @@ -72,7 +75,7 @@ func collect(nslist []*nettop.Entity) (map[string]map[uint32]uint64, error) {
continue
}
for _, m := range softnetMetrics {
resMap[m][uint32(ns.GetNetns())] = stat[m]
resMap[m.Name][uint32(ns.GetNetns())] = stat[m.Name]
}
}
return resMap, nil
Expand Down
Loading

0 comments on commit 2d9605d

Please sign in to comment.