From 3580c73c5802c507bf9b76f45b6922bbb3815b5b Mon Sep 17 00:00:00 2001 From: Navid Yaghoobi Date: Sun, 9 Jun 2024 09:39:13 +1000 Subject: [PATCH] new metric - container network dropped,errors,packets Signed-off-by: Navid Yaghoobi --- .packit.yaml | 12 +- README.md | 95 ++++----- collector/container.go | 463 +++++++++++++++++++++++++---------------- pdcs/container.go | 72 ++++--- 4 files changed, 372 insertions(+), 270 deletions(-) diff --git a/.packit.yaml b/.packit.yaml index 0909f3dc2..57e6e61d0 100644 --- a/.packit.yaml +++ b/.packit.yaml @@ -12,18 +12,14 @@ jobs: - openssl-devel - rpkg targets: - - fedora-all-x86_64 - - fedora-all-aarch64 - - fedora-all-ppc64le - - fedora-all-s390x + - fedora-latest-stable + - fedora-development - epel-9-x86_64 - epel-9-aarch64 - - epel-9-ppc64le - - epel-9-s390x - centos-stream-9-x86_64 - centos-stream-9-aarch64 - - centos-stream-9-ppc64le - - centos-stream-9-s390x + - centos-stream-10-x86_64 + - centos-stream-10-aarch64 actions: post-upstream-clone: - "rpkg spec --outdir ./" diff --git a/README.md b/README.md index 555a56c2f..6fa125963 100644 --- a/README.md +++ b/README.md @@ -88,137 +88,116 @@ The table below list all existing collector and their description. # TYPE podman_container_info gauge podman_container_info{id="19286a13dc23",image="docker.io/library/sonarqube:latest",name="sonar01",pod_id="",pod_name="",ports="0.0.0.0:9000->9000/tcp"} 1 podman_container_info{id="482113b805f7",image="docker.io/library/httpd:latest",name="web_server",pod_id="",pod_name="",ports="0.0.0.0:8000->80/tcp"} 1 -podman_container_info{id="642490688d9c",image="docker.io/grafana/grafana:latest",name="grafana",pod_id="",pod_name="",ports="0.0.0.0:3000->3000/tcp"} 1 -podman_container_info{id="ad36e85960a1",image="docker.io/library/busybox:latest",name="busybox01",pod_id="3e8bae64e9af",pod_name="pod01",ports=""} 1 -podman_container_info{id="dda983cc3ecf",image="localhost/podman-pause:4.1.0-1651853754",name="3e8bae64e9af-infra",pod_id="3e8bae64e9af",pod_name="pod01",ports=""} 1 # HELP podman_container_state Container current state (-1=unknown,0=created,1=initialized,2=running,3=stopped,4=paused,5=exited,6=removing,7=stopping). # TYPE podman_container_state gauge podman_container_state{id="19286a13dc23",pod_id="",pod_name=""} 2 podman_container_state{id="482113b805f7",pod_id="",pod_name=""} 4 -podman_container_state{id="642490688d9c",pod_id="",pod_name=""} 2 -podman_container_state{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 5 -podman_container_state{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_name="pod01"} 2 # HELP podman_container_block_input_total Container block input. # TYPE podman_container_block_input_total counter podman_container_block_input_total{id="19286a13dc23",pod_id="",pod_name=""} 49152 podman_container_block_input_total{id="482113b805f7",pod_id="",pod_name=""} 0 -podman_container_block_input_total{id="642490688d9c",pod_id="",pod_name=""} 1.41533184e+08 -podman_container_block_input_total{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 0 -podman_container_block_input_total{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_name="pod01"} 0 # HELP podman_container_block_output_total Container block output. # TYPE podman_container_block_output_total counter podman_container_block_output_total{id="19286a13dc23",pod_id="",pod_name=""} 1.790976e+06 podman_container_block_output_total{id="482113b805f7",pod_id="",pod_name=""} 8192 -podman_container_block_output_total{id="642490688d9c",pod_id="",pod_name=""} 4.69248e+07 -podman_container_block_output_total{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 0 -podman_container_block_output_total{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_name="pod01"} 0 # HELP podman_container_cpu_seconds_total total CPU time spent for container in seconds. # TYPE podman_container_cpu_seconds_total counter podman_container_cpu_seconds_total{id="19286a13dc23",pod_id="",pod_name=""} 83.231904 podman_container_cpu_seconds_total{id="482113b805f7",pod_id="",pod_name=""} 0.069712 -podman_container_cpu_seconds_total{id="642490688d9c",pod_id="",pod_name=""} 3.028685 -podman_container_cpu_seconds_total{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 0 -podman_container_cpu_seconds_total{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_name="pod01"} 0.011687 # HELP podman_container_cpu_system_seconds_total total system CPU time spent for container in seconds. # TYPE podman_container_cpu_system_seconds_total counter podman_container_cpu_system_seconds_total{id="19286a13dc23",pod_id="",pod_name=""} 0.007993418 podman_container_cpu_system_seconds_total{id="482113b805f7",pod_id="",pod_name=""} 4.8591e-05 -podman_container_cpu_system_seconds_total{id="642490688d9c",pod_id="",pod_name=""} 0.00118734 -podman_container_cpu_system_seconds_total{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 0 -podman_container_cpu_system_seconds_total{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_name="pod01"} 9.731e-06 # HELP podman_container_created_seconds Container creation time in unixtime. # TYPE podman_container_created_seconds gauge podman_container_created_seconds{id="19286a13dc23",pod_id="",pod_name=""} 1.655859887e+09 podman_container_created_seconds{id="482113b805f7",pod_id="",pod_name=""} 1.655859728e+09 -podman_container_created_seconds{id="642490688d9c",pod_id="",pod_name=""} 1.655859511e+09 -podman_container_created_seconds{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 1.655859858e+09 -podman_container_created_seconds{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_name="pod01"} 1.655859839e+09 # HELP podman_container_started_seconds Container started time in unixtime. # TYPE podman_container_started_seconds gauge podman_container_started_seconds{id="19286a13dc23",pod_id="",pod_name=""} 1.659253804e+09 podman_container_started_seconds{id="482113b805f7",pod_id="",pod_name=""} 1.659253804e+09 -podman_container_started_seconds{id="642490688d9c",pod_id="",pod_name=""} 1.660642996e+09 -podman_container_started_seconds{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 1.66064284e+09 -podman_container_started_seconds{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_name="pod01"} 1.66064284e+09 # HELP podman_container_exit_code Container exit code, if the container has not exited or restarted then the exit code will be 0. # TYPE podman_container_exit_code gauge -podman_container_exit_code{id="19286a13dc23",pod_id="",pod_name=""} 0 +podman_container_exit_code{id="19286a13dc23",pod_id="",pod_name=""} 130 podman_container_exit_code{id="482113b805f7",pod_id="",pod_name=""} 0 -podman_container_exit_code{id="642490688d9c",pod_id="",pod_name=""} 0 -podman_container_exit_code{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 130 -podman_container_exit_code{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_name="pod01"} 0 # HELP podman_container_exited_seconds Container exited time in unixtime. # TYPE podman_container_exited_seconds gauge podman_container_exited_seconds{id="19286a13dc23",pod_id="",pod_name=""} 1.659253805e+09 podman_container_exited_seconds{id="482113b805f7",pod_id="",pod_name=""} 1.659253805e+09 -podman_container_exited_seconds{id="642490688d9c",pod_id="",pod_name=""} 1.659253804e+09 -podman_container_exited_seconds{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 1.660643511e+09 -podman_container_exited_seconds{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_name="pod01"} 1.660643511e+09 # HELP podman_container_mem_limit_bytes Container memory limit. # TYPE podman_container_mem_limit_bytes gauge podman_container_mem_limit_bytes{id="19286a13dc23",pod_id="",pod_name=""} 9.713655808e+09 podman_container_mem_limit_bytes{id="482113b805f7",pod_id="",pod_name=""} 9.713655808e+09 -podman_container_mem_limit_bytes{id="642490688d9c",pod_id="",pod_name=""} 9.713655808e+09 -podman_container_mem_limit_bytes{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 0 -podman_container_mem_limit_bytes{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_name="pod01"} 9.713655808e+09 # HELP podman_container_mem_usage_bytes Container memory usage. # TYPE podman_container_mem_usage_bytes gauge podman_container_mem_usage_bytes{id="19286a13dc23",pod_id="",pod_name=""} 1.029062656e+09 podman_container_mem_usage_bytes{id="482113b805f7",pod_id="",pod_name=""} 2.748416e+06 -podman_container_mem_usage_bytes{id="642490688d9c",pod_id="",pod_name=""} 3.67616e+07 -podman_container_mem_usage_bytes{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 0 -podman_container_mem_usage_bytes{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_name="pod01"} 49152 + +# HELP podman_container_net_input_dropped_total Container network input dropped. +# TYPE podman_container_net_input_dropped_total counter +podman_container_net_input_dropped_total{id="19286a13dc23",pod_id="",pod_name=""} 0 +podman_container_net_input_dropped_total{id="482113b805f7",pod_id="",pod_name=""} 0 + +# HELP podman_container_net_input_errors_total Container network input errors. +# TYPE podman_container_net_input_errors_total counter +podman_container_net_input_errors_total{id="19286a13dc23",pod_id="",pod_name=""} 0 +podman_container_net_input_errors_total{id="482113b805f7",pod_id="",pod_name=""} 0 + +# HELP podman_container_net_input_packets_total Container network input packets. +# TYPE podman_container_net_input_packets_total counter +podman_container_net_input_packets_total{id="19286a13dc23",pod_id="",pod_name=""} 26 +podman_container_net_input_packets_total{id="482113b805f7",pod_id="",pod_name=""} 0 # HELP podman_container_net_input_total Container network input. # TYPE podman_container_net_input_total counter -podman_container_net_input_total{id="19286a13dc23",pod_id="",pod_name=""} 430 -podman_container_net_input_total{id="482113b805f7",pod_id="",pod_name=""} 430 -podman_container_net_input_total{id="642490688d9c",pod_id="",pod_name=""} 4323 -podman_container_net_input_total{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 0 -podman_container_net_input_total{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_name="pod01"} 430 +podman_container_net_input_total{id="19286a13dc23",pod_id="",pod_name=""} 2060 +podman_container_net_input_total{id="482113b805f7",pod_id="",pod_name=""} 0 + +# HELP podman_container_net_output_dropped_total Container network output dropped. +# TYPE podman_container_net_output_dropped_total counter +podman_container_net_output_dropped_total{id="19286a13dc23",pod_id="",pod_name=""} 0 +podman_container_net_output_dropped_total{id="482113b805f7",pod_id="",pod_name=""} 0 + +# HELP podman_container_net_output_errors_total Container network output errors. +# TYPE podman_container_net_output_errors_total counter +podman_container_net_output_errors_total{id="19286a13dc23",pod_id="",pod_name=""} 0 +podman_container_net_output_errors_total{id="482113b805f7",pod_id="",pod_name=""} 0 + +# HELP podman_container_net_output_packets_total Container network output packets. +# TYPE podman_container_net_output_packets_total counter +podman_container_net_output_packets_total{id="19286a13dc23",pod_id="",pod_name=""} 13 +podman_container_net_output_packets_total{id="482113b805f7",pod_id="",pod_name=""} 0 # HELP podman_container_net_output_total Container network output. # TYPE podman_container_net_output_total counter -podman_container_net_output_total{id="19286a13dc23",pod_id="",pod_name=""} 110 -podman_container_net_output_total{id="482113b805f7",pod_id="",pod_name=""} 110 -podman_container_net_output_total{id="642490688d9c",pod_id="",pod_name=""} 12071 -podman_container_net_output_total{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 0 -podman_container_net_output_total{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_name="pod01"} 110 +podman_container_net_output_total{id="19286a13dc23",pod_id="",pod_name=""} 978 +podman_container_net_output_total{id="482113b805f7",pod_id="",pod_name=""} 0 # HELP podman_container_pids Container pid number. # TYPE podman_container_pids gauge podman_container_pids{id="19286a13dc23",pod_id="",pod_name=""} 94 podman_container_pids{id="482113b805f7",pod_id="",pod_name=""} 82 -podman_container_pids{id="642490688d9c",pod_id="",pod_name=""} 14 -podman_container_pids{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 0 -podman_container_pids{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_name="pod01"} 1 # HELP podman_container_rootfs_size_bytes Container root filesystem size in bytes. # TYPE podman_container_rootfs_size_bytes gauge podman_container_rootfs_size_bytes{id="19286a13dc23",pod_id="",pod_name=""} 1.452382e+06 podman_container_rootfs_size_bytes{id="482113b805f7",pod_id="",pod_name=""} 1.135744e+06 -podman_container_rootfs_size_bytes{id="642490688d9c",pod_id="",pod_name=""} 1.72771905e+08 -podman_container_rootfs_size_bytes{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 1.135744e+06 -podman_container_rootfs_size_bytes{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_name="pod01"} 1.035744e+06 # HELP podman_container_rw_size_bytes Container top read-write layer size in bytes. # TYPE podman_container_rw_size_bytes gauge -podman_container_rw_size_bytes{id="19286a13dc23",pod_id="",pod_name=""} 0 -podman_container_rw_size_bytes{id="482113b805f7",pod_id="",pod_name=""} 0 -podman_container_rw_size_bytes{id="642490688d9c",pod_id="",pod_name=""} 26261 -podman_container_rw_size_bytes{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 3551 -podman_container_rw_size_bytes{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_name="pod01"} 0 +podman_container_rw_size_bytes{id="19286a13dc23",pod_id="",pod_name=""} 26261 +podman_container_rw_size_bytes{id="482113b805f7",pod_id="",pod_name=""} 3551 ``` #### `pod` diff --git a/collector/container.go b/collector/container.go index 3114399cd..7099fd1a1 100644 --- a/collector/container.go +++ b/collector/container.go @@ -7,25 +7,31 @@ import ( ) type containerCollector struct { - info typedDesc - state typedDesc - health typedDesc - created typedDesc - started typedDesc - exited typedDesc - exitCode typedDesc - pids typedDesc - cpu typedDesc - cpuSystem typedDesc - memUsage typedDesc - memLimit typedDesc - netInput typedDesc - netOutput typedDesc - blockInput typedDesc - blockOutput typedDesc - rwSize typedDesc - rootFsSize typedDesc - logger log.Logger + info typedDesc + state typedDesc + health typedDesc + created typedDesc + started typedDesc + exited typedDesc + exitCode typedDesc + pids typedDesc + cpu typedDesc + cpuSystem typedDesc + memUsage typedDesc + memLimit typedDesc + netInput typedDesc + netInputDropped typedDesc + netInputErrors typedDesc + netInputPackets typedDesc + netOutput typedDesc + netOutputDropped typedDesc + netOutputErrors typedDesc + netOutputPackets typedDesc + blockInput typedDesc + blockOutput typedDesc + rwSize typedDesc + rootFsSize typedDesc + logger log.Logger } type containerDescLabels struct { @@ -79,9 +85,27 @@ func NewContainerStatsCollector(logger log.Logger) (Collector, error) { netInput: typedDesc{ nil, prometheus.CounterValue, }, + netInputDropped: typedDesc{ + nil, prometheus.CounterValue, + }, + netInputErrors: typedDesc{ + nil, prometheus.CounterValue, + }, + netInputPackets: typedDesc{ + nil, prometheus.CounterValue, + }, netOutput: typedDesc{ nil, prometheus.CounterValue, }, + netOutputDropped: typedDesc{ + nil, prometheus.CounterValue, + }, + netOutputErrors: typedDesc{ + nil, prometheus.CounterValue, + }, + netOutputPackets: typedDesc{ + nil, prometheus.CounterValue, + }, blockInput: typedDesc{ nil, prometheus.CounterValue, }, @@ -125,180 +149,259 @@ func (c *containerCollector) Update(ch chan<- prometheus.Metric) error { cntLabelsInfo.labels, nil, ) - stateDesc := prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "state"), - //nolint:lll - "Container current state (-1=unknown,0=created,1=initialized,2=running,3=stopped,4=paused,5=exited,6=removing,7=stopping).", - defaultContainersLabel, nil, - ) - - healthDesc := prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "health"), - "Container current health (-1=unknown,0=healthy,1=unhealthy,2=starting).", - defaultContainersLabel, nil, - ) - - createdDesc := prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "created_seconds"), - "Container creation time in unixtime.", - defaultContainersLabel, nil, - ) - - startedDesc := prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "started_seconds"), - "Container started time in unixtime.", - defaultContainersLabel, nil, - ) - - exitedDesc := prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "exited_seconds"), - "Container exited time in unixtime.", - defaultContainersLabel, nil, - ) - - exitedCodeDesc := prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "exit_code"), - "Container exit code, if the container has not exited or restarted then the exit code will be 0.", - defaultContainersLabel, nil, - ) - - pidsDesc := prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "pids"), - "Container pid number.", - defaultContainersLabel, nil, - ) - - cpuDesc := prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "cpu_seconds_total"), - "total CPU time spent for container in seconds.", - defaultContainersLabel, nil, - ) - - cpuSystemDesc := prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "cpu_system_seconds_total"), - "total system CPU time spent for container in seconds.", - defaultContainersLabel, nil, - ) - - memUsageDesc := prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "mem_usage_bytes"), - "Container memory usage.", - defaultContainersLabel, nil, - ) - - memLimitDesc := prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "mem_limit_bytes"), - "Container memory limit.", - defaultContainersLabel, nil, - ) - - netInputDesc := prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "net_input_total"), - "Container network input in bytes.", - defaultContainersLabel, nil, - ) - - netOutputDesc := prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "net_output_total"), - "Container network output in bytes.", - defaultContainersLabel, nil, - ) - - blockInputDesc := prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "block_input_total"), - "Container block input in bytes.", - defaultContainersLabel, nil, - ) - - blockOutputDesc := prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "block_output_total"), - "Container block output in bytes.", - defaultContainersLabel, nil, - ) - - rwSizeDesc := prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "rw_size_bytes"), - "Container top read-write layer size in bytes.", - defaultContainersLabel, nil, - ) - - rootFsSizeDesc := prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "rootfs_size_bytes"), - "Container root filesystem size in bytes.", - defaultContainersLabel, nil, - ) - c.info.desc = infoDesc - c.state.desc = stateDesc - c.health.desc = healthDesc - c.created.desc = createdDesc - c.started.desc = startedDesc - c.exited.desc = exitedDesc - c.exitCode.desc = exitedCodeDesc - c.pids.desc = pidsDesc - c.cpu.desc = cpuDesc - c.cpuSystem.desc = cpuSystemDesc - c.memUsage.desc = memUsageDesc - c.memLimit.desc = memLimitDesc - c.netInput.desc = netInputDesc - c.netOutput.desc = netOutputDesc - c.blockInput.desc = blockInputDesc - c.blockOutput.desc = blockOutputDesc - c.rwSize.desc = rwSizeDesc - c.rootFsSize.desc = rootFsSizeDesc cntStat := getContainerStat(rep.ID, statReports) ch <- c.info.mustNewConstMetric(1, cntLabelsInfo.labelsValue...) + c.updateInfo(ch, &rep, cntLabelsInfo, defaultContainersLabel, enhanceAllMetrics) //nolint:gosec + c.updateStats(ch, &rep, cntStat, cntLabelsInfo, defaultContainersLabel, enhanceAllMetrics) //nolint:gosec + } - if enhanceAllMetrics { - ch <- c.state.mustNewConstMetric(float64(rep.State), cntLabelsInfo.labelsValue...) - ch <- c.health.mustNewConstMetric(float64(rep.Health), cntLabelsInfo.labelsValue...) - ch <- c.created.mustNewConstMetric(float64(rep.Created), cntLabelsInfo.labelsValue...) - ch <- c.started.mustNewConstMetric(float64(rep.Started), cntLabelsInfo.labelsValue...) - ch <- c.exited.mustNewConstMetric(float64(rep.Exited), cntLabelsInfo.labelsValue...) - ch <- c.exitCode.mustNewConstMetric(float64(rep.ExitCode), cntLabelsInfo.labelsValue...) - ch <- c.rwSize.mustNewConstMetric(float64(rep.RwSize), cntLabelsInfo.labelsValue...) - ch <- c.rootFsSize.mustNewConstMetric(float64(rep.RootFsSize), cntLabelsInfo.labelsValue...) - - if cntStat != nil { - ch <- c.pids.mustNewConstMetric(float64(cntStat.PIDs), cntLabelsInfo.labelsValue...) - ch <- c.cpu.mustNewConstMetric(cntStat.CPU, cntLabelsInfo.labelsValue...) - ch <- c.cpuSystem.mustNewConstMetric(cntStat.CPUSystem, cntLabelsInfo.labelsValue...) - ch <- c.memUsage.mustNewConstMetric(float64(cntStat.MemUsage), cntLabelsInfo.labelsValue...) - ch <- c.memLimit.mustNewConstMetric(float64(cntStat.MemLimit), cntLabelsInfo.labelsValue...) - ch <- c.netInput.mustNewConstMetric(float64(cntStat.NetInput), cntLabelsInfo.labelsValue...) - ch <- c.netOutput.mustNewConstMetric(float64(cntStat.NetOutput), cntLabelsInfo.labelsValue...) - ch <- c.blockInput.mustNewConstMetric(float64(cntStat.BlockInput), cntLabelsInfo.labelsValue...) - ch <- c.blockOutput.mustNewConstMetric(float64(cntStat.BlockOutput), cntLabelsInfo.labelsValue...) - } + return nil +} - continue - } +func (c *containerCollector) updateInfo( + ch chan<- prometheus.Metric, + rep *pdcs.Container, + cntLabelsInfo *containerDescLabels, + defaultLabels []string, + enhance bool, +) { + stateDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "state"), + //nolint:lll + "Container current state (-1=unknown,0=created,1=initialized,2=running,3=stopped,4=paused,5=exited,6=removing,7=stopping).", + defaultLabels, nil, + ) + + healthDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "health"), + "Container current health (-1=unknown,0=healthy,1=unhealthy,2=starting).", + defaultLabels, nil, + ) + + createdDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "created_seconds"), + "Container creation time in unixtime.", + defaultLabels, nil, + ) + + startedDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "started_seconds"), + "Container started time in unixtime.", + defaultLabels, nil, + ) + + exitedDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "exited_seconds"), + "Container exited time in unixtime.", + defaultLabels, nil, + ) + + exitedCodeDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "exit_code"), + "Container exit code, if the container has not exited or restarted then the exit code will be 0.", + defaultLabels, nil, + ) + + rwSizeDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "rw_size_bytes"), + "Container top read-write layer size in bytes.", + defaultLabels, nil, + ) + + rootFsSizeDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "rootfs_size_bytes"), + "Container root filesystem size in bytes.", + defaultLabels, nil, + ) + + c.state.desc = stateDesc + c.health.desc = healthDesc + c.created.desc = createdDesc + c.started.desc = startedDesc + c.exited.desc = exitedDesc + c.exitCode.desc = exitedCodeDesc + c.rwSize.desc = rwSizeDesc + c.rootFsSize.desc = rootFsSizeDesc + + if enhance { + ch <- c.state.mustNewConstMetric(float64(rep.State), cntLabelsInfo.labelsValue...) + ch <- c.health.mustNewConstMetric(float64(rep.Health), cntLabelsInfo.labelsValue...) + ch <- c.created.mustNewConstMetric(float64(rep.Created), cntLabelsInfo.labelsValue...) + ch <- c.started.mustNewConstMetric(float64(rep.Started), cntLabelsInfo.labelsValue...) + ch <- c.exited.mustNewConstMetric(float64(rep.Exited), cntLabelsInfo.labelsValue...) + ch <- c.exitCode.mustNewConstMetric(float64(rep.ExitCode), cntLabelsInfo.labelsValue...) + ch <- c.rwSize.mustNewConstMetric(float64(rep.RwSize), cntLabelsInfo.labelsValue...) + ch <- c.rootFsSize.mustNewConstMetric(float64(rep.RootFsSize), cntLabelsInfo.labelsValue...) + + return + } - ch <- c.state.mustNewConstMetric(float64(rep.State), rep.ID, rep.PodID, rep.PodName) - ch <- c.health.mustNewConstMetric(float64(rep.Health), rep.ID, rep.PodID, rep.PodName) - ch <- c.created.mustNewConstMetric(float64(rep.Created), rep.ID, rep.PodID, rep.PodName) - ch <- c.started.mustNewConstMetric(float64(rep.Started), rep.ID, rep.PodID, rep.PodName) - ch <- c.exited.mustNewConstMetric(float64(rep.Exited), rep.ID, rep.PodID, rep.PodName) - ch <- c.exitCode.mustNewConstMetric(float64(rep.ExitCode), rep.ID, rep.PodID, rep.PodName) - ch <- c.rwSize.mustNewConstMetric(float64(rep.RwSize), rep.ID, rep.PodID, rep.PodName) - ch <- c.rootFsSize.mustNewConstMetric(float64(rep.RootFsSize), rep.ID, rep.PodID, rep.PodName) + ch <- c.state.mustNewConstMetric(float64(rep.State), rep.ID, rep.PodID, rep.PodName) + ch <- c.health.mustNewConstMetric(float64(rep.Health), rep.ID, rep.PodID, rep.PodName) + ch <- c.created.mustNewConstMetric(float64(rep.Created), rep.ID, rep.PodID, rep.PodName) + ch <- c.started.mustNewConstMetric(float64(rep.Started), rep.ID, rep.PodID, rep.PodName) + ch <- c.exited.mustNewConstMetric(float64(rep.Exited), rep.ID, rep.PodID, rep.PodName) + ch <- c.exitCode.mustNewConstMetric(float64(rep.ExitCode), rep.ID, rep.PodID, rep.PodName) + ch <- c.rwSize.mustNewConstMetric(float64(rep.RwSize), rep.ID, rep.PodID, rep.PodName) + ch <- c.rootFsSize.mustNewConstMetric(float64(rep.RootFsSize), rep.ID, rep.PodID, rep.PodName) +} +func (c *containerCollector) updateStats( + ch chan<- prometheus.Metric, + rep *pdcs.Container, + cntStat *pdcs.ContainerStat, + cntLabelsInfo *containerDescLabels, + defaultLabels []string, + enhance bool, +) { + pidsDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "pids"), + "Container pid number.", + defaultLabels, nil, + ) + + cpuDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "cpu_seconds_total"), + "total CPU time spent for container in seconds.", + defaultLabels, nil, + ) + + cpuSystemDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "cpu_system_seconds_total"), + "total system CPU time spent for container in seconds.", + defaultLabels, nil, + ) + + memUsageDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "mem_usage_bytes"), + "Container memory usage.", + defaultLabels, nil, + ) + + memLimitDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "mem_limit_bytes"), + "Container memory limit.", + defaultLabels, nil, + ) + + netInputDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "net_input_total"), + "Container network input in bytes.", + defaultLabels, nil, + ) + + netInputDroppedDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "net_input_dropped_total"), + "Container network input dropped.", + defaultLabels, nil, + ) + + netInputErrorsDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "net_input_errors_total"), + "Container network input errors.", + defaultLabels, nil, + ) + + netInputPacketsDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "net_input_packets_total"), + "Container network input packets.", + defaultLabels, nil, + ) + + netOutputDroppedDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "net_output_dropped_total"), + "Container network output dropped.", + defaultLabels, nil, + ) + + netOutputErrorsDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "net_output_errors_total"), + "Container network output errors.", + defaultLabels, nil, + ) + + netOutputPacketsDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "net_output_packets_total"), + "Container network output packets.", + defaultLabels, nil, + ) + + netOutputDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "net_output_total"), + "Container network output in bytes.", + defaultLabels, nil, + ) + + blockInputDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "block_input_total"), + "Container block input in bytes.", + defaultLabels, nil, + ) + + blockOutputDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "block_output_total"), + "Container block output in bytes.", + defaultLabels, nil, + ) + + c.pids.desc = pidsDesc + c.cpu.desc = cpuDesc + c.cpuSystem.desc = cpuSystemDesc + c.memUsage.desc = memUsageDesc + c.memLimit.desc = memLimitDesc + c.netInput.desc = netInputDesc + c.netInputDropped.desc = netInputDroppedDesc + c.netInputErrors.desc = netInputErrorsDesc + c.netInputPackets.desc = netInputPacketsDesc + c.netOutput.desc = netOutputDesc + c.netOutputDropped.desc = netOutputDroppedDesc + c.netOutputErrors.desc = netOutputErrorsDesc + c.netOutputPackets.desc = netOutputPacketsDesc + c.blockInput.desc = blockInputDesc + c.blockOutput.desc = blockOutputDesc + + if enhance { if cntStat != nil { - ch <- c.pids.mustNewConstMetric(float64(cntStat.PIDs), rep.ID, rep.PodID, rep.PodName) - ch <- c.cpu.mustNewConstMetric(cntStat.CPU, rep.ID, rep.PodID, rep.PodName) - ch <- c.cpuSystem.mustNewConstMetric(cntStat.CPUSystem, rep.ID, rep.PodID, rep.PodName) - ch <- c.memUsage.mustNewConstMetric(float64(cntStat.MemUsage), rep.ID, rep.PodID, rep.PodName) - ch <- c.memLimit.mustNewConstMetric(float64(cntStat.MemLimit), rep.ID, rep.PodID, rep.PodName) - ch <- c.netInput.mustNewConstMetric(float64(cntStat.NetInput), rep.ID, rep.PodID, rep.PodName) - ch <- c.netOutput.mustNewConstMetric(float64(cntStat.NetOutput), rep.ID, rep.PodID, rep.PodName) - ch <- c.blockInput.mustNewConstMetric(float64(cntStat.BlockInput), rep.ID, rep.PodID, rep.PodName) - ch <- c.blockOutput.mustNewConstMetric(float64(cntStat.BlockOutput), rep.ID, rep.PodID, rep.PodName) + ch <- c.pids.mustNewConstMetric(float64(cntStat.PIDs), cntLabelsInfo.labelsValue...) + ch <- c.cpu.mustNewConstMetric(cntStat.CPU, cntLabelsInfo.labelsValue...) + ch <- c.cpuSystem.mustNewConstMetric(cntStat.CPUSystem, cntLabelsInfo.labelsValue...) + ch <- c.memUsage.mustNewConstMetric(float64(cntStat.MemUsage), cntLabelsInfo.labelsValue...) + ch <- c.memLimit.mustNewConstMetric(float64(cntStat.MemLimit), cntLabelsInfo.labelsValue...) + ch <- c.netInput.mustNewConstMetric(float64(cntStat.NetInput), cntLabelsInfo.labelsValue...) + ch <- c.netInputDropped.mustNewConstMetric(float64(cntStat.NetInputDropped), cntLabelsInfo.labelsValue...) + ch <- c.netInputErrors.mustNewConstMetric(float64(cntStat.NetInputErrors), cntLabelsInfo.labelsValue...) + ch <- c.netInputPackets.mustNewConstMetric(float64(cntStat.NetInputPackets), cntLabelsInfo.labelsValue...) + ch <- c.netOutput.mustNewConstMetric(float64(cntStat.NetOutput), cntLabelsInfo.labelsValue...) + ch <- c.netOutputDropped.mustNewConstMetric(float64(cntStat.NetOutputDropped), cntLabelsInfo.labelsValue...) + ch <- c.netOutputErrors.mustNewConstMetric(float64(cntStat.NetOutputErrors), cntLabelsInfo.labelsValue...) + ch <- c.netOutputPackets.mustNewConstMetric(float64(cntStat.NetOutputPackets), cntLabelsInfo.labelsValue...) + ch <- c.blockInput.mustNewConstMetric(float64(cntStat.BlockInput), cntLabelsInfo.labelsValue...) + ch <- c.blockOutput.mustNewConstMetric(float64(cntStat.BlockOutput), cntLabelsInfo.labelsValue...) } + + return } - return nil + if cntStat != nil { + ch <- c.pids.mustNewConstMetric(float64(cntStat.PIDs), rep.ID, rep.PodID, rep.PodName) + ch <- c.cpu.mustNewConstMetric(cntStat.CPU, rep.ID, rep.PodID, rep.PodName) + ch <- c.cpuSystem.mustNewConstMetric(cntStat.CPUSystem, rep.ID, rep.PodID, rep.PodName) + ch <- c.memUsage.mustNewConstMetric(float64(cntStat.MemUsage), rep.ID, rep.PodID, rep.PodName) + ch <- c.memLimit.mustNewConstMetric(float64(cntStat.MemLimit), rep.ID, rep.PodID, rep.PodName) + ch <- c.netInput.mustNewConstMetric(float64(cntStat.NetInput), rep.ID, rep.PodID, rep.PodName) + ch <- c.netInputDropped.mustNewConstMetric(float64(cntStat.NetInputDropped), rep.ID, rep.PodID, rep.PodName) + ch <- c.netInputErrors.mustNewConstMetric(float64(cntStat.NetInputErrors), rep.ID, rep.PodID, rep.PodName) + ch <- c.netInputPackets.mustNewConstMetric(float64(cntStat.NetInputPackets), rep.ID, rep.PodID, rep.PodName) + ch <- c.netOutput.mustNewConstMetric(float64(cntStat.NetOutput), rep.ID, rep.PodID, rep.PodName) + ch <- c.netOutputDropped.mustNewConstMetric(float64(cntStat.NetOutputDropped), rep.ID, rep.PodID, rep.PodName) + ch <- c.netOutputErrors.mustNewConstMetric(float64(cntStat.NetOutputErrors), rep.ID, rep.PodID, rep.PodName) + ch <- c.netOutputPackets.mustNewConstMetric(float64(cntStat.NetOutputPackets), rep.ID, rep.PodID, rep.PodName) + ch <- c.blockInput.mustNewConstMetric(float64(cntStat.BlockInput), rep.ID, rep.PodID, rep.PodName) + ch <- c.blockOutput.mustNewConstMetric(float64(cntStat.BlockOutput), rep.ID, rep.PodID, rep.PodName) + } } func (c *containerCollector) getContainerDescLabel(rep pdcs.Container) *containerDescLabels { diff --git a/pdcs/container.go b/pdcs/container.go index 4668ec917..5d683ca7c 100644 --- a/pdcs/container.go +++ b/pdcs/container.go @@ -39,17 +39,23 @@ type Container struct { // ContainerStat implements container's stat. type ContainerStat struct { - ID string - Name string - PIDs uint64 - CPU float64 - CPUSystem float64 - MemUsage uint64 - MemLimit uint64 - NetInput uint64 - NetOutput uint64 - BlockInput uint64 - BlockOutput uint64 + ID string + Name string + PIDs uint64 + CPU float64 + CPUSystem float64 + MemUsage uint64 + MemLimit uint64 + NetInput uint64 + NetOutput uint64 + NetInputDropped uint64 + NetInputErrors uint64 + NetInputPackets uint64 + NetOutputDropped uint64 + NetOutputErrors uint64 + NetOutputPackets uint64 + BlockInput uint64 + BlockOutput uint64 } type containerSizeCache struct { @@ -156,27 +162,45 @@ func ContainersStats() ([]ContainerStat, error) { for _, rep := range statReport { var ( - netInput uint64 - netOutput uint64 + netInput uint64 + netInputDropped uint64 + netInputErrors uint64 + netInputPackets uint64 + netOutput uint64 + netOutputDropped uint64 + netOutputErrors uint64 + netOutputPackets uint64 ) for _, net := range rep.Network { netInput += net.RxBytes + netInputDropped += net.RxDropped + netInputErrors += net.RxErrors + netInputPackets += net.RxPackets netOutput += net.TxBytes + netOutputDropped += net.TxDropped + netOutputErrors += net.TxErrors + netOutputPackets += net.TxPackets } stat = append(stat, ContainerStat{ - ID: getID(rep.ContainerID), - Name: rep.Name, - PIDs: rep.PIDs, - CPU: float64(rep.CPUNano) / nano, - CPUSystem: float64(rep.CPUSystemNano) / nano, - MemUsage: rep.MemUsage, - MemLimit: rep.MemLimit, - NetInput: netInput, - NetOutput: netOutput, - BlockInput: rep.BlockInput, - BlockOutput: rep.BlockOutput, + ID: getID(rep.ContainerID), + Name: rep.Name, + PIDs: rep.PIDs, + CPU: float64(rep.CPUNano) / nano, + CPUSystem: float64(rep.CPUSystemNano) / nano, + MemUsage: rep.MemUsage, + MemLimit: rep.MemLimit, + NetInput: netInput, + NetInputDropped: netInputDropped, + NetInputErrors: netInputErrors, + NetInputPackets: netInputPackets, + NetOutput: netOutput, + NetOutputDropped: netOutputDropped, + NetOutputErrors: netOutputErrors, + NetOutputPackets: netOutputPackets, + BlockInput: rep.BlockInput, + BlockOutput: rep.BlockOutput, }) }