diff --git a/go.mod b/go.mod index dfb4638..7752f6b 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/prometheus/client_golang v1.17.0 github.com/stretchr/testify v1.8.4 github.com/valyala/fasthttp v1.50.0 + golang.org/x/sync v0.4.0 golang.org/x/tools v0.14.0 ) @@ -67,7 +68,7 @@ require ( github.com/motoki317/sc v1.6.0 github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 // indirect github.com/prometheus/common v0.44.0 // indirect - github.com/prometheus/procfs v0.11.1 // indirect + github.com/prometheus/procfs v0.11.1 golang.org/x/exp v0.0.0-20230315142452-642cacee5cc0 golang.org/x/sys v0.13.0 // indirect google.golang.org/protobuf v1.31.0 // indirect diff --git a/procfs.go b/procfs.go new file mode 100644 index 0000000..4682376 --- /dev/null +++ b/procfs.go @@ -0,0 +1,211 @@ +package isutools + +import ( + "fmt" + "log" + "os" + "strconv" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/prometheus/procfs" + "golang.org/x/sync/singleflight" +) + +const ( + prometheusNamespace = "isutools" + prometheusSubsystem = "procfs" + defaultProcFS = "/proc" + statTTL = time.Second + procUpdateRate = 2 * time.Second +) + +func init() { + if !Enable { + return + } + + procFSPath, ok := os.LookupEnv("ISUTOOLS_PROCFS") + if !ok { + procFSPath = defaultProcFS + } + + procFS, err := procfs.NewFS(procFSPath) + if err != nil { + log.Printf("failed to init procfs: %v", err) + return + } + + cpuInfos, err := procFS.CPUInfo() + if err != nil { + log.Printf("failed to get cpuinfo: %v", err) + return + } + + if len(cpuInfos) == 0 { + log.Printf("cpuinfo is empty") + return + } + + hertz := cpuInfos[0].CPUMHz + hertsGauge := promauto.NewGauge(prometheus.GaugeOpts{ + Namespace: prometheusNamespace, + Subsystem: prometheusSubsystem, + Name: "cpu_hertz", + }) + hertsGauge.Set(float64(hertz)) + + go updateProcListWorker(procFS) +} + +func updateProcListWorker(procFS procfs.FS) { + ticker := time.NewTicker(2 * time.Second) + procMap := map[int]*procMetrics{} + for range ticker.C { + procs, err := procFS.AllProcs() + if err != nil { + log.Printf("failed to get all procs: %v", err) + continue + } + + activeProcMap := make(map[int]struct{}, len(procs)) + for _, proc := range procs { + if proc.PID <= 0 { + continue + } + + activeProcMap[proc.PID] = struct{}{} + + if _, ok := procMap[proc.PID]; !ok { + pm, err := newProcMetrics(proc) + if err != nil { + log.Printf("failed to new proc metrics: %v", err) + continue + } + procMap[proc.PID] = pm + } + } + + for pid, pm := range procMap { + if _, ok := activeProcMap[pid]; !ok { + pm.unregister() + delete(procMap, pid) + } + } + } +} + +var sf = &singleflight.Group{} + +type procMetrics struct { + proc procfs.Proc + stat procfs.ProcStat + statExpiresAt time.Time + collectors []prometheus.Collector +} + +func newProcMetrics(proc procfs.Proc) (*procMetrics, error) { + stat, err := proc.Stat() + if err != nil { + return nil, fmt.Errorf("failed to get process stat: %w", err) + } + + pm := &procMetrics{ + proc: proc, + stat: stat, + statExpiresAt: time.Now().Add(statTTL), + } + + labels := map[string]string{ + "cmd": stat.Comm, + "pid": strconv.Itoa(proc.PID), + } + + userCPUGauge := promauto.NewGaugeFunc(prometheus.GaugeOpts{ + Namespace: prometheusNamespace, + Subsystem: prometheusSubsystem, + Name: "user_cpu_clock_count", + ConstLabels: labels, + }, pm.userCPUFunc) + pm.collectors = append(pm.collectors, userCPUGauge) + + systemCPUGauge := promauto.NewGaugeFunc(prometheus.GaugeOpts{ + Namespace: prometheusNamespace, + Subsystem: prometheusSubsystem, + Name: "system_cpu_clock_count", + ConstLabels: labels, + }, pm.systemCPUFunc) + pm.collectors = append(pm.collectors, systemCPUGauge) + + memoryGauge := promauto.NewGaugeFunc(prometheus.GaugeOpts{ + Namespace: prometheusNamespace, + Subsystem: prometheusSubsystem, + Name: "memory", + ConstLabels: labels, + }, pm.memoryFunc) + pm.collectors = append(pm.collectors, memoryGauge) + + return pm, nil +} + +func (pm *procMetrics) unregister() { + for _, c := range pm.collectors { + prometheus.Unregister(c) + } +} + +func (pm *procMetrics) getStat() (procfs.ProcStat, error) { + iStat, err, _ := sf.Do(strconv.Itoa(pm.proc.PID), func() (interface{}, error) { + if pm.statExpiresAt.After(time.Now()) { + return pm.stat, nil + } + + newStat, err := pm.proc.Stat() + if err != nil { + return nil, fmt.Errorf("failed to get process stat: %w", err) + } + + pm.stat = newStat + pm.statExpiresAt = time.Now().Add(statTTL) + + return pm.stat, nil + }) + if err != nil { + return procfs.ProcStat{}, err + } + + stat, ok := iStat.(procfs.ProcStat) + if !ok { + return procfs.ProcStat{}, fmt.Errorf("failed to cast stat") + } + + return stat, nil +} + +func (pm *procMetrics) userCPUFunc() float64 { + stat, err := pm.getStat() + if err != nil { + return 0 + } + + return float64(stat.UTime) +} + +func (pm *procMetrics) systemCPUFunc() float64 { + stat, err := pm.getStat() + if err != nil { + return 0 + } + + return float64(stat.STime) +} + +func (pm *procMetrics) memoryFunc() float64 { + stat, err := pm.getStat() + if err != nil { + return 0 + } + + return float64(stat.ResidentMemory()) +}