diff --git a/pkg/collector/helper.go b/pkg/collector/helper.go
index 8c30ec91..217e117d 100644
--- a/pkg/collector/helper.go
+++ b/pkg/collector/helper.go
@@ -14,6 +14,7 @@ import (
 	"github.com/go-kit/log"
 	"github.com/go-kit/log/level"
 	"github.com/mahendrapaipuri/ceems/internal/osexec"
+	"github.com/prometheus/procfs"
 )
 
 type Device struct {
@@ -244,6 +245,87 @@ func GetAMDGPUDevices(rocmSmiPath string, logger log.Logger) (map[int]Device, er
 	return parseAmdSmioutput(string(rocmSmiOutput), logger), nil
 }
 
+// cgroupProcs returns a map of active cgroups and the processes contained in each cgroup.
+func cgroupProcs(fs procfs.FS, idRegex *regexp.Regexp, targetEnvVars []string, procFilter func(string) bool) (map[string][]procfs.Proc, error) {
+	// Get all active procs
+	allProcs, err := fs.AllProcs()
+	if err != nil {
+		return nil, err
+	}
+
+	// If no idRegex is provided, cgroup IDs cannot be extracted, so return an error
+	if idRegex == nil {
+		return nil, errors.New("cgroup IDs cannot be retrieved due to empty regex")
+	}
+
+	cgroups := make(map[string][]procfs.Proc)
+
+	for _, proc := range allProcs {
+		// Get cgroup ID from regex
+		var cgroupID string
+
+		cgrps, err := proc.Cgroups()
+		if err != nil || len(cgrps) == 0 {
+			continue
+		}
+
+		for _, cgrp := range cgrps {
+			cgroupIDMatches := idRegex.FindStringSubmatch(cgrp.Path)
+			if len(cgroupIDMatches) <= 1 {
+				continue
+			}
+
+			cgroupID = cgroupIDMatches[1]
+
+			break
+		}
+
+		// If no cgroupID found, ignore
+		if cgroupID == "" {
+			continue
+		}
+
+		// If targetEnvVars is not empty, check if one of these env vars is present
+		// for the process. We don't check the value of the env var: its presence
+		// alone is enough to trigger the profiling of that process.
+		if len(targetEnvVars) > 0 {
+			environ, err := proc.Environ()
+			if err != nil {
+				continue
+			}
+
+			for _, env := range environ {
+				for _, targetEnvVar := range targetEnvVars {
+					if strings.HasPrefix(env, targetEnvVar) {
+						goto check_process
+					}
+				}
+			}
+
+			// If no target env var is found, skip this process
+			continue
+		}
+
+	check_process:
+		// Ignore processes whose command line matches the filter
+		if procFilter != nil {
+			procCmdLine, err := proc.CmdLine()
+			if err != nil || len(procCmdLine) == 0 {
+				continue
+			}
+
+			// Ignore process if a match is found
+			if procFilter(strings.Join(procCmdLine, " ")) {
+				continue
+			}
+		}
+
+		cgroups[cgroupID] = append(cgroups[cgroupID], proc)
+	}
+
+	return cgroups, nil
+}
+
 // fileExists checks if given file exists or not.
 func fileExists(filename string) bool {
 	info, err := os.Stat(filename)
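For illustration only (not part of the diff), a minimal sketch of how a caller might use the new shared cgroupProcs helper, written as if the unexported helper were callable from a main package. The regex and env var name here are hypothetical stand-ins, not values taken from this PR:

package main

import (
	"fmt"
	"log"
	"regexp"

	"github.com/prometheus/procfs"
)

func main() {
	fs, err := procfs.NewFS("/proc")
	if err != nil {
		log.Fatal(err)
	}

	// Hypothetical SLURM-style path regex; the first capture group becomes the cgroup ID.
	idRegex := regexp.MustCompile(`/slurm(?:.*?)/job_([0-9]+)`)

	// Only procs carrying SLURM_JOB_ID in their environment are kept;
	// a nil procFilter disables command-line filtering.
	cgroups, err := cgroupProcs(fs, idRegex, []string{"SLURM_JOB_ID"}, nil)
	if err != nil {
		log.Fatal(err)
	}

	for cgroupID, procs := range cgroups {
		fmt.Printf("cgroup %s: %d procs\n", cgroupID, len(procs))
	}
}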
diff --git a/pkg/collector/perf.go b/pkg/collector/perf.go
index e7115292..b5ab4164 100644
--- a/pkg/collector/perf.go
+++ b/pkg/collector/perf.go
@@ -1,5 +1,5 @@
-//go:build !perf
-// +build !perf
+//go:build !noperf
+// +build !noperf
 
 package collector
 
@@ -1123,79 +1123,11 @@ func discoverer(data interface{}) error {
 		return security.ErrSecurityCtxDataAssertion
 	}
 
-	allProcs, err := d.procfs.AllProcs()
+	cgroups, err := cgroupProcs(d.procfs, d.cgroupManager.idRegex, d.targetEnvVars, d.cgroupManager.procFilter)
 	if err != nil {
 		return err
 	}
 
-	cgroups := make(map[string][]procfs.Proc)
-
-	for _, proc := range allProcs {
-		// if targetEnvVars is not empty check if this env vars is present for the process
-		// We dont check for the value of env var. Presence of env var is enough to
-		// trigger the profiling of that process
-		if len(d.targetEnvVars) > 0 {
-			environ, err := proc.Environ()
-			if err != nil {
-				continue
-			}
-
-			for _, env := range environ {
-				for _, targetEnvVar := range d.targetEnvVars {
-					if strings.HasPrefix(env, targetEnvVar) {
-						goto check_process
-					}
-				}
-			}
-
-			// If target env var(s) is not found, return
-			continue
-		}
-
-	check_process:
-
-		// Ignore processes where command line matches the regex
-		if d.cgroupManager.procFilter != nil {
-			procCmdLine, err := proc.CmdLine()
-			if err != nil || len(procCmdLine) == 0 {
-				continue
-			}
-
-			// Ignore process if matches found
-			if d.cgroupManager.procFilter(strings.Join(procCmdLine, " ")) {
-				continue
-			}
-		}
-
-		// Get cgroup ID from regex
-		var cgroupID string
-
-		if d.cgroupManager.idRegex != nil {
-			cgroups, err := proc.Cgroups()
-			if err != nil || len(cgroups) == 0 {
-				continue
-			}
-
-			for _, cgroup := range cgroups {
-				cgroupIDMatches := d.cgroupManager.idRegex.FindStringSubmatch(cgroup.Path)
-				if len(cgroupIDMatches) <= 1 {
-					continue
-				}
-
-				cgroupID = cgroupIDMatches[1]
-
-				break
-			}
-		}
-
-		// If no cgroupID found, ignore
-		if cgroupID == "" {
-			continue
-		}
-
-		cgroups[cgroupID] = append(cgroups[cgroupID], proc)
-	}
-
 	// Read cgroups proc map into d
 	d.cgroups = cgroups
 
diff --git a/pkg/collector/perf_test.go b/pkg/collector/perf_test.go
index e94a3246..cd804197 100644
--- a/pkg/collector/perf_test.go
+++ b/pkg/collector/perf_test.go
@@ -1,5 +1,5 @@
-//go:build !perf
-// +build !perf
+//go:build !noperf
+// +build !noperf
 
 package collector
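A note on the build-constraint rename above: with `!noperf` the perf collector is still compiled by default and is excluded only when the opt-out tag is set explicitly, consistent with the `!nordma` tag introduced below. Standard Go toolchain usage (not part of the diff):

//go:build !noperf
// +build !noperf

// Files guarded by this constraint are compiled by default and excluded
// only with an explicit opt-out build, e.g.:
//
//	go build -tags noperf ./...
//	go build -tags "noperf nordma" ./...  // exclude both perf and RDMA collectors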
diff --git a/pkg/collector/rdma.go b/pkg/collector/rdma.go
new file mode 100644
index 00000000..52c51878
--- /dev/null
+++ b/pkg/collector/rdma.go
@@ -0,0 +1,667 @@
+//go:build !nordma
+// +build !nordma
+
+package collector
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"os"
+	"os/exec"
+	"regexp"
+	"strconv"
+	"strings"
+	"sync"
+
+	"github.com/go-kit/log"
+	"github.com/go-kit/log/level"
+	"github.com/mahendrapaipuri/ceems/internal/osexec"
+	"github.com/mahendrapaipuri/ceems/internal/security"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/procfs"
+	"github.com/prometheus/procfs/sysfs"
+)
+
+const rdmaCollectorSubsystem = "rdma"
+
+// CLI opts.
+var (
+	rdmaStatsEnabled = CEEMSExporterApp.Flag(
+		"collector.rdma.stats",
+		"Enables collection of RDMA stats (default: disabled)",
+	).Default("false").Bool()
+
+	// test related opts.
+	rdmaCmd = CEEMSExporterApp.Flag(
+		"collector.rdma.cmd",
+		"Path to rdma command",
+	).Default("").Hidden().String()
+)
+
+type mr struct {
+	num int
+	len uint64
+	dev string
+}
+
+type cq struct {
+	num int
+	len uint64
+	dev string
+}
+
+type qp struct {
+	num        int
+	dev        string
+	port       string
+	hwCounters map[string]uint64
+}
+
+type rdmaCollector struct {
+	sysfs            sysfs.FS
+	procfs           procfs.FS
+	logger           log.Logger
+	cgroupManager    *cgroupManager
+	hostname         string
+	isAvailable      bool
+	rdmaCmd          string
+	qpModes          map[string]bool
+	securityContexts map[string]*security.SecurityContext
+	metricDescs      map[string]*prometheus.Desc
+	hwCounters       []string
+}
+
+// Security context names.
+const (
+	rdmaExecCmdCtx = "rdma_exec_cmd"
+)
+
+// NewRDMACollector returns a new Collector exposing RDMA metrics.
+func NewRDMACollector(logger log.Logger, cgManager *cgroupManager) (*rdmaCollector, error) {
+	sysfs, err := sysfs.NewFS(*sysPath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to open sysfs: %w", err)
+	}
+
+	// Instantiate a new Proc FS
+	procfs, err := procfs.NewFS(*procfsPath)
+	if err != nil {
+		return nil, err
+	}
+
+	// Setup RDMA command
+	var rdmaCmdPath string
+	if *rdmaCmd != "" {
+		rdmaCmdPath = *rdmaCmd
+	} else {
+		if rdmaCmdPath, err = exec.LookPath("rdma"); err != nil {
+			level.Error(logger).
+				Log("msg", "rdma command not found. Not all RDMA metrics will be reported.", "err", err)
+		}
+	}
+
+	// Check if RDMA devices exist
+	_, err = sysfs.InfiniBandClass()
+	if err != nil && errors.Is(err, os.ErrNotExist) {
+		level.Error(logger).
+			Log("msg", "RDMA devices do not exist. RDMA collector won't return any data", "err", err)
+
+		return &rdmaCollector{isAvailable: false}, nil
+	}
+
+	// Get current qp mode.
+	// We cannot turn on per-PID counters when a link is already being used by a
+	// process, so we keep a state variable with the mode of each link and attempt
+	// to turn the counters on at every scrape request if they are not on already.
+	// As these per-PID counters are only supported by Mellanox devices, we set up
+	// this map only for them. The map will be nil for other types of devices.
+	qpModes, err := qpMode(rdmaCmdPath)
+	if err != nil {
+		level.Error(logger).
+			Log("msg", "Failed to get RDMA qp mode", "err", err)
+	}
+
+	// If per-QP counters are enabled, we need to disable them when the exporter
+	// exits. So create a security context with cap_setuid and cap_setgid to be
+	// able to disable per-QP counters.
+	//
+	// Setup necessary capabilities.
+	securityContexts := make(map[string]*security.SecurityContext)
+
+	if len(qpModes) > 0 {
+		level.Info(logger).Log("msg", "Per-PID QP stats available")
+
+		caps := setupCollectorCaps(logger, rdmaCollectorSubsystem, []string{"cap_setuid", "cap_setgid"})
+
+		// Setup new security context(s)
+		securityContexts[rdmaExecCmdCtx], err = security.NewSecurityContext(rdmaExecCmdCtx, caps, security.ExecAsUser, logger)
+		if err != nil {
+			level.Error(logger).Log("msg", "Failed to create a security context for RDMA collector", "err", err)
+
+			return nil, err
+		}
+	}
+
+	// Port counters descriptions.
+	portCountersDecs := map[string]string{
+		"port_constraint_errors_received_total":    "Number of packets received on the switch physical port that are discarded",
+		"port_constraint_errors_transmitted_total": "Number of packets not transmitted from the switch physical port",
+		"port_data_received_bytes_total":           "Number of data octets received on all links",
+		"port_data_transmitted_bytes_total":        "Number of data octets transmitted on all links",
+		"port_discards_received_total":             "Number of inbound packets discarded by the port because the port is down or congested",
+		"port_discards_transmitted_total":          "Number of outbound packets discarded by the port because the port is down or congested",
+		"port_errors_received_total":               "Number of packets containing an error that were received on this port",
+		"port_packets_received_total":              "Number of packets received on all VLs by this port (including errors)",
+		"port_packets_transmitted_total":           "Number of packets transmitted on all VLs from this port (including errors)",
+		"state_id":                                 "State of the InfiniBand port (0: no change, 1: down, 2: init, 3: armed, 4: active, 5: act defer)",
+	}
+
+	// HW counters descriptions.
+	hwCountersDecs := map[string]string{
+		"rx_write_requests":          "Number of received write requests for the associated QPs",
+		"rx_read_requests":           "Number of received read requests for the associated QPs",
+		"rx_atomic_requests":         "Number of received atomic request for the associated QPs",
+		"req_cqe_error":              "Number of times requester detected CQEs completed with errors",
+		"req_cqe_flush_error":        "Number of times requester detected CQEs completed with flushed errors",
+		"req_remote_access_errors":   "Number of times requester detected remote access errors",
+		"req_remote_invalid_request": "Number of times requester detected remote invalid request errors",
+		"resp_cqe_error":             "Number of times responder detected CQEs completed with errors",
+		"resp_cqe_flush_error":       "Number of times responder detected CQEs completed with flushed errors",
+		"resp_local_length_error":    "Number of times responder detected local length errors",
+		"resp_remote_access_errors":  "Number of times responder detected remote access errors",
+	}
+
+	// Active resource (QP/CQ/MR) counters descriptions.
+	wpsCountersDecs := map[string]string{
+		"qps_active":     "Number of active QPs",
+		"cqs_active":     "Number of active CQs",
+		"mrs_active":     "Number of active MRs",
+		"cqe_len_active": "Length of active CQs",
+		"mrs_len_active": "Length of active MRs",
+	}
+
+	metricDescs := make(map[string]*prometheus.Desc)
+
+	for metricName, description := range portCountersDecs {
+		metricDescs[metricName] = prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, rdmaCollectorSubsystem, metricName),
+			description,
+			[]string{"manager", "hostname", "device", "port"},
+			nil,
+		)
+	}
+
+	var hwCounters []string
+	for metricName, description := range hwCountersDecs {
+		hwCounters = append(hwCounters, metricName)
+		metricDescs[metricName] = prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, rdmaCollectorSubsystem, metricName),
+			description,
+			[]string{"manager", "hostname", "device", "port", "uuid"},
+			nil,
+		)
+	}
+
+	for metricName, description := range wpsCountersDecs {
+		metricDescs[metricName] = prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, rdmaCollectorSubsystem, metricName),
+			description,
+			[]string{"manager", "hostname", "device", "port", "uuid"},
+			nil,
+		)
+	}
+
+	return &rdmaCollector{
+		sysfs:            sysfs,
+		procfs:           procfs,
+		logger:           logger,
+		cgroupManager:    cgManager,
+		hostname:         hostname,
+		rdmaCmd:          rdmaCmdPath,
+		isAvailable:      true,
+		qpModes:          qpModes,
+		securityContexts: securityContexts,
+		metricDescs:      metricDescs,
+		hwCounters:       hwCounters,
+	}, nil
+}
+
+// Update implements Collector and exposes RDMA related metrics.
+func (c *rdmaCollector) Update(ch chan<- prometheus.Metric) error {
+	if !c.isAvailable {
+		return ErrNoData
+	}
+
+	// Check QP modes and attempt to enable per-PID counters if not already done
+	if err := c.perPIDCounters(true); err != nil {
+		level.Error(c.logger).Log("msg", "Failed to enable Per-PID QP stats", "err", err)
+	}
+
+	return c.update(ch)
+}
+
+// Stop releases system resources used by the collector.
+func (c *rdmaCollector) Stop(_ context.Context) error {
+	level.Debug(c.logger).Log("msg", "Stopping", "collector", rdmaCollectorSubsystem)
+
+	return c.perPIDCounters(false)
+}
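Metric family names above are composed as Namespace_subsystem_name by prometheus.BuildFQName. A minimal standalone check (the "ceems" namespace value is inferred from the e2e-test fixtures further down in this diff, which expose ceems_rdma_* series):

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	// Prints "ceems_rdma_qps_active", matching the series names in the
	// e2e-test output fixture in this PR.
	fmt.Println(prometheus.BuildFQName("ceems", "rdma", "qps_active"))
}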
+
+// perPIDCounters enables/disables per-PID counters for supported devices.
+func (c *rdmaCollector) perPIDCounters(enable bool) error {
+	// If there are no supported devices, return
+	if c.qpModes == nil {
+		return nil
+	}
+
+	// Return if there is no security context found
+	securityCtx, ok := c.securityContexts[rdmaExecCmdCtx]
+	if !ok {
+		return security.ErrNoSecurityCtx
+	}
+
+	// Toggle per-QP counters on every link that is not already in the desired mode
+	var allErrs error
+
+	for link, mode := range c.qpModes {
+		if mode != enable {
+			var cmd []string
+			if enable {
+				cmd = []string{"rdma", "statistic", "qp", "set", "link", link, "auto", "type,pid", "on"}
+			} else {
+				cmd = []string{"rdma", "statistic", "qp", "set", "link", link, "auto", "off"}
+			}
+
+			// Execute command as root
+			dataPtr := &security.ExecSecurityCtxData{
+				Cmd:    cmd,
+				Logger: c.logger,
+				UID:    0,
+				GID:    0,
+			}
+
+			// If the command didn't return an error, we successfully enabled/disabled the mode
+			if err := securityCtx.Exec(dataPtr); err != nil {
+				allErrs = errors.Join(allErrs, err)
+			} else {
+				c.qpModes[link] = enable
+			}
+		}
+	}
+
+	if allErrs != nil {
+		return allErrs
+	}
+
+	return nil
+}
+
+// update fetches different RDMA stats.
+func (c *rdmaCollector) update(ch chan<- prometheus.Metric) error {
+	// First get cgroups and their associated procs
+	procCgroup, err := c.procCgroups()
+	if err != nil {
+		level.Error(c.logger).Log("msg", "Failed to fetch active cgroups", "err", err)
+
+		return ErrNoData
+	}
+
+	// Initialise a wait group
+	wg := sync.WaitGroup{}
+
+	// Fetch MRs
+	wg.Add(1)
+
+	go func(p map[string]string) {
+		defer wg.Done()
+
+		mrs, err := c.devMR(p)
+		if err != nil {
+			level.Error(c.logger).Log("msg", "Failed to fetch RDMA MR stats", "err", err)
+
+			return
+		}
+
+		for uuid, mr := range mrs {
+			ch <- prometheus.MustNewConstMetric(c.metricDescs["mrs_active"], prometheus.GaugeValue, float64(mr.num), c.cgroupManager.manager, c.hostname, mr.dev, "", uuid)
+			ch <- prometheus.MustNewConstMetric(c.metricDescs["mrs_len_active"], prometheus.GaugeValue, float64(mr.len), c.cgroupManager.manager, c.hostname, mr.dev, "", uuid)
+		}
+	}(procCgroup)
+
+	// Fetch CQs
+	wg.Add(1)
+
+	go func(p map[string]string) {
+		defer wg.Done()
+
+		cqs, err := c.devCQ(p)
+		if err != nil {
+			level.Error(c.logger).Log("msg", "Failed to fetch RDMA CQ stats", "err", err)
+
+			return
+		}
+
+		for uuid, cq := range cqs {
+			ch <- prometheus.MustNewConstMetric(c.metricDescs["cqs_active"], prometheus.GaugeValue, float64(cq.num), c.cgroupManager.manager, c.hostname, cq.dev, "", uuid)
+			ch <- prometheus.MustNewConstMetric(c.metricDescs["cqe_len_active"], prometheus.GaugeValue, float64(cq.len), c.cgroupManager.manager, c.hostname, cq.dev, "", uuid)
+		}
+	}(procCgroup)
+
+	// Fetch QPs
+	wg.Add(1)
+
+	go func(p map[string]string) {
+		defer wg.Done()
+
+		qps, err := c.linkQP(p)
+		if err != nil {
+			level.Error(c.logger).Log("msg", "Failed to fetch RDMA QP stats", "err", err)
+
+			return
+		}
+
+		for uuid, qp := range qps {
+			ch <- prometheus.MustNewConstMetric(c.metricDescs["qps_active"], prometheus.GaugeValue, float64(qp.num), c.cgroupManager.manager, c.hostname, qp.dev, qp.port, uuid)
+
+			for _, hwCounter := range c.hwCounters {
+				if qp.hwCounters[hwCounter] > 0 {
+					ch <- prometheus.MustNewConstMetric(c.metricDescs[hwCounter], prometheus.CounterValue, float64(qp.hwCounters[hwCounter]), c.cgroupManager.manager, c.hostname, qp.dev, qp.port, uuid)
+				}
+			}
+		}
+	}(procCgroup)
+
+	// Fetch sys wide counters
+	wg.Add(1)
+
+	go func() {
+		defer wg.Done()
+
+		counters, err := c.linkCountersSysWide()
+		if err != nil {
+			level.Error(c.logger).Log("msg", "Failed to fetch system
wide RDMA counters", "err", err)
+
+			return
+		}
+
+		var vType prometheus.ValueType
+
+		for link, cnts := range counters {
+			l := strings.Split(link, "/")
+			device := l[0]
+			port := l[1]
+
+			for n, v := range cnts {
+				if v > 0 {
+					if n == "state_id" {
+						vType = prometheus.GaugeValue
+					} else {
+						vType = prometheus.CounterValue
+					}
+					ch <- prometheus.MustNewConstMetric(c.metricDescs[n], vType, float64(v), c.cgroupManager.manager, c.hostname, device, port)
+				}
+			}
+		}
+	}()
+
+	// Wait for all go routines
+	wg.Wait()
+
+	return nil
+}
+
+// procCgroups returns the cgroup ID of all relevant processes.
+func (c *rdmaCollector) procCgroups() (map[string]string, error) {
+	// First get cgroups and their associated procs
+	cgroups, err := cgroupProcs(c.procfs, c.cgroupManager.idRegex, nil, c.cgroupManager.procFilter)
+	if err != nil {
+		level.Error(c.logger).Log("msg", "Failed to fetch active cgroups", "err", err)
+
+		return nil, err
+	}
+
+	// Make an inverse mapping of cgroups: PID -> cgroup ID
+	procCgroup := make(map[string]string)
+
+	for cgroupID, procs := range cgroups {
+		for _, proc := range procs {
+			p := strconv.FormatInt(int64(proc.PID), 10)
+			procCgroup[p] = cgroupID
+		}
+	}
+
+	return procCgroup, nil
+}
+
+// devMR returns Memory Regions (MRs) stats of all active cgroups.
+func (c *rdmaCollector) devMR(procCgroup map[string]string) (map[string]*mr, error) {
+	// Arguments to command
+	args := []string{"resource", "show", "mr"}
+
+	// Execute command
+	out, err := osexec.Execute(c.rdmaCmd, args, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	// Define regexes
+	devRegex := regexp.MustCompile(`^dev\s*([a-z0-9_]+)`)
+	pidRegex := regexp.MustCompile(`.+?pid\s*([\d]+)`)
+	mrlenRegex := regexp.MustCompile(`.+?mrlen\s*([\d]+)`)
+
+	// Read line by line and match dev, pid and mrlen
+	mrs := make(map[string]*mr)
+
+	for _, line := range strings.Split(string(out), "\n") {
+		if devMatch := devRegex.FindStringSubmatch(line); len(devMatch) > 1 {
+			if pidMatch := pidRegex.FindStringSubmatch(line); len(pidMatch) > 1 {
+				if uuid, ok := procCgroup[pidMatch[1]]; ok {
+					if mrLenMatch := mrlenRegex.FindStringSubmatch(line); len(mrLenMatch) > 1 {
+						if l, err := strconv.ParseUint(mrLenMatch[1], 10, 64); err == nil {
+							if _, ok := mrs[uuid]; ok {
+								mrs[uuid].num++
+								mrs[uuid].len += l
+							} else {
+								mrs[uuid] = &mr{1, l, devMatch[1]}
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+
+	return mrs, nil
+}
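A quick standalone illustration of what devMR's regexes extract, using one line of `rdma resource show mr` output taken verbatim from the testdata/rdma fixture in this PR:

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Sample line from the testdata/rdma fixture below.
	line := "dev mlx5_0 mrn 4 mrlen 2097152 pdn 9 pid 46231 comm ib_write_bw"

	devRegex := regexp.MustCompile(`^dev\s*([a-z0-9_]+)`)
	pidRegex := regexp.MustCompile(`.+?pid\s*([\d]+)`)
	mrlenRegex := regexp.MustCompile(`.+?mrlen\s*([\d]+)`)

	fmt.Println(devRegex.FindStringSubmatch(line)[1])   // mlx5_0
	fmt.Println(pidRegex.FindStringSubmatch(line)[1])   // 46231
	fmt.Println(mrlenRegex.FindStringSubmatch(line)[1]) // 2097152
}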
+
+// devCQ returns Completion Queues (CQs) stats of all active cgroups.
+func (c *rdmaCollector) devCQ(procCgroup map[string]string) (map[string]*cq, error) {
+	// Arguments to command
+	args := []string{"resource", "show", "cq"}
+
+	// Execute command
+	out, err := osexec.Execute(c.rdmaCmd, args, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	// Define regexes
+	devRegex := regexp.MustCompile(`^dev\s*([a-z0-9_]+)`)
+	pidRegex := regexp.MustCompile(`.+?pid\s*([\d]+)`)
+	cqeRegex := regexp.MustCompile(`.+?cqe\s*([\d]+)`)
+
+	// Read line by line and match dev, pid and cqe
+	cqs := make(map[string]*cq)
+
+	for _, line := range strings.Split(string(out), "\n") {
+		if devMatch := devRegex.FindStringSubmatch(line); len(devMatch) > 1 {
+			if pidMatch := pidRegex.FindStringSubmatch(line); len(pidMatch) > 1 {
+				if uuid, ok := procCgroup[pidMatch[1]]; ok {
+					if cqeMatch := cqeRegex.FindStringSubmatch(line); len(cqeMatch) > 1 {
+						if l, err := strconv.ParseUint(cqeMatch[1], 10, 64); err == nil {
+							if _, ok := cqs[uuid]; ok {
+								cqs[uuid].num++
+								cqs[uuid].len += l
+							} else {
+								cqs[uuid] = &cq{1, l, devMatch[1]}
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+
+	return cqs, nil
+}
+
+// linkQP returns Queue Pairs (QPs) stats of all active cgroups.
+func (c *rdmaCollector) linkQP(procCgroup map[string]string) (map[string]*qp, error) {
+	// Arguments to command
+	args := []string{"resource", "show", "qp"}
+
+	// Execute command
+	out, err := osexec.Execute(c.rdmaCmd, args, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	// Define regexes
+	linkRegex := regexp.MustCompile(`^link\s*([a-z0-9_/]+)`)
+	pidRegex := regexp.MustCompile(`.+?pid\s*([\d]+)`)
+
+	// Read line by line and match link and pid
+	qps := make(map[string]*qp)
+
+	for _, line := range strings.Split(string(out), "\n") {
+		if linkMatch := linkRegex.FindStringSubmatch(line); len(linkMatch) > 1 {
+			if pidMatch := pidRegex.FindStringSubmatch(line); len(pidMatch) > 1 {
+				if uuid, ok := procCgroup[pidMatch[1]]; ok {
+					if _, ok := qps[uuid]; ok {
+						qps[uuid].num++
+					} else {
+						link := strings.Split(linkMatch[1], "/")
+						if len(link) == 2 {
+							qps[uuid] = &qp{1, link[0], link[1], make(map[string]uint64)}
+						}
+					}
+				}
+			}
+		}
+	}
+
+	// If per-PID counters are enabled, fetch them
+	if len(c.qpModes) > 0 {
+		// Arguments to command
+		args := []string{"statistic", "qp", "show"}
+
+		// Execute command
+		out, err := osexec.Execute(c.rdmaCmd, args, nil)
+		if err != nil {
+			level.Error(c.logger).Log("msg", "Failed to fetch per PID QP stats", "err", err)
+
+			return qps, nil
+		}
+
+		for _, line := range strings.Split(string(out), "\n") {
+			if linkMatch := linkRegex.FindStringSubmatch(line); len(linkMatch) > 1 {
+				for _, hwCounter := range c.hwCounters {
+					if pidMatch := pidRegex.FindStringSubmatch(line); len(pidMatch) > 1 {
+						if uuid, ok := procCgroup[pidMatch[1]]; ok {
+							counterRegex := regexp.MustCompile(fmt.Sprintf(`.+?%s\s*([\d]+)`, hwCounter))
+							if counterMatch := counterRegex.FindStringSubmatch(line); len(counterMatch) > 1 {
+								if v, err := strconv.ParseUint(counterMatch[1], 10, 64); err == nil {
+									if _, ok := qps[uuid]; !ok {
+										link := strings.Split(linkMatch[1], "/")
+										qps[uuid] = &qp{1, link[0], link[1], make(map[string]uint64)}
+									}
+
+									qps[uuid].hwCounters[hwCounter] = v
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+
+	return qps, nil
+}
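A design note on linkQP above: counterRegex is compiled inside the per-line, per-counter loops, so the same patterns are recompiled for every output line. A hypothetical refactor (not part of this PR) hoists compilation out and reuses the precompiled regexes; the sample line is from the testdata/rdma fixture:

package main

import (
	"fmt"
	"regexp"
)

func main() {
	hwCounters := []string{"rx_write_requests", "rx_read_requests"}

	// Compile one regex per hardware counter up front instead of per line.
	counterRegexes := make(map[string]*regexp.Regexp, len(hwCounters))
	for _, hwCounter := range hwCounters {
		counterRegexes[hwCounter] = regexp.MustCompile(fmt.Sprintf(`.+?%s\s*([\d]+)`, hwCounter))
	}

	// One line of `rdma statistic qp show` output from the fixture.
	line := "link mlx5_0/1 cntn 5 qp-type RC pid 46235 comm ib_write_bw rx_write_requests 41988882 rx_read_requests 0"

	for name, re := range counterRegexes {
		if m := re.FindStringSubmatch(line); len(m) > 1 {
			fmt.Println(name, m[1])
		}
	}
}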
+
+// linkCountersSysWide returns system-wide counters of all RDMA devices.
+func (c *rdmaCollector) linkCountersSysWide() (map[string]map[string]uint64, error) {
+	devices, err := c.sysfs.InfiniBandClass()
+	if err != nil {
+		return nil, fmt.Errorf("error obtaining InfiniBand class info: %w", err)
+	}
+
+	counters := make(map[string]map[string]uint64)
+
+	for _, device := range devices {
+		for _, port := range device.Ports {
+			link := fmt.Sprintf("%s/%d", device.Name, port.Port)
+			counters[link] = map[string]uint64{
+				"port_constraint_errors_received_total":    sanitizeMetric(port.Counters.PortRcvConstraintErrors),
+				"port_constraint_errors_transmitted_total": sanitizeMetric(port.Counters.PortXmitConstraintErrors),
+				"port_data_received_bytes_total":           sanitizeMetric(port.Counters.PortRcvData),
+				"port_data_transmitted_bytes_total":        sanitizeMetric(port.Counters.PortXmitData),
+				"port_discards_received_total":             sanitizeMetric(port.Counters.PortRcvDiscards),
+				"port_discards_transmitted_total":          sanitizeMetric(port.Counters.PortXmitDiscards),
+				"port_errors_received_total":               sanitizeMetric(port.Counters.PortRcvErrors),
+				"port_packets_received_total":              sanitizeMetric(port.Counters.PortRcvPackets),
+				"port_packets_transmitted_total":           sanitizeMetric(port.Counters.PortXmitPackets),
+				"state_id":                                 uint64(port.StateID),
+			}
+		}
+	}
+
+	return counters, nil
+}
+
+// sanitizeMetric returns 0 if the pointer is nil, else the metric's value.
+func sanitizeMetric(value *uint64) uint64 {
+	if value == nil {
+		return 0
+	}
+
+	return *value
+}
+
+// qpMode returns the current QP mode for all links.
+func qpMode(rdmaCmd string) (map[string]bool, error) {
+	args := []string{"statistic", "qp", "mode"}
+
+	// Execute command
+	out, err := osexec.Execute(rdmaCmd, args, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	// Define regexes
+	linkRegex := regexp.MustCompile(`^link\s*([a-z0-9_/]+)`)
+	autoRegex := regexp.MustCompile(`.+?auto\s*([a-z,]+)`)
+
+	// Split output and get mode for each device
+	linkMode := make(map[string]bool)
+
+	for _, line := range strings.Split(string(out), "\n") {
+		if linkMatch := linkRegex.FindStringSubmatch(line); len(linkMatch) > 1 && strings.HasPrefix(linkMatch[1], "mlx") {
+			if autoMatch := autoRegex.FindStringSubmatch(line); len(autoMatch) > 1 {
+				if autoMatch[1] == "off" {
+					linkMode[linkMatch[1]] = false
+				} else {
+					linkMode[linkMatch[1]] = true
+				}
+			}
+		}
+	}
+
+	return linkMode, nil
+}
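A sketch of the lines qpMode is parsing. The exact `rdma statistic qp mode` output format is not covered by the fixture script in this PR, so the sample lines below are plausible reconstructions inferred from the regexes above, not captured output:

package main

import (
	"fmt"
	"regexp"
	"strings"
)

func main() {
	// Assumed output shape: one "link <dev>/<port> auto <mode>" line per link.
	out := "link mlx4_0/1 auto off\nlink mlx5_0/1 auto type,pid on"

	linkRegex := regexp.MustCompile(`^link\s*([a-z0-9_/]+)`)
	autoRegex := regexp.MustCompile(`.+?auto\s*([a-z,]+)`)

	for _, line := range strings.Split(out, "\n") {
		linkMatch := linkRegex.FindStringSubmatch(line)
		autoMatch := autoRegex.FindStringSubmatch(line)
		if len(linkMatch) > 1 && len(autoMatch) > 1 {
			// Only Mellanox (mlx*) links are tracked; "off" maps to false.
			fmt.Println(linkMatch[1], strings.HasPrefix(linkMatch[1], "mlx"), autoMatch[1] != "off")
		}
	}
}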
+
+// rdmaCollectorEnabled returns true if RDMA stats are enabled.
+func rdmaCollectorEnabled() bool {
+	return *rdmaStatsEnabled
+}
diff --git a/pkg/collector/rdma_test.go b/pkg/collector/rdma_test.go
new file mode 100644
index 00000000..b3d1ef3d
--- /dev/null
+++ b/pkg/collector/rdma_test.go
@@ -0,0 +1,265 @@
+//go:build !nordma
+// +build !nordma
+
+package collector
+
+import (
+	"context"
+	"testing"
+
+	"github.com/containerd/cgroups/v3"
+	"github.com/go-kit/log"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/procfs"
+	"github.com/prometheus/procfs/sysfs"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestRDMACollector(t *testing.T) {
+	_, err := CEEMSExporterApp.Parse([]string{
+		"--path.procfs", "testdata/proc",
+		"--path.sysfs", "testdata/sys",
+		"--collector.rdma.stats",
+		"--collector.rdma.cmd", "testdata/rdma",
+	})
+	require.NoError(t, err)
+
+	// cgroup manager
+	cgManager := &cgroupManager{
+		mode:       cgroups.Unified,
+		mountPoint: "testdata/sys/fs/cgroup/system.slice/slurmstepd.scope",
+		idRegex:    slurmCgroupPathRegex,
+		procFilter: func(p string) bool {
+			return slurmIgnoreProcsRegex.MatchString(p)
+		},
+	}
+
+	collector, err := NewRDMACollector(log.NewNopLogger(), cgManager)
+	require.NoError(t, err)
+
+	// Setup background goroutine to capture metrics.
+	metrics := make(chan prometheus.Metric)
+	defer close(metrics)
+
+	go func() {
+		i := 0
+		for range metrics {
+			i++
+		}
+	}()
+
+	err = collector.Update(metrics)
+	require.NoError(t, err)
+
+	err = collector.Stop(context.Background())
+	require.NoError(t, err)
+}
+
+func TestDevMR(t *testing.T) {
+	_, err := CEEMSExporterApp.Parse([]string{
+		"--path.procfs", "testdata/proc",
+	})
+	require.NoError(t, err)
+
+	// cgroup manager
+	cgManager := &cgroupManager{
+		mode:    cgroups.Unified,
+		idRegex: slurmCgroupPathRegex,
+		procFilter: func(p string) bool {
+			return slurmIgnoreProcsRegex.MatchString(p)
+		},
+	}
+
+	// Instantiate a new Proc FS
+	procfs, err := procfs.NewFS(*procfsPath)
+	require.NoError(t, err)
+
+	c := rdmaCollector{
+		logger:        log.NewNopLogger(),
+		rdmaCmd:       "testdata/rdma",
+		procfs:        procfs,
+		cgroupManager: cgManager,
+	}
+
+	// Get cgroup IDs
+	procCgroup, err := c.procCgroups()
+	require.NoError(t, err)
+
+	expectedMRs := map[string]*mr{
+		"1320003": {2, 4194304, "mlx5_0"},
+		"4824887": {2, 4194304, "mlx5_0"},
+	}
+
+	// Get MR stats
+	mrs, err := c.devMR(procCgroup)
+	require.NoError(t, err)
+	assert.Equal(t, expectedMRs, mrs)
+}
+
+func TestDevCQ(t *testing.T) {
+	_, err := CEEMSExporterApp.Parse([]string{
+		"--path.procfs", "testdata/proc",
+	})
+	require.NoError(t, err)
+
+	// cgroup manager
+	cgManager := &cgroupManager{
+		mode:    cgroups.Unified,
+		idRegex: slurmCgroupPathRegex,
+		procFilter: func(p string) bool {
+			return slurmIgnoreProcsRegex.MatchString(p)
+		},
+	}
+
+	// Instantiate a new Proc FS
+	procfs, err := procfs.NewFS(*procfsPath)
+	require.NoError(t, err)
+
+	c := rdmaCollector{
+		logger:        log.NewNopLogger(),
+		rdmaCmd:       "testdata/rdma",
+		procfs:        procfs,
+		cgroupManager: cgManager,
+	}
+
+	// Get cgroup IDs
+	procCgroup, err := c.procCgroups()
+	require.NoError(t, err)
+
+	expectedCQs := map[string]*cq{
+		"1320003": {2, 8190, "mlx5_0"},
+		"4824887": {2, 8190, "mlx5_0"},
+	}
+
+	// Get CQ stats
+	cqs, err := c.devCQ(procCgroup)
+	require.NoError(t, err)
+	assert.Equal(t, expectedCQs, cqs)
+}
+
+func TestLinkQP(t *testing.T) {
+	_, err := CEEMSExporterApp.Parse([]string{
+		"--path.procfs", "testdata/proc",
+	})
+	require.NoError(t, err)
+
+	// cgroup manager
+	cgManager := &cgroupManager{
+		mode:    cgroups.Unified,
+		idRegex: slurmCgroupPathRegex,
+		procFilter: func(p string) bool {
+			return slurmIgnoreProcsRegex.MatchString(p)
+		},
+	}
+
+	// Instantiate a new Proc FS
+	procfs, err := procfs.NewFS(*procfsPath)
+	require.NoError(t, err)
+
+	c := rdmaCollector{
+		logger:        log.NewNopLogger(),
+		rdmaCmd:       "testdata/rdma",
+		procfs:        procfs,
+		cgroupManager: cgManager,
+		qpModes:       map[string]bool{"mlx5_0": true},
+		hwCounters:    []string{"rx_write_requests", "rx_read_requests"},
+	}
+
+	// Get cgroup IDs
+	procCgroup, err := c.procCgroups()
+	require.NoError(t, err)
+
+	expected := map[string]*qp{
+		"1320003": {16, "mlx5_0", "1", map[string]uint64{"rx_read_requests": 0, "rx_write_requests": 41988882}},
+		"4824887": {16, "mlx5_0", "1", map[string]uint64{"rx_write_requests": 0, "rx_read_requests": 0}},
+	}
+
+	// Get QP stats
+	qps, err := c.linkQP(procCgroup)
+	require.NoError(t, err)
+	assert.Equal(t, expected, qps)
+}
+
+func TestLinkCountersSysWide(t *testing.T) {
+	_, err := CEEMSExporterApp.Parse([]string{
+		"--path.sysfs", "testdata/sys",
+	})
+	require.NoError(t, err)
+
+	// cgroup manager
+	cgManager := &cgroupManager{
+		mode:    cgroups.Unified,
+		idRegex: slurmCgroupPathRegex,
+		procFilter: func(p string) bool {
+			return slurmIgnoreProcsRegex.MatchString(p)
+		},
+	}
+
+	// Instantiate a new Sys FS
+	sysfs, err := sysfs.NewFS(*sysPath)
+	require.NoError(t, err)
+
+	c := rdmaCollector{
+		logger:        log.NewNopLogger(),
+		sysfs:         sysfs,
+		cgroupManager: cgManager,
+		hwCounters:    []string{"rx_write_requests", "rx_read_requests"},
+	}
+
+	expected := map[string]map[string]uint64{
+		"hfi1_0/1": {
+			"port_constraint_errors_received_total":    0x0,
+			"port_constraint_errors_transmitted_total": 0x0,
+			"port_data_received_bytes_total":           0x1416445f428,
+			"port_data_transmitted_bytes_total":        0xfec563343c,
+			"port_discards_received_total":             0x0,
+			"port_discards_transmitted_total":          0x0,
+			"port_errors_received_total":               0x0,
+			"port_packets_received_total":              0x2607abd3,
+			"port_packets_transmitted_total":           0x21dfdb88,
+			"state_id":                                 0x4,
+		},
+		"mlx4_0/1": {
+			"port_constraint_errors_received_total":    0x0,
+			"port_constraint_errors_transmitted_total": 0x0,
+			"port_data_received_bytes_total":           0x21194bae4,
+			"port_data_transmitted_bytes_total":        0x18b043df3c,
+			"port_discards_received_total":             0x0,
+			"port_discards_transmitted_total":          0x0,
+			"port_errors_received_total":               0x0,
+			"port_packets_received_total":              0x532195c,
+			"port_packets_transmitted_total":           0x51c32e2,
+			"state_id":                                 0x4,
+		},
+		"mlx4_0/2": {
+			"port_constraint_errors_received_total":    0x0,
+			"port_constraint_errors_transmitted_total": 0x0,
+			"port_data_received_bytes_total":           0x24a9d24c0,
+			"port_data_transmitted_bytes_total":        0x18b7b6d468,
+			"port_discards_received_total":             0x0,
+			"port_discards_transmitted_total":          0x0,
+			"port_errors_received_total":               0x0,
+			"port_packets_received_total":              0x5531960,
+			"port_packets_transmitted_total":           0x5484702,
+			"state_id":                                 0x4,
+		},
+		"mlx5_0/1": {
+			"port_constraint_errors_received_total":    0x0,
+			"port_constraint_errors_transmitted_total": 0x0,
+			"port_data_received_bytes_total":           0x10e1a85288,
+			"port_data_transmitted_bytes_total":        0xa7aeb10cfc0,
+			"port_discards_received_total":             0x0,
+			"port_discards_transmitted_total":          0x0,
+			"port_errors_received_total":               0x0,
+			"port_packets_received_total":              0x204c9520,
+			"port_packets_transmitted_total":           0x28a29aec4,
+			"state_id":                                 0x4,
+		},
+	}
+
+	// Get system-wide counters
+	counters, err := c.linkCountersSysWide()
+	require.NoError(t, err)
+	assert.Equal(t, expected, counters)
+}
diff --git a/pkg/collector/slurm.go b/pkg/collector/slurm.go
index db389173..427c4e07 100644
--- a/pkg/collector/slurm.go
+++ b/pkg/collector/slurm.go
@@ -101,6 +101,7 @@ type slurmCollector struct {
 	cgroupCollector *cgroupCollector
 	perfCollector   *perfCollector
 	ebpfCollector   *ebpfCollector
+	rdmaCollector   *rdmaCollector
 	hostname        string
 	gpuDevs         map[int]Device
 	procFS          procfs.FS
@@ -175,6 +176,18 @@ func NewSlurmCollector(logger log.Logger) (Collector, error) {
 		}
 	}
 
+	// Start new instance of rdmaCollector
+	var rdmaCollector *rdmaCollector
+
+	if rdmaCollectorEnabled() {
+		rdmaCollector, err = NewRDMACollector(logger, cgroupManager)
+		if err != nil {
+			level.Error(logger).Log("msg", "Failed to create RDMA collector", "err", err)
+
+			return nil, err
+		}
+	}
+
 	// Attempt to get GPU devices
 	var gpuTypes []string
 
@@ -220,6 +233,7 @@ func NewSlurmCollector(logger log.Logger) (Collector, error) {
 		cgroupCollector: cgCollector,
 		perfCollector:   perfCollector,
 		ebpfCollector:   ebpfCollector,
+		rdmaCollector:   rdmaCollector,
 		hostname:        hostname,
 		gpuDevs:         gpuDevs,
 		procFS:          procFS,
@@ -300,6 +314,19 @@ func (c *slurmCollector) Update(ch chan<- prometheus.Metric) error {
 		}()
 	}
 
+	if rdmaCollectorEnabled() {
+		wg.Add(1)
+
+		go func() {
+			defer wg.Done()
+
+			// Update RDMA metrics
+			if err := c.rdmaCollector.Update(ch); err != nil {
+				level.Error(c.logger).Log("msg", "Failed to update RDMA stats", "err", err)
+			}
+		}()
+	}
+
 	// Wait for all go routines
 	wg.Wait()
 
@@ -330,6 +357,13 @@ func (c *slurmCollector) Stop(ctx context.Context) error {
 		}
 	}
 
+	// Stop rdmaCollector
+	if rdmaCollectorEnabled() {
+		if err := c.rdmaCollector.Stop(ctx); err != nil {
+			level.Error(c.logger).Log("msg", "Failed to stop RDMA collector", "err", err)
+		}
+	}
+
 	return nil
 }
diff --git a/pkg/collector/slurm_test.go b/pkg/collector/slurm_test.go
index a7c9ef2f..ffaa5ef6 100644
--- a/pkg/collector/slurm_test.go
+++ b/pkg/collector/slurm_test.go
@@ -36,10 +36,12 @@ func TestNewSlurmCollector(t *testing.T) {
 		[]string{
 			"--path.cgroupfs", "testdata/sys/fs/cgroup",
 			"--path.procfs", "testdata/proc",
+			"--path.sysfs", "testdata/sys",
 			"--collector.slurm.gpu-job-map-path", "testdata/gpujobmap",
 			"--collector.slurm.swap-memory-metrics",
 			"--collector.slurm.psi-metrics",
 			"--collector.perf.hardware-events",
+			"--collector.rdma.stats",
 			"--collector.slurm.nvidia-smi-path", "testdata/nvidia-smi",
 			"--collector.cgroups.force-version", "v2",
 		},
diff --git a/pkg/collector/testdata/output/e2e-test-cgroupsv2-nvidia-ipmiutil-output.txt b/pkg/collector/testdata/output/e2e-test-cgroupsv2-nvidia-ipmiutil-output.txt
index a9652bab..8d8653c9 100644
--- a/pkg/collector/testdata/output/e2e-test-cgroupsv2-nvidia-ipmiutil-output.txt
+++ b/pkg/collector/testdata/output/e2e-test-cgroupsv2-nvidia-ipmiutil-output.txt
@@ -102,6 +102,56 @@ ceems_meminfo_MemTotal_bytes{hostname=""} 1.6042172416e+10
 # TYPE ceems_rapl_package_joules_total counter
 ceems_rapl_package_joules_total{hostname="",index="0",path="pkg/collector/testdata/sys/class/powercap/intel-rapl:0"} 258218.293244
 ceems_rapl_package_joules_total{hostname="",index="1",path="pkg/collector/testdata/sys/class/powercap/intel-rapl:1"} 130570.505826
+# HELP ceems_rdma_cqe_len_active Length of active CQs
+# TYPE ceems_rdma_cqe_len_active gauge
+ceems_rdma_cqe_len_active{device="mlx5_0",hostname="",manager="slurm",port="",uuid="1320003"} 8190
+ceems_rdma_cqe_len_active{device="mlx5_0",hostname="",manager="slurm",port="",uuid="4824887"} 8190
+# HELP ceems_rdma_cqs_active Number of active CQs
+# TYPE ceems_rdma_cqs_active gauge
+ceems_rdma_cqs_active{device="mlx5_0",hostname="",manager="slurm",port="",uuid="1320003"} 2
+ceems_rdma_cqs_active{device="mlx5_0",hostname="",manager="slurm",port="",uuid="4824887"} 2
+# HELP ceems_rdma_mrs_active Number of active MRs
+# TYPE ceems_rdma_mrs_active gauge
+ceems_rdma_mrs_active{device="mlx5_0",hostname="",manager="slurm",port="",uuid="1320003"} 2
+ceems_rdma_mrs_active{device="mlx5_0",hostname="",manager="slurm",port="",uuid="4824887"} 2
+# HELP ceems_rdma_mrs_len_active Length of active MRs
+# TYPE ceems_rdma_mrs_len_active gauge
+ceems_rdma_mrs_len_active{device="mlx5_0",hostname="",manager="slurm",port="",uuid="1320003"} 4.194304e+06
+ceems_rdma_mrs_len_active{device="mlx5_0",hostname="",manager="slurm",port="",uuid="4824887"} 4.194304e+06
+# HELP ceems_rdma_port_data_received_bytes_total Number of data octets received on all links
+# TYPE ceems_rdma_port_data_received_bytes_total counter
+ceems_rdma_port_data_received_bytes_total{device="hfi1_0",hostname="",manager="slurm",port="1"} 1.380366808104e+12
+ceems_rdma_port_data_received_bytes_total{device="mlx4_0",hostname="",manager="slurm",port="1"} 8.884894436e+09
+ceems_rdma_port_data_received_bytes_total{device="mlx4_0",hostname="",manager="slurm",port="2"} 9.841747136e+09
+ceems_rdma_port_data_received_bytes_total{device="mlx5_0",hostname="",manager="slurm",port="1"} 7.2505381512e+10
+# HELP ceems_rdma_port_data_transmitted_bytes_total Number of data octets transmitted on all links
+# TYPE ceems_rdma_port_data_transmitted_bytes_total counter
+ceems_rdma_port_data_transmitted_bytes_total{device="hfi1_0",hostname="",manager="slurm",port="1"} 1.094233306172e+12
+ceems_rdma_port_data_transmitted_bytes_total{device="mlx4_0",hostname="",manager="slurm",port="1"} 1.0603645318e+11
+ceems_rdma_port_data_transmitted_bytes_total{device="mlx4_0",hostname="",manager="slurm",port="2"} 1.0616142756e+11
+ceems_rdma_port_data_transmitted_bytes_total{device="mlx5_0",hostname="",manager="slurm",port="1"} 1.1523046035392e+13
+# HELP ceems_rdma_port_packets_received_total Number of packets received on all VLs by this port (including errors)
+# TYPE ceems_rdma_port_packets_received_total counter
+ceems_rdma_port_packets_received_total{device="hfi1_0",hostname="",manager="slurm",port="1"} 6.38036947e+08
+ceems_rdma_port_packets_received_total{device="mlx4_0",hostname="",manager="slurm",port="1"} 8.7169372e+07
+ceems_rdma_port_packets_received_total{device="mlx4_0",hostname="",manager="slurm",port="2"} 8.9332064e+07
+ceems_rdma_port_packets_received_total{device="mlx5_0",hostname="",manager="slurm",port="1"} 5.41889824e+08
+# HELP ceems_rdma_port_packets_transmitted_total Number of packets transmitted on all VLs from this port (including errors)
+# TYPE ceems_rdma_port_packets_transmitted_total counter
+ceems_rdma_port_packets_transmitted_total{device="hfi1_0",hostname="",manager="slurm",port="1"} 5.68318856e+08
+ceems_rdma_port_packets_transmitted_total{device="mlx4_0",hostname="",manager="slurm",port="1"} 8.5734114e+07
+ceems_rdma_port_packets_transmitted_total{device="mlx4_0",hostname="",manager="slurm",port="2"} 8.862285e+07
+ceems_rdma_port_packets_transmitted_total{device="mlx5_0",hostname="",manager="slurm",port="1"} 1.0907922116e+10
+# HELP ceems_rdma_qps_active Number of active QPs
+# TYPE ceems_rdma_qps_active gauge
+ceems_rdma_qps_active{device="mlx5_0",hostname="",manager="slurm",port="1",uuid="1320003"} 16
+ceems_rdma_qps_active{device="mlx5_0",hostname="",manager="slurm",port="1",uuid="4824887"} 16
+# HELP ceems_rdma_state_id State of the InfiniBand port (0: no change, 1: down, 2: init, 3: armed, 4: active, 5: act defer)
+# TYPE ceems_rdma_state_id gauge
+ceems_rdma_state_id{device="hfi1_0",hostname="",manager="slurm",port="1"} 4
+ceems_rdma_state_id{device="mlx4_0",hostname="",manager="slurm",port="1"} 4
+ceems_rdma_state_id{device="mlx4_0",hostname="",manager="slurm",port="2"} 4
+ceems_rdma_state_id{device="mlx5_0",hostname="",manager="slurm",port="1"} 4
 # HELP ceems_scrape_collector_duration_seconds ceems_exporter: Duration of a collector scrape.
 # TYPE ceems_scrape_collector_duration_seconds gauge
 # HELP ceems_scrape_collector_success ceems_exporter: Whether a collector succeeded.
diff --git a/pkg/collector/testdata/rdma b/pkg/collector/testdata/rdma
new file mode 100755
index 00000000..eec4bd86
--- /dev/null
+++ b/pkg/collector/testdata/rdma
@@ -0,0 +1,118 @@
+#!/bin/sh
+
+sub_help(){
+    echo "Usage: rdma [ OPTIONS ] OBJECT { COMMAND | help }"
+    echo "       rdma [ -f[orce] ] -b[atch] filename"
+    echo "where  OBJECT := { dev | link | resource | system | statistic | help }"
+    echo "       OPTIONS := { -V[ersion] | -d[etails] | -j[son] | -p[retty] -r[aw]}"
+}
+
+print_mr(){
+    echo """dev mlx5_0 mrn 4 mrlen 2097152 pdn 9 pid 46231 comm ib_write_bw
+dev mlx5_0 mrn 5 mrlen 2097152 pdn 8 pid 46235 comm ib_write_bw
+dev mlx5_0 mrn 4 mrlen 2097152 pdn 9 pid 46236 comm ib_write_bw
+dev mlx5_0 mrn 5 mrlen 2097152 pdn 8 pid 46281 comm ib_write_bw"""
+}
+
+print_cq(){
+    echo """dev mlx5_0 cqn 1 cqe 2047 users 5 poll-ctx WORKQUEUE adaptive-moderation on comm [ib_core]
+dev mlx5_0 cqn 2 cqe 255 users 1 poll-ctx DIRECT adaptive-moderation on comm [mlx5_ib]
+dev mlx5_0 cqn 3 cqe 255 users 0 poll-ctx DIRECT adaptive-moderation on comm [mlx5_ib]
+dev mlx5_0 cqn 8 cqe 4095 users 32 adaptive-moderation off ctxn 4 pid 46231 comm ib_write_bw
+dev mlx5_0 cqn 9 cqe 4095 users 32 adaptive-moderation off ctxn 5 pid 46235 comm ib_write_bw
+dev mlx5_0 cqn 8 cqe 4095 users 32 adaptive-moderation off ctxn 4 pid 46236 comm ib_write_bw
+dev mlx5_0 cqn 9 cqe 4095 users 32 adaptive-moderation off ctxn 5 pid 46281 comm ib_write_bw"""
+}
+
+print_qp(){
+    echo """link mlx5_0/- lqpn 0 type SMI state RTS sq-psn 0 comm [ib_core]
+link mlx5_0/- lqpn 1 type GSI state RTS sq-psn 0 comm [ib_core]
+link mlx5_0/1 lqpn 101 type UD state RTS sq-psn 79 comm [ib_core]
+link mlx5_0/1 lqpn 813 rqpn 814 type RC state RTS rq-psn 9940491 sq-psn 2406910 path-mig-state MIGRATED pdn 8 pid 46231 comm ib_write_bw
+link mlx5_0/1 lqpn 814 rqpn 813 type RC state RTR rq-psn 2406926 sq-psn 0 path-mig-state MIGRATED pdn 9 pid 46235 comm ib_write_bw
+link mlx5_0/1 lqpn 815 rqpn 816 type RC state RTR rq-psn 13129518 sq-psn 0 path-mig-state MIGRATED pdn 9 pid 46235 comm ib_write_bw
+link mlx5_0/1 lqpn 816 rqpn 815 type RC state RTS rq-psn 5560784 sq-psn 13129534 path-mig-state MIGRATED pdn 8 pid 46231 comm ib_write_bw
+link mlx5_0/1 lqpn 817 rqpn 818 type RC state RTR rq-psn 11593195 sq-psn 0 path-mig-state MIGRATED pdn 9 pid 46235 comm ib_write_bw
+link mlx5_0/1 lqpn 818 rqpn 817 type RC state RTS rq-psn 9218980 sq-psn 11593210 path-mig-state MIGRATED pdn 8 pid 46231 comm ib_write_bw
+link mlx5_0/1 lqpn 819 rqpn 820 type RC state RTR rq-psn 5734471 sq-psn 0 path-mig-state MIGRATED pdn 9 pid 46235 comm ib_write_bw
+link mlx5_0/1 lqpn 820 rqpn 819 type RC state RTS rq-psn 16423148 sq-psn 5734486 path-mig-state MIGRATED pdn 8 pid 46231 comm ib_write_bw
+link mlx5_0/1 lqpn 821 rqpn 822 type RC state RTR rq-psn 869801 sq-psn 0 path-mig-state MIGRATED pdn 9 pid 46235 comm ib_write_bw
+link mlx5_0/1 lqpn 822 rqpn 821 type RC state RTS rq-psn 9391558 sq-psn 869817 path-mig-state MIGRATED pdn 8 pid 46231 comm ib_write_bw
+link mlx5_0/1 lqpn 823 rqpn 824 type RC state RTR rq-psn 5156666 sq-psn 0 path-mig-state MIGRATED pdn 9 pid 46235 comm ib_write_bw
+link mlx5_0/1 lqpn 824 rqpn 823 type RC state RTS rq-psn 9298810 sq-psn 5156682 path-mig-state MIGRATED pdn 8 pid 46231 comm ib_write_bw
+link mlx5_0/1 lqpn 825 rqpn 826 type RC state RTR rq-psn 15415907 sq-psn 0 path-mig-state MIGRATED pdn 9 pid 46235 comm ib_write_bw
+link mlx5_0/1 lqpn 826 rqpn 825 type RC state RTS rq-psn 11846939 sq-psn 15415923 path-mig-state MIGRATED pdn 8 pid 46231 comm ib_write_bw
+link mlx5_0/1 lqpn 827 rqpn 828 type RC state RTR rq-psn 6749855 sq-psn 0 path-mig-state MIGRATED pdn 9 pid 46235 comm ib_write_bw
+link mlx5_0/1 lqpn 828 rqpn 827 type RC state RTS rq-psn 4257602 sq-psn 6749872 path-mig-state MIGRATED pdn 8 pid 46231 comm ib_write_bw
+link mlx5_0/1 lqpn 829 rqpn 830 type RC state RTR rq-psn 4637926 sq-psn 0 path-mig-state MIGRATED pdn 9 pid 46235 comm ib_write_bw
+link mlx5_0/1 lqpn 830 rqpn 829 type RC state RTS rq-psn 16710024 sq-psn 4637942 path-mig-state MIGRATED pdn 8 pid 46231 comm ib_write_bw
+link mlx5_0/1 lqpn 831 rqpn 832 type RC state RTR rq-psn 15710300 sq-psn 0 path-mig-state MIGRATED pdn 9 pid 46235 comm ib_write_bw
+link mlx5_0/1 lqpn 832 rqpn 831 type RC state RTS rq-psn 7371059 sq-psn 15710316 path-mig-state MIGRATED pdn 8 pid 46231 comm ib_write_bw
+link mlx5_0/1 lqpn 833 rqpn 834 type RC state RTR rq-psn 9654443 sq-psn 0 path-mig-state MIGRATED pdn 9 pid 46235 comm ib_write_bw
+link mlx5_0/1 lqpn 834 rqpn 833 type RC state RTS rq-psn 5445009 sq-psn 9654460 path-mig-state MIGRATED pdn 8 pid 46231 comm ib_write_bw
+link mlx5_0/1 lqpn 835 rqpn 836 type RC state RTR rq-psn 14796958 sq-psn 0 path-mig-state MIGRATED pdn 9 pid 46235 comm ib_write_bw
+link mlx5_0/1 lqpn 836 rqpn 835 type RC state RTS rq-psn 1943687 sq-psn 14796974 path-mig-state MIGRATED pdn 8 pid 46231 comm ib_write_bw
+link mlx5_0/1 lqpn 837 rqpn 838 type RC state RTR rq-psn 1242029 sq-psn 0 path-mig-state MIGRATED pdn 9 pid 46235 comm ib_write_bw
+link mlx5_0/1 lqpn 838 rqpn 837 type RC state RTS rq-psn 1082929 sq-psn 1242045 path-mig-state MIGRATED pdn 8 pid 46231 comm ib_write_bw
+link mlx5_0/1 lqpn 839 rqpn 840 type RC state RTR rq-psn 15154813 sq-psn 0 path-mig-state MIGRATED pdn 9 pid 46235 comm ib_write_bw
+link mlx5_0/1 lqpn 840 rqpn 839 type RC state RTS rq-psn 10133331 sq-psn 15154829 path-mig-state MIGRATED pdn 8 pid 46231 comm ib_write_bw
+link mlx5_0/1 lqpn 841 rqpn 842 type RC state RTR rq-psn 9704396 sq-psn 0 path-mig-state MIGRATED pdn 9 pid 46235 comm ib_write_bw
+link mlx5_0/1 lqpn 842 rqpn 841 type RC state RTS rq-psn 7626827 sq-psn 9704413 path-mig-state MIGRATED pdn 8 pid 46231 comm ib_write_bw
+link mlx5_0/1 lqpn 843 rqpn 844 type RC state RTR rq-psn 11722317 sq-psn 0 path-mig-state MIGRATED pdn 9 pid 46235 comm ib_write_bw
+link mlx5_0/1 lqpn 844 rqpn 843 type RC state RTS rq-psn 16033001 sq-psn 11722333 path-mig-state MIGRATED pdn 8 pid 46231 comm ib_write_bw"""
+}
+
+print_qp_stat(){
+    echo """link mlx5_0/1 cntn 4 qp-type RC pid 46231 comm ib_write_bw rx_write_requests 0 rx_read_requests 0 rx_atomic_requests 0 out_of_buffer 0 out_of_sequence 0 duplicate_request 0 rnr_nak_retry_err 0 packet_seq_err 0 implied_nak_seq_err 0 local_ack_timeout_err 0 rp_cnp_ignored 0 rp_cnp_handled 0 np_ecn_marked_roce_packets 0 np_cnp_sent 0
+    LQPN: <813,816,818,820,822,824,826,828,830,832,834,836,838,840,842,844>
+link mlx5_0/1 cntn 5 qp-type RC pid 46235 comm ib_write_bw rx_write_requests 41988882 rx_read_requests 0 rx_atomic_requests 0 out_of_buffer 0 out_of_sequence 0 duplicate_request 0 rnr_nak_retry_err 0 packet_seq_err 0 implied_nak_seq_err 0 local_ack_timeout_err 0 rp_cnp_ignored 0 rp_cnp_handled 0 np_ecn_marked_roce_packets 0 np_cnp_sent 0
+    LQPN: <814,815,817,819,821,823,825,827,829,831,833,835,837,839,841,843>"""
+}
+
+sub_resource(){
+    case $2 in
+        "mr")
+            print_mr
+            ;;
+        "cq")
+            print_cq
+            ;;
+        "qp")
+            print_qp
+            ;;
+        *)
+            shift
+            echo "Error: unknown subcommand for resource." >&2
+            exit 1
+            ;;
+    esac
+}
+
+sub_statistic(){
+    case $1 in
+        "qp")
+            print_qp_stat
+            ;;
+        *)
+            shift
+            echo "Error: unknown subcommand for statistic." >&2
+            exit 1
+            ;;
+    esac
+}
+
+subcommand=$1
+case $subcommand in
+    "" | "-h" | "--help")
+        sub_help
+        ;;
+    *)
+        shift
+        sub_${subcommand} $@
+        if [ $? = 127 ]; then
+            echo "Error: '$subcommand' is not a known subcommand." >&2
+            echo "       Run '$ProgName --help' for a list of known subcommands." >&2
+            exit 1
+        fi
+        ;;
+esac
diff --git a/pkg/collector/testdata/sys.ttar b/pkg/collector/testdata/sys.ttar
index 323a6a24..a880bdf4 100644
--- a/pkg/collector/testdata/sys.ttar
+++ b/pkg/collector/testdata/sys.ttar
@@ -1,10 +1,643 @@
-# Archive created by ttar -C pkg/collector/fixtures -c -f pkg/collector/fixtures/sys.ttar sys
+# Archive created by ttar -C pkg/collector/testdata -c -f pkg/collector/testdata/sys.ttar sys
 Directory: sys
 Mode: 775
 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 Directory: sys/class
 Mode: 775
 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Directory: sys/class/infiniband
+Mode: 775
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Directory: sys/class/infiniband/hfi1_0
+Mode: 775
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/hfi1_0/board_id
+Lines: 1
+HPE 100Gb 1-port OP101 QSFP28 x16 PCIe Gen3 with Intel Omni-Path Adapter
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/hfi1_0/fw_ver
+Lines: 1
+1.27.0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Directory: sys/class/infiniband/hfi1_0/ports
+Mode: 775
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Directory: sys/class/infiniband/hfi1_0/ports/1
+Mode: 775
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Directory: sys/class/infiniband/hfi1_0/ports/1/counters
+Mode: 775
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/hfi1_0/ports/1/counters/VL15_dropped
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/hfi1_0/ports/1/counters/excessive_buffer_overrun_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/hfi1_0/ports/1/counters/link_downed
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/hfi1_0/ports/1/counters/link_error_recovery
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/hfi1_0/ports/1/counters/local_link_integrity_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/hfi1_0/ports/1/counters/port_rcv_constraint_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/hfi1_0/ports/1/counters/port_rcv_data
+Lines: 1
+345091702026
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/hfi1_0/ports/1/counters/port_rcv_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/hfi1_0/ports/1/counters/port_rcv_packets
+Lines: 1
+638036947
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/hfi1_0/ports/1/counters/port_rcv_remote_physical_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/hfi1_0/ports/1/counters/port_rcv_switch_relay_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/hfi1_0/ports/1/counters/port_xmit_constraint_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/hfi1_0/ports/1/counters/port_xmit_data
+Lines: 1
+273558326543
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/hfi1_0/ports/1/counters/port_xmit_discards
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/hfi1_0/ports/1/counters/port_xmit_packets
+Lines: 1
+568318856
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/hfi1_0/ports/1/counters/port_xmit_wait
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/hfi1_0/ports/1/counters/symbol_error
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/hfi1_0/ports/1/phys_state
+Lines: 1
+5: LinkUp
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/hfi1_0/ports/1/rate
+Lines: 1
+100 Gb/sec (4X EDR)
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/hfi1_0/ports/1/state
+Lines: 1
+4: ACTIVE
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Directory: sys/class/infiniband/mlx4_0
+Mode: 775
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/board_id
+Lines: 1
+SM_1141000001000
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/fw_ver
+Lines: 1
+2.31.5050
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/hca_type
+Lines: 1
+MT4099
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Directory: sys/class/infiniband/mlx4_0/ports
+Mode: 775
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Directory: sys/class/infiniband/mlx4_0/ports/1
+Mode: 775
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Directory: sys/class/infiniband/mlx4_0/ports/1/counters
+Mode: 775
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/1/counters/VL15_dropped
+Lines: 1
+0
+Mode: 664
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/1/counters/excessive_buffer_overrun_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/1/counters/link_downed
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/1/counters/link_error_recovery
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/1/counters/local_link_integrity_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/1/counters/port_rcv_constraint_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/1/counters/port_rcv_data
+Lines: 1
+2221223609
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/1/counters/port_rcv_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/1/counters/port_rcv_packets
+Lines: 1
+87169372
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/1/counters/port_rcv_remote_physical_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/1/counters/port_rcv_switch_relay_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/1/counters/port_xmit_constraint_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/1/counters/port_xmit_data
+Lines: 1
+26509113295
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/1/counters/port_xmit_discards
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/1/counters/port_xmit_packets
+Lines: 1
+85734114
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/1/counters/port_xmit_wait
+Lines: 1
+3599
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/1/counters/symbol_error
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/1/phys_state
+Lines: 1
+5: LinkUp
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/1/rate
+Lines: 1
+40 Gb/sec (4X QDR)
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/1/state
+Lines: 1
+4: ACTIVE
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Directory: sys/class/infiniband/mlx4_0/ports/2
+Mode: 775
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Directory: sys/class/infiniband/mlx4_0/ports/2/counters
+Mode: 775
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/2/counters/VL15_dropped
+Lines: 1
+0
+Mode: 664
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/2/counters/excessive_buffer_overrun_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/2/counters/link_downed
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/2/counters/link_error_recovery
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/2/counters/local_link_integrity_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/2/counters/port_rcv_constraint_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/2/counters/port_rcv_data
+Lines: 1
+2460436784
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/2/counters/port_rcv_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/2/counters/port_rcv_packets
+Lines: 1
+89332064
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/2/counters/port_rcv_remote_physical_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/2/counters/port_rcv_switch_relay_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/2/counters/port_xmit_constraint_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/2/counters/port_xmit_data
+Lines: 1
+26540356890
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/2/counters/port_xmit_discards
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/2/counters/port_xmit_packets
+Lines: 1
+88622850
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/2/counters/port_xmit_wait
+Lines: 1
+3846
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/2/counters/symbol_error
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/2/phys_state
+Lines: 1
+5: LinkUp
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/2/rate
+Lines: 1
+40 Gb/sec (4X QDR)
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx4_0/ports/2/state
+Lines: 1
+4: ACTIVE
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Directory: sys/class/infiniband/mlx5_0
+Mode: 775
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/board_id
+Lines: 1
+SM_2001000001034
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/fw_ver
+Lines: 1
+14.28.2006
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/hca_type
+Lines: 1
+MT4118
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Directory: sys/class/infiniband/mlx5_0/ports
+Mode: 775
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Directory: sys/class/infiniband/mlx5_0/ports/1
+Mode: 775
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Directory: sys/class/infiniband/mlx5_0/ports/1/counters
+Mode: 775
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/counters/VL15_dropped
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/counters/excessive_buffer_overrun_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/counters/link_downed
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/counters/link_error_recovery
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/counters/local_link_integrity_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/counters/multicast_rcv_packets
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/counters/multicast_xmit_packets
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/counters/port_rcv_constraint_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/counters/port_rcv_data
+Lines: 1
+18126345378
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/counters/port_rcv_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/counters/port_rcv_packets
+Lines: 1
+541889824
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/counters/port_rcv_remote_physical_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/counters/port_rcv_switch_relay_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/counters/port_xmit_constraint_errors
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/counters/port_xmit_data
+Lines: 1
+2880761508848
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/counters/port_xmit_discards
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/counters/port_xmit_packets
+Lines: 1
+10907922116
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/counters/port_xmit_wait
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/counters/symbol_error
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/counters/unicast_rcv_packets
+Lines: 1
+541889824
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/counters/unicast_xmit_packets
+Lines: 1
+10907922116
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Directory: sys/class/infiniband/mlx5_0/ports/1/hw_counters
+Mode: 775
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/hw_counters/duplicate_request
+Lines: 1
+41
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/hw_counters/implied_nak_seq_err
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/hw_counters/lifespan
+Lines: 1
+10
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/hw_counters/local_ack_timeout_err
+Lines: 1
+131
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/hw_counters/out_of_buffer
+Lines: 1
+0
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/hw_counters/out_of_sequence
+Lines: 1
+1
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/hw_counters/packet_seq_err
+Lines: 1
+1
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/hw_counters/req_cqe_error
+Lines: 1
+3481
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path: sys/class/infiniband/mlx5_0/ports/1/hw_counters/req_cqe_flush_error
+Lines: 1
+80
+Mode: 644
+# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Path:
sys/class/infiniband/mlx5_0/ports/1/hw_counters/req_remote_access_errors +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/infiniband/mlx5_0/ports/1/hw_counters/req_remote_invalid_request +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/infiniband/mlx5_0/ports/1/hw_counters/resp_cqe_error +Lines: 1 +8109 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/infiniband/mlx5_0/ports/1/hw_counters/resp_cqe_flush_error +Lines: 1 +4708 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/infiniband/mlx5_0/ports/1/hw_counters/resp_local_length_error +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/infiniband/mlx5_0/ports/1/hw_counters/resp_remote_access_errors +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/infiniband/mlx5_0/ports/1/hw_counters/rnr_nak_retry_err +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/infiniband/mlx5_0/ports/1/hw_counters/roce_adp_retrans +Lines: 1 +99 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/infiniband/mlx5_0/ports/1/hw_counters/roce_adp_retrans_to +Lines: 1 +4 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/infiniband/mlx5_0/ports/1/hw_counters/roce_slow_restart +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/infiniband/mlx5_0/ports/1/hw_counters/roce_slow_restart_cnps +Lines: 1 +131 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/infiniband/mlx5_0/ports/1/hw_counters/roce_slow_restart_trans +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/infiniband/mlx5_0/ports/1/hw_counters/rx_atomic_requests +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/infiniband/mlx5_0/ports/1/hw_counters/rx_dct_connect +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/infiniband/mlx5_0/ports/1/hw_counters/rx_read_requests +Lines: 1 +175528982 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/infiniband/mlx5_0/ports/1/hw_counters/rx_write_requests +Lines: 1 +742114 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/infiniband/mlx5_0/ports/1/phys_state +Lines: 1 +4: ACTIVE +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/infiniband/mlx5_0/ports/1/rate +Lines: 1 +25 Gb/sec (1X EDR) +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/infiniband/mlx5_0/ports/1/state +Lines: 1 +4: ACTIVE +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/class/powercap Mode: 775 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/scripts/e2e-test.sh 
index ff88c762..7bab3f0b 100755
--- a/scripts/e2e-test.sh
+++ b/scripts/e2e-test.sh
@@ -349,6 +349,8 @@ then
         --collector.slurm.gpu-type="nvidia" \
         --collector.slurm.nvidia-smi-path="pkg/collector/testdata/nvidia-smi" \
         --collector.slurm.gpu-job-map-path="pkg/collector/testdata/gpujobmap" \
+        --collector.rdma.stats \
+        --collector.rdma.cmd="pkg/collector/testdata/rdma" \
         --collector.empty-hostname-label \
         --collector.ipmi_dcmi.test-mode \
         --web.listen-address "127.0.0.1:${port}" \
diff --git a/website/cspell.json b/website/cspell.json
index 1206cf0e..a7e8404c 100644
--- a/website/cspell.json
+++ b/website/cspell.json
@@ -51,7 +51,8 @@
     "ebpf",
     "cpus",
     "memsw",
-    "retrans"
+    "retrans",
+    "Mellanox"
   ],
   // flagWords - list of words to be always considered incorrect
   // This is useful for offensive words and common spelling errors.
diff --git a/website/docs/components/ceems-exporter.md b/website/docs/components/ceems-exporter.md
index 07f2cc32..3ff0cd87 100644
--- a/website/docs/components/ceems-exporter.md
+++ b/website/docs/components/ceems-exporter.md
@@ -26,6 +26,7 @@ metrics like IO, networking, performance _etc_. Currently available sub-collecto
 - Perf sub-collector: Exports hardware, software and cache performance metrics
 - eBPF sub-collector: Exports IO and network related metrics
+- RDMA sub-collector: Exports selected RDMA stats
 
 These sub-collectors are not meant to work alone and they can enabled only when
 a main collector that monitors resource manager's compute units is activated.
@@ -177,6 +178,53 @@ per protocol (TCP/UDP) and per IP family (IPv4/IPv6).
 - Number of retransmission bytes (only for TCP)
 - Number of retransmission packets (only for TCP)
 
+### RDMA sub-collector
+
+Data transfer in RDMA happens directly between the RDMA NIC and the remote machine's
+memory, bypassing the CPU. This makes it hard to trace RDMA data transfers at
+compute-unit granularity. However, system-wide data transfer metrics are readily
+available in the `/sys/class/infiniband` pseudo-filesystem, so this sub-collector
+exports important system-wide RDMA stats along with a few low-level metrics at the
+compute-unit level.
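+
+As a rough illustration of where the system-wide stats come from: each counter listed
+below is a plain text file under `/sys/class/infiniband/<device>/ports/<port>/counters`.
+The following is a minimal sketch of that read path, assuming the `sysfs` package of
+`github.com/prometheus/procfs` (the exact fields, labels and error handling used by the
+collector may differ):
+
+```go
+package main
+
+import (
+    "fmt"
+    "log"
+
+    "github.com/prometheus/procfs/sysfs"
+)
+
+func main() {
+    fs, err := sysfs.NewFS("/sys")
+    if err != nil {
+        log.Fatal(err)
+    }
+
+    // InfiniBandClass walks /sys/class/infiniband and parses the per-device,
+    // per-port counter files into typed structs.
+    devices, err := fs.InfiniBandClass()
+    if err != nil {
+        log.Fatal(err)
+    }
+
+    for _, dev := range devices {
+        for _, port := range dev.Ports {
+            // Counter fields are pointers: nil means the counter file is
+            // absent on this HCA, so guard before dereferencing.
+            if port.Counters.PortRcvData == nil || port.Counters.PortXmitData == nil {
+                continue
+            }
+            fmt.Printf("%s port %d (%s): rcv_data=%d xmit_data=%d\n",
+                dev.Name, port.Port, port.State,
+                *port.Counters.PortRcvData, *port.Counters.PortXmitData)
+        }
+    }
+}
+```
+
+The per-compute-unit stats (active QPs, CQs and MRs), by contrast, are not exposed in
+`sysfs`; they are gathered with the `rdma` command line utility (see the
+`--collector.rdma.cmd` flag), which can attribute these resources to individual
+processes.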
+
+#### System-wide RDMA stats
+
+- Number of data octets received on all links
+- Number of data octets transmitted on all links
+- Number of packets received on all VLs by this port (including errors)
+- Number of packets transmitted on all VLs from this port (including errors)
+- Number of packets received on the switch physical port that are discarded
+- Number of packets not transmitted from the switch physical port
+- Number of inbound packets discarded by the port because the port is down or congested
+- Number of outbound packets discarded by the port because the port is down or congested
+- Number of packets containing an error that were received on this port
+- State of the InfiniBand port
+
+#### Per-compute-unit RDMA stats
+
+- Number of active Queue Pairs (QPs)
+- Number of active Completion Queues (CQs)
+- Number of active Memory Regions (MRs)
+- Length of active CQs
+- Length of active MRs
+
+In the case of Mellanox devices, the following metrics are available for each compute unit:
+
+- Number of received write requests for the associated QPs
+- Number of received read requests for the associated QPs
+- Number of received atomic requests for the associated QPs
+- Number of times the requester detected CQEs completed with errors
+- Number of times the requester detected CQEs completed with flushed errors
+- Number of times the requester detected remote access errors
+- Number of times the requester detected remote invalid request errors
+- Number of times the responder detected CQEs completed with errors
+- Number of times the responder detected CQEs completed with flushed errors
+- Number of times the responder detected local length errors
+- Number of times the responder detected remote access errors
+
+To interpret these metrics, take a look at this
+[blog post](https://cuterwrite.top/en/p/rdma-element/), which explains the
+internals of RDMA in detail.
+
 ## Collectors
 
 ### Slurm collector
diff --git a/website/docs/components/metrics.md b/website/docs/components/metrics.md
index b322ecf7..af99094b 100644
--- a/website/docs/components/metrics.md
+++ b/website/docs/components/metrics.md
@@ -79,3 +79,29 @@ shows the collector that metric belongs to.
 | ebpf | ceems_ebpf_egress_bytes_total | manager, uuid, proto, family | Total number of egress bytes of protocol `proto` and family `family` by compute unit identified by label `uuid`. |
 | ebpf | ceems_ebpf_retrans_packets_total | manager, uuid, proto, family | Total number of retransmission packets of protocol `proto` and family `family` by compute unit identified by label `uuid` (Only for TCP). |
 | ebpf | ceems_ebpf_retrans_bytes_total | manager, uuid, proto, family | Total number of retransmission bytes of protocol `proto` and family `family` by compute unit identified by label `uuid`. |
+| rdma | ceems_rdma_port_constraint_errors_received_total | manager, device, port | Total number of packets received on the switch physical port that are discarded (system-wide metric). |
+| rdma | ceems_rdma_port_constraint_errors_transmitted_total | manager, device, port | Total number of packets not transmitted from the switch physical port (system-wide metric). |
+| rdma | ceems_rdma_port_data_received_bytes_total | manager, device, port | Total number of data octets received on all links (system-wide metric). |
+| rdma | ceems_rdma_port_data_transmitted_bytes_total | manager, device, port | Total number of data octets transmitted on all links (system-wide metric). |
+| rdma | ceems_rdma_port_discards_received_total | manager, device, port | Total number of inbound packets discarded by the port because the port is down or congested (system-wide metric). |
+| rdma | ceems_rdma_port_discards_transmitted_total | manager, device, port | Total number of outbound packets discarded by the port because the port is down or congested (system-wide metric). |
+| rdma | ceems_rdma_port_errors_received_total | manager, device, port | Total number of packets containing an error that were received on this port (system-wide metric). |
+| rdma | ceems_rdma_port_packets_received_total | manager, device, port | Total number of packets received on all VLs by this port, including errors (system-wide metric). |
+| rdma | ceems_rdma_port_packets_transmitted_total | manager, device, port | Total number of packets transmitted on all VLs from this port, including errors (system-wide metric). |
+| rdma | ceems_rdma_state_id | manager, device, port | State of the InfiniBand port (0: no change, 1: down, 2: init, 3: armed, 4: active, 5: act defer). |
+| rdma | ceems_rdma_rx_write_requests | manager, uuid, device, port | Total number of received write requests for the associated QPs for device `device` and compute unit identified by label `uuid`. |
+| rdma | ceems_rdma_rx_read_requests | manager, uuid, device, port | Total number of received read requests for the associated QPs for device `device` and compute unit identified by label `uuid`. |
+| rdma | ceems_rdma_rx_atomic_requests | manager, uuid, device, port | Total number of received atomic requests for the associated QPs for device `device` and compute unit identified by label `uuid`. |
+| rdma | ceems_rdma_req_cqe_error | manager, uuid, device, port | Total number of times the requester detected CQEs completed with errors for device `device` and compute unit identified by label `uuid`. |
+| rdma | ceems_rdma_req_cqe_flush_error | manager, uuid, device, port | Total number of times the requester detected CQEs completed with flushed errors for device `device` and compute unit identified by label `uuid`. |
+| rdma | ceems_rdma_req_remote_access_errors | manager, uuid, device, port | Total number of times the requester detected remote access errors for device `device` and compute unit identified by label `uuid`. |
+| rdma | ceems_rdma_req_remote_invalid_request | manager, uuid, device, port | Total number of times the requester detected remote invalid request errors for device `device` and compute unit identified by label `uuid`. |
+| rdma | ceems_rdma_resp_cqe_error | manager, uuid, device, port | Total number of times the responder detected CQEs completed with errors for device `device` and compute unit identified by label `uuid`. |
+| rdma | ceems_rdma_resp_cqe_flush_error | manager, uuid, device, port | Total number of times the responder detected CQEs completed with flushed errors for device `device` and compute unit identified by label `uuid`. |
+| rdma | ceems_rdma_resp_local_length_error | manager, uuid, device, port | Total number of times the responder detected local length errors for device `device` and compute unit identified by label `uuid`. |
+| rdma | ceems_rdma_resp_remote_access_errors | manager, uuid, device, port | Total number of times the responder detected remote access errors for device `device` and compute unit identified by label `uuid`. |
+| rdma | ceems_rdma_qps_active | manager, uuid, device, port | Total number of active QPs for device `device` and compute unit identified by label `uuid`. |
+| rdma | ceems_rdma_cqs_active | manager, uuid, device, port | Total number of active CQs for device `device` and compute unit identified by label `uuid`. |
+| rdma | ceems_rdma_mrs_active | manager, uuid, device, port | Total number of active MRs for device `device` and compute unit identified by label `uuid`. |
+| rdma | ceems_rdma_cqe_len_active | manager, uuid, device, port | Total length of active CQs for device `device` and compute unit identified by label `uuid`. |
+| rdma | ceems_rdma_mrs_len_active | manager, uuid, device, port | Total length of active MRs for device `device` and compute unit identified by label `uuid`. |
diff --git a/website/md-link-check.json b/website/md-link-check.json
index 83f30a88..311f8a35 100644
--- a/website/md-link-check.json
+++ b/website/md-link-check.json
@@ -18,6 +18,9 @@
     },
     {
       "pattern": "https://tbhaxor.com/understanding-linux-capabilities/"
+    },
+    {
+      "pattern": "https://cuterwrite.top/en/p/rdma-element/"
     }
   ],
   "replacementPatterns": [