diff --git a/etc/slurm/README.md b/etc/slurm/README.md index 82d8b20a..b6d55ac5 100644 --- a/etc/slurm/README.md +++ b/etc/slurm/README.md @@ -1,22 +1,22 @@ # SLURM epilog and prolog scripts -CEEMS exporter needs to perform few privileged actions to collect certain information of +CEEMS exporter needs to perform few privileged actions to collect certain information of compute units. An example [systemd service file](https://github.com/mahendrapaipuri/ceems/blob/main/build/package/ceems_exporter/ceems_exporter.service) -provided in the repo shows the linux capabilities necessary for these privileged actions. +provided in the repo shows the linux capabilities necessary for these privileged actions. -If the operators would like to avoid privileges on CEEMS exporter and run it fully in -userland an alternative approach, in SLURM context, is to use Epilog and Prolog scripts -to write the necessary job information to a file that is readable by CEEMS exporter. +If the operators would like to avoid privileges on CEEMS exporter and run it fully in +userland an alternative approach, in SLURM context, is to use Epilog and Prolog scripts +to write the necessary job information to a file that is readable by CEEMS exporter. This directory provides those scripts that should be used with SLURM. -An example [systemd service file](https://github.com/mahendrapaipuri/ceems/blob/main/init/systemd/ceems_exporter_no_privs.service) -is also provided in the repo that can be used along with these prolog and epilog scripts. +An example [systemd service file](https://github.com/mahendrapaipuri/ceems/blob/main/init/systemd/ceems_exporter_no_privs.service) +is also provided in the repo that can be used along with these prolog and epilog scripts. > [!IMPORTANT] -> The CLI arguments `--collector.slurm.job.props.path` and `--collector.slurm.gpu.job.map.path` -are hidden and cannot be seen in `ceems_exporter --help` output. 
However, these arguments +> The CLI arguments `--collector.slurm.job-props-path` and `--collector.slurm.gpu-job-map-path` +are hidden and cannot be seen in `ceems_exporter --help` output. However, these arguments exists in the exporter and can be used. -Even with such prolog and epilog scripts, operators should grant the user running CEEMS -exporter permissions to run `ipmi-dcmi` command as this command can be executable by only -`root` by default. +Even with such prolog and epilog scripts, operators should grant the user running CEEMS +exporter permissions to run `ipmi-dcmi` command as this command can be executable by only +`root` by default. diff --git a/etc/slurm/prolog.d/gpujobmap.sh b/etc/slurm/prolog.d/gpujobmap.sh index 68c3e5f2..6f5daed2 100755 --- a/etc/slurm/prolog.d/gpujobmap.sh +++ b/etc/slurm/prolog.d/gpujobmap.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Need to use this path in --collector.nvidia.gpu.job.map.path flag for ceems_exporter +# Need to use this path in --collector.slurm.gpu-job-map-path flag for ceems_exporter DEST=/run/gpujobmap [ -e $DEST ] || mkdir -m 755 $DEST diff --git a/etc/slurm/prolog.d/slurmjobprops.sh b/etc/slurm/prolog.d/slurmjobprops.sh index 49513988..c49d6e19 100755 --- a/etc/slurm/prolog.d/slurmjobprops.sh +++ b/etc/slurm/prolog.d/slurmjobprops.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Need to use this path in --collector.slurm.job.props.path flag for ceems_exporter +# Need to use this path in --collector.slurm.job-props-path flag for ceems_exporter DEST=/run/slurmjobprops [ -e $DEST ] || mkdir -m 755 $DEST diff --git a/pkg/collector/cli.go b/pkg/collector/cli.go index b93e861c..6c438906 100644 --- a/pkg/collector/cli.go +++ b/pkg/collector/cli.go @@ -2,8 +2,9 @@ package collector import ( "fmt" + std_log "log" "net/http" - _ "net/http/pprof" // #nosec + "net/http/pprof" "os" "os/user" "runtime" @@ -97,12 +98,20 @@ func (b *CEEMSExporter) Main() error { maxProcs = b.App.Flag( "runtime.gomaxprocs", "The target number of CPUs Go will run on 
(GOMAXPROCS)", ).Envar("GOMAXPROCS").Default("1").Int() + enableDebugServer = b.App.Flag( + "web.debug-server", + "Enable debug server (default: disabled).", + ).Default("false").Bool() + debugServerAddr = b.App.Flag( + "web.debug-server.listen-address", + "Address on which debug server will be exposed. Running debug server on localhost is strongly recommended.", + ).Default("localhost:8010").String() toolkitFlags = kingpinflag.AddFlags(&b.App, ":9010") ) // This is hidden flag only used for e2e testing emptyHostnameLabel = b.App.Flag( - "collector.empty.hostname.label", + "collector.empty-hostname-label", "Use empty hostname in labels. Only for testing. (default is disabled)", ).Hidden().Default("false").Bool() @@ -145,6 +154,33 @@ func (b *CEEMSExporter) Main() error { runtime.GOMAXPROCS(*maxProcs) level.Debug(logger).Log("msg", "Go MAXPROCS", "procs", runtime.GOMAXPROCS(0)) + // Reset default routes (removing access to profiling) + http.DefaultServeMux = http.NewServeMux() + + if *enableDebugServer { + // Recreating routes to profiling manually + pprofServeMux := http.NewServeMux() + pprofServeMux.HandleFunc("/debug/pprof/", pprof.Index) + pprofServeMux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) + pprofServeMux.HandleFunc("/debug/pprof/profile", pprof.Profile) + pprofServeMux.HandleFunc("/debug/pprof/symbol", pprof.Symbol) + + go func() { + debugServer := &http.Server{ + // slowloris attack: https://app.deepsource.com/directory/analyzers/go/issues/GO-S2112 + ReadHeaderTimeout: 2 * time.Second, + // Only use routes for the profiling interface + Handler: pprofServeMux, + // Exposing them on loopback on a specific port for debugging access + Addr: *debugServerAddr, + } + + if err := debugServer.ListenAndServe(); err != nil { + std_log.Println("Failed to start debug server", "err", err) + } + }() + } + http.Handle(*metricsPath, b.newHandler(!*disableExporterMetrics, *maxRequests, logger)) if *metricsPath != "/" { diff --git a/pkg/collector/ipmi.go 
b/pkg/collector/ipmi.go index 92c62d31..516bb4f0 100644 --- a/pkg/collector/ipmi.go +++ b/pkg/collector/ipmi.go @@ -123,9 +123,13 @@ type impiCollector struct { */ var ( - ipmiDcmiCmd = CEEMSExporterApp.Flag( + ipmiDcmiCmdDepr = CEEMSExporterApp.Flag( "collector.ipmi.dcmi.cmd", "IPMI DCMI command to get system power statistics. Use full path to executables.", + ).Hidden().Default("").String() + ipmiDcmiCmd = CEEMSExporterApp.Flag( + "collector.ipmi_dcmi.cmd", + "IPMI DCMI command to get system power statistics. Use full path to executables.", ).Default("").String() ipmiDcmiCmds = []string{ @@ -165,6 +169,10 @@ func init() { // NewIPMICollector returns a new Collector exposing IMPI DCMI power metrics. func NewIPMICollector(logger log.Logger) (Collector, error) { + if *ipmiDcmiCmdDepr != "" { + level.Warn(logger).Log("msg", "flag --collector.ipmi.dcmi.cmd has been deprecated. Use --collector.ipmi_dcmi.cmd instead.") + } + var execMode string // Initialize metricDesc map @@ -191,10 +199,14 @@ func NewIPMICollector(logger log.Logger) (Collector, error) { // If no IPMI command is provided, try to find one var cmdSlice []string - if *ipmiDcmiCmd == "" { + if *ipmiDcmiCmd == "" && *ipmiDcmiCmdDepr == "" { cmdSlice = findIPMICmd() } else { - cmdSlice = strings.Split(*ipmiDcmiCmd, " ") + if *ipmiDcmiCmdDepr != "" { + cmdSlice = strings.Split(*ipmiDcmiCmdDepr, " ") + } else { + cmdSlice = strings.Split(*ipmiDcmiCmd, " ") + } } level.Debug(logger).Log( diff --git a/pkg/collector/meminfo.go b/pkg/collector/meminfo.go index ca1f1ae3..693fee20 100644 --- a/pkg/collector/meminfo.go +++ b/pkg/collector/meminfo.go @@ -25,10 +25,16 @@ type meminfoCollector struct { hostname string } -var meminfoAllStatistics = CEEMSExporterApp.Flag( - "collector.meminfo.all.stats", - "Enable collecting all meminfo stats (default is disabled).", -).Default("false").Bool() +var ( + meminfoAllStatisticsDepr = CEEMSExporterApp.Flag( + "collector.meminfo.all.stats", + "Enable collecting all meminfo stats 
(default: disabled).", + ).Hidden().Default("false").Bool() + meminfoAllStatistics = CEEMSExporterApp.Flag( + "collector.meminfo.all-stats", + "Enable collecting all meminfo stats (default: disabled).", + ).Default("false").Bool() +) func init() { RegisterCollector(memInfoSubsystem, defaultEnabled, NewMeminfoCollector) @@ -36,6 +42,10 @@ func init() { // NewMeminfoCollector returns a new Collector exposing memory stats. func NewMeminfoCollector(logger log.Logger) (Collector, error) { + if *meminfoAllStatisticsDepr { + level.Warn(logger).Log("msg", "flag --collector.meminfo.all.stats has been deprecated. Use --collector.meminfo.all-stats instead") + } + return &meminfoCollector{ logger: logger, hostname: hostname, @@ -56,7 +66,7 @@ func (c *meminfoCollector) Update(ch chan<- prometheus.Metric) error { // Export only MemTotal, MemFree and MemAvailable fields if meminfoAllStatistics is false var memInfoStats map[string]float64 - if *meminfoAllStatistics { + if *meminfoAllStatistics || *meminfoAllStatisticsDepr { memInfoStats = memInfo } else { memInfoStats = map[string]float64{ diff --git a/pkg/collector/slurm.go b/pkg/collector/slurm.go index b1af4b1c..2ae9aae6 100644 --- a/pkg/collector/slurm.go +++ b/pkg/collector/slurm.go @@ -31,43 +31,51 @@ const ( ) var ( - metricLock = sync.RWMutex{} - collectSwapMemoryStats = CEEMSExporterApp.Flag( + metricLock = sync.RWMutex{} + collectSwapMemoryStatsDepre = CEEMSExporterApp.Flag( "collector.slurm.swap.memory.metrics", "Enables collection of swap memory metrics (default: disabled)", + ).Default("false").Hidden().Bool() + collectSwapMemoryStats = CEEMSExporterApp.Flag( + "collector.slurm.swap-memory-metrics", + "Enables collection of swap memory metrics (default: disabled)", ).Default("false").Bool() - collectPSIStats = CEEMSExporterApp.Flag( + collectPSIStatsDepre = CEEMSExporterApp.Flag( "collector.slurm.psi.metrics", "Enables collection of PSI metrics (default: disabled)", + ).Default("false").Hidden().Bool() + 
collectPSIStats = CEEMSExporterApp.Flag( + "collector.slurm.psi-metrics", + "Enables collection of PSI metrics (default: disabled)", ).Default("false").Bool() useJobIDHash = CEEMSExporterApp.Flag( - "collector.slurm.create.unique.jobids", + "collector.slurm.create-unique-jobids", `Enables calculation of a unique hash based job UUID (default: disabled). UUID is calculated based on SLURM_JOBID, SLURM_JOB_USER, SLURM_JOB_ACCOUNT, SLURM_JOB_NODELIST.`, ).Default("false").Hidden().Bool() gpuType = CEEMSExporterApp.Flag( - "collector.slurm.gpu.type", + "collector.slurm.gpu-type", "GPU device type. Currently only nvidia and amd devices are supported.", - ).Enum("nvidia", "amd") + ).Hidden().Enum("nvidia", "amd") jobStatPath = CEEMSExporterApp.Flag( - "collector.slurm.job.props.path", + "collector.slurm.job-props-path", `Directory containing files with job properties. Files should be named after SLURM_JOBID with contents as "$SLURM_JOB_USER $SLURM_JOB_ACCOUNT $SLURM_JOB_NODELIST" in the same order.`, ).Default("/run/slurmjobprops").Hidden().String() gpuStatPath = CEEMSExporterApp.Flag( - "collector.slurm.gpu.job.map.path", + "collector.slurm.gpu-job-map-path", "Path to file that maps GPU ordinals to job IDs.", ).Default("/run/gpujobmap").Hidden().String() forceCgroupsVersion = CEEMSExporterApp.Flag( - "collector.slurm.force.cgroups.version", + "collector.slurm.force-cgroups-version", "Set cgroups version manually. Used only for testing.", ).Hidden().Enum("v1", "v2") nvidiaSmiPath = CEEMSExporterApp.Flag( - "collector.slurm.nvidia.smi.path", + "collector.slurm.nvidia-smi-path", "Absolute path to nvidia-smi binary. Use only for testing.", ).Hidden().Default("").String() rocmSmiPath = CEEMSExporterApp.Flag( - "collector.slurm.rocm.smi.path", + "collector.slurm.rocm-smi-path", "Absolute path to rocm-smi binary. Use only for testing.", ).Hidden().Default("").String() ) @@ -161,6 +169,15 @@ func init() { // NewSlurmCollector returns a new Collector exposing a summary of cgroups. 
func NewSlurmCollector(logger log.Logger) (Collector, error) { + // Log deprecation notices + if *collectPSIStatsDepre { + level.Warn(logger).Log("msg", "flag --collector.slurm.psi.metrics has been deprecated. Use --collector.slurm.psi-metrics instead") + } + + if *collectSwapMemoryStatsDepre { + level.Warn(logger).Log("msg", "flag --collector.slurm.swap.memory.metrics has been deprecated. Use --collector.slurm.swap-memory-metrics instead") + } + var cgroupsVersion string var cgroupsRootPath string @@ -196,13 +213,25 @@ func NewSlurmCollector(logger log.Logger) (Collector, error) { } // Attempt to get GPU devices + var gpuTypes []string + var gpuDevs map[int]Device var err error - gpuDevs, err = GetGPUDevices(*gpuType, logger) - if err == nil { - level.Info(logger).Log("msg", "GPU devices found") + if *gpuType != "" { + gpuTypes = []string{*gpuType} + } else { + gpuTypes = []string{"nvidia", "amd"} + } + + for _, gpuType := range gpuTypes { + gpuDevs, err = GetGPUDevices(gpuType, logger) + if err == nil { + level.Info(logger).Log("msg", "GPU devices found", "type", gpuType) + + break + } } // Get total memory of host @@ -405,13 +434,13 @@ func (c *slurmCollector) Update(ch chan<- prometheus.Metric) error { ch <- prometheus.MustNewConstMetric(c.jobMemoryFailCount, prometheus.GaugeValue, m.memoryFailCount, c.manager, c.hostname, m.jobuser, m.jobaccount, m.jobuuid) // PSI stats. 
Push them only if they are available - if *collectSwapMemoryStats { + if *collectSwapMemoryStatsDepre || *collectSwapMemoryStats { ch <- prometheus.MustNewConstMetric(c.jobMemswUsed, prometheus.GaugeValue, m.memswUsed, c.manager, c.hostname, m.jobuser, m.jobaccount, m.jobuuid) ch <- prometheus.MustNewConstMetric(c.jobMemswTotal, prometheus.GaugeValue, m.memswTotal, c.manager, c.hostname, m.jobuser, m.jobaccount, m.jobuuid) ch <- prometheus.MustNewConstMetric(c.jobMemswFailCount, prometheus.GaugeValue, m.memswFailCount, c.manager, c.hostname, m.jobuser, m.jobaccount, m.jobuuid) } - if *collectPSIStats { + if *collectPSIStatsDepre || *collectPSIStats { ch <- prometheus.MustNewConstMetric(c.jobCPUPressure, prometheus.GaugeValue, m.cpuPressure, c.manager, c.hostname, m.jobuser, m.jobaccount, m.jobuuid) ch <- prometheus.MustNewConstMetric(c.jobMemoryPressure, prometheus.GaugeValue, m.memoryPressure, c.manager, c.hostname, m.jobuser, m.jobaccount, m.jobuuid) } diff --git a/pkg/collector/slurm_test.go b/pkg/collector/slurm_test.go index 473bf7fb..e3ce09c6 100644 --- a/pkg/collector/slurm_test.go +++ b/pkg/collector/slurm_test.go @@ -30,9 +30,9 @@ func TestNewSlurmCollector(t *testing.T) { _, err := CEEMSExporterApp.Parse( []string{ "--path.cgroupfs", "testdata/sys/fs/cgroup", - "--collector.slurm.create.unique.jobids", - "--collector.slurm.job.props.path", "testdata/slurmjobprops", - "--collector.slurm.gpu.job.map.path", "testdata/gpujobmap", + "--collector.slurm.create-unique-jobids", + "--collector.slurm.job-props-path", "testdata/slurmjobprops", + "--collector.slurm.gpu-job-map-path", "testdata/gpujobmap", }, ) require.NoError(t, err) @@ -45,9 +45,9 @@ func TestCgroupsV2SlurmJobMetrics(t *testing.T) { _, err := CEEMSExporterApp.Parse( []string{ "--path.cgroupfs", "testdata/sys/fs/cgroup", - "--collector.slurm.create.unique.jobids", - "--collector.slurm.job.props.path", "testdata/slurmjobprops", - "--collector.slurm.gpu.job.map.path", "testdata/gpujobmap", + 
"--collector.slurm.create-unique-jobids", + "--collector.slurm.job-props-path", "testdata/slurmjobprops", + "--collector.slurm.gpu-job-map-path", "testdata/gpujobmap", }, ) require.NoError(t, err) @@ -95,7 +95,7 @@ func TestCgroupsV2SlurmJobMetricsWithProcFs(t *testing.T) { _, err := CEEMSExporterApp.Parse( []string{ "--path.cgroupfs", "testdata/sys/fs/cgroup", - "--collector.slurm.create.unique.jobids", + "--collector.slurm.create-unique-jobids", "--path.procfs", "testdata/proc", }, ) @@ -144,7 +144,7 @@ func TestCgroupsV2SlurmJobMetricsNoJobProps(t *testing.T) { _, err := CEEMSExporterApp.Parse( []string{ "--path.cgroupfs", "testdata/sys/fs/cgroup", - "--collector.slurm.create.unique.jobids", + "--collector.slurm.create-unique-jobids", }, ) require.NoError(t, err) @@ -191,8 +191,8 @@ func TestCgroupsV1SlurmJobMetrics(t *testing.T) { []string{ "--path.cgroupfs", "testdata/sys/fs/cgroup", "--path.procfs", "testdata/proc", - "--collector.slurm.create.unique.jobids", - "--collector.slurm.job.props.path", "testdata/slurmjobprops", + "--collector.slurm.create-unique-jobids", + "--collector.slurm.job-props-path", "testdata/slurmjobprops", }, ) require.NoError(t, err) diff --git a/scripts/e2e-test.sh b/scripts/e2e-test.sh index 639cf058..278cc996 100755 --- a/scripts/e2e-test.sh +++ b/scripts/e2e-test.sh @@ -300,14 +300,14 @@ then --path.cgroupfs="pkg/collector/testdata/sys/fs/cgroup" \ --path.procfs="pkg/collector/testdata/proc" \ --collector.slurm \ - --collector.slurm.create.unique.jobids \ - --collector.slurm.job.props.path="pkg/collector/testdata/slurmjobprops" \ - --collector.slurm.gpu.type="nvidia" \ - --collector.slurm.nvidia.smi.path="pkg/collector/testdata/nvidia-smi" \ - --collector.slurm.force.cgroups.version="v1" \ - --collector.slurm.gpu.job.map.path="pkg/collector/testdata/gpujobmap" \ + --collector.slurm.create-unique-jobids \ + --collector.slurm.job-props-path="pkg/collector/testdata/slurmjobprops" \ + --collector.slurm.gpu-type="nvidia" \ + 
--collector.slurm.nvidia-smi-path="pkg/collector/testdata/nvidia-smi" \ + --collector.slurm.force-cgroups-version="v1" \ + --collector.slurm.gpu-job-map-path="pkg/collector/testdata/gpujobmap" \ --collector.ipmi.dcmi.cmd="pkg/collector/testdata/ipmi/freeipmi/ipmi-dcmi" \ - --collector.empty.hostname.label \ + --collector.empty-hostname-label \ --web.listen-address "127.0.0.1:${port}" \ --log.level="debug" > "${logfile}" 2>&1 & @@ -318,12 +318,12 @@ then --path.cgroupfs="pkg/collector/testdata/sys/fs/cgroup" \ --path.procfs="pkg/collector/testdata/proc" \ --collector.slurm \ - --collector.slurm.job.props.path="pkg/collector/testdata/slurmjobprops" \ - --collector.slurm.gpu.type="nvidia" \ - --collector.slurm.nvidia.smi.path="pkg/collector/testdata/nvidia-smi" \ - --collector.slurm.force.cgroups.version="v2" \ - --collector.slurm.gpu.job.map.path="pkg/collector/testdata/gpujobmap" \ - --collector.empty.hostname.label \ + --collector.slurm.job-props-path="pkg/collector/testdata/slurmjobprops" \ + --collector.slurm.gpu-type="nvidia" \ + --collector.slurm.nvidia-smi-path="pkg/collector/testdata/nvidia-smi" \ + --collector.slurm.force-cgroups-version="v2" \ + --collector.slurm.gpu-job-map-path="pkg/collector/testdata/gpujobmap" \ + --collector.empty-hostname-label \ --web.listen-address "127.0.0.1:${port}" \ --log.level="debug" > "${logfile}" 2>&1 & @@ -334,13 +334,13 @@ then --path.cgroupfs="pkg/collector/testdata/sys/fs/cgroup" \ --path.procfs="pkg/collector/testdata/proc" \ --collector.slurm \ - --collector.slurm.create.unique.jobids \ - --collector.slurm.job.props.path="pkg/collector/testdata/slurmjobprops" \ - --collector.slurm.gpu.type="amd" \ - --collector.slurm.rocm.smi.path="pkg/collector/testdata/rocm-smi" \ - --collector.slurm.force.cgroups.version="v2" \ - --collector.slurm.gpu.job.map.path="pkg/collector/testdata/gpujobmap" \ - --collector.empty.hostname.label \ + --collector.slurm.create-unique-jobids \ + 
--collector.slurm.job-props-path="pkg/collector/testdata/slurmjobprops" \ + --collector.slurm.gpu-type="amd" \ + --collector.slurm.rocm-smi-path="pkg/collector/testdata/rocm-smi" \ + --collector.slurm.force-cgroups-version="v2" \ + --collector.slurm.gpu-job-map-path="pkg/collector/testdata/gpujobmap" \ + --collector.empty-hostname-label \ --web.listen-address "127.0.0.1:${port}" \ --log.level="debug" > "${logfile}" 2>&1 & @@ -351,10 +351,10 @@ then --path.cgroupfs="pkg/collector/testdata/sys/fs/cgroup" \ --path.procfs="pkg/collector/testdata/proc" \ --collector.slurm \ - --collector.slurm.create.unique.jobids \ - --collector.slurm.job.props.path="pkg/collector/testdata/slurmjobprops" \ - --collector.slurm.force.cgroups.version="v2" \ - --collector.empty.hostname.label \ + --collector.slurm.create-unique-jobids \ + --collector.slurm.job-props-path="pkg/collector/testdata/slurmjobprops" \ + --collector.slurm.force-cgroups-version="v2" \ + --collector.empty-hostname-label \ --web.listen-address "127.0.0.1:${port}" \ --log.level="debug" > "${logfile}" 2>&1 & @@ -365,12 +365,12 @@ then --path.cgroupfs="pkg/collector/testdata/sys/fs/cgroup" \ --path.procfs="pkg/collector/testdata/proc" \ --collector.slurm \ - --collector.slurm.create.unique.jobids \ - --collector.slurm.gpu.type="nvidia" \ - --collector.slurm.nvidia.smi.path="pkg/collector/testdata/nvidia-smi" \ - --collector.slurm.force.cgroups.version="v2" \ + --collector.slurm.create-unique-jobids \ + --collector.slurm.gpu-type="nvidia" \ + --collector.slurm.nvidia-smi-path="pkg/collector/testdata/nvidia-smi" \ + --collector.slurm.force-cgroups-version="v2" \ --collector.ipmi.dcmi.cmd="pkg/collector/testdata/ipmi/ipmiutils/ipmiutil" \ - --collector.empty.hostname.label \ + --collector.empty-hostname-label \ --web.listen-address "127.0.0.1:${port}" \ --log.level="debug" > "${logfile}" 2>&1 & @@ -381,16 +381,16 @@ then --path.cgroupfs="pkg/collector/testdata/sys/fs/cgroup" \ --path.procfs="pkg/collector/testdata/proc" \ 
--collector.slurm \ - --collector.slurm.create.unique.jobids \ - --collector.slurm.job.props.path="pkg/collector/testdata/slurmjobprops" \ - --collector.slurm.gpu.type="amd" \ - --collector.slurm.rocm.smi.path="pkg/collector/testdata/rocm-smi" \ - --collector.slurm.force.cgroups.version="v2" \ - --collector.slurm.gpu.job.map.path="pkg/collector/testdata/gpujobmap" \ + --collector.slurm.create-unique-jobids \ + --collector.slurm.job-props-path="pkg/collector/testdata/slurmjobprops" \ + --collector.slurm.gpu-type="amd" \ + --collector.slurm.rocm-smi-path="pkg/collector/testdata/rocm-smi" \ + --collector.slurm.force-cgroups-version="v2" \ + --collector.slurm.gpu-job-map-path="pkg/collector/testdata/gpujobmap" \ --collector.slurm.swap.memory.metrics \ --collector.slurm.psi.metrics \ --collector.ipmi.dcmi.cmd="pkg/collector/testdata/ipmi/capmc/capmc" \ - --collector.empty.hostname.label \ + --collector.empty-hostname-label \ --web.listen-address "127.0.0.1:${port}" \ --log.level="debug" > "${logfile}" 2>&1 & fi diff --git a/website/docs/configuration/ceems-exporter.md b/website/docs/configuration/ceems-exporter.md index ec93e700..dbf69774 100644 --- a/website/docs/configuration/ceems-exporter.md +++ b/website/docs/configuration/ceems-exporter.md @@ -4,44 +4,55 @@ sidebar_position: 2 # CEEMS Exporter -Different collectors of CEEMS exporter are briefed earlier in -[Components](../components/ceems-exporter.md) section. Some of these collectors need -privileges to collect metrics. Current list of collectors that need privileges are +Different collectors of CEEMS exporter are briefed earlier in +[Components](../components/ceems-exporter.md) section. Some of these collectors need +privileges to collect metrics. Current list of collectors that need privileges are listed below. +:::important[IMPORTANT] + +Starting from `v0.3.0`, the following CLI flags have been slightly modified to have +a consistent styling. They will be removed in `v1.0.0`. 
+ +- `--collector.slurm.swap.memory.metrics` changed to `--collector.slurm.swap-memory-metrics` +- `--collector.slurm.psi.metrics` changed to `--collector.slurm.psi-metrics` +- `--collector.meminfo.all.stats` changed to `--collector.meminfo.all-stats` +- `--collector.ipmi.dcmi.cmd` changed to `--collector.ipmi_dcmi.cmd` + +::: + ## Slurm collector -Although fetching metrics from cgroups do not need any additional privileges, getting -GPU ordinal to job ID needs extra privileges. This is due to the fact that this -information is not readily available in cgroups (at least in v2 where devices are -bound to cgroups using BPF programs). Currently, the exporter supports two different -ways to get the GPU ordinals to job ID map. +Although fetching metrics from cgroups do not need any additional privileges, getting +GPU ordinal to job ID needs extra privileges. This is due to the fact that this +information is not readily available in cgroups (at least in v2 where devices are +bound to cgroups using BPF programs). Currently, the exporter supports two different +ways to get the GPU ordinals to job ID map. -- Reading environment variables `SLURM_STEP_GPUS` and/or `SLURM_JOB_GPUS` of job from +- Reading environment variables `SLURM_STEP_GPUS` and/or `SLURM_JOB_GPUS` of job from `/proc` file system which contains GPU ordinal numbers of job. -- Use prolog and epilog scripts to get the GPU to job ID map. Example prolog script +- Use prolog and epilog scripts to get the GPU to job ID map. Example prolog script is provided in the [repo](https://github.com/mahendrapaipuri/ceems/tree/main/etc/slurm). We recommend to use the first approach as it requires minimum configuration to maintain -for the operators. The downside is that the CEEMS exporter process will need some -privileges to be able to read the environment variables in `/proc` file system. The -privileges can be set in different ways and it is discussed in [Systemd](./systemd.md) +for the operators. 
The downside is that the CEEMS exporter process will need some +privileges to be able to read the environment variables in `/proc` file system. The +privileges can be set in different ways and it is discussed in [Systemd](./systemd.md) section. -On the other hand, if the operators do not wish to add any privileges to exporter -process, they can use the second approach but this requires some configuration additions -to SLURM controller to execute a prolog and epilog script for each job. Alongside GPU +On the other hand, if the operators do not wish to add any privileges to exporter +process, they can use the second approach but this requires some configuration additions +to SLURM controller to execute a prolog and epilog script for each job. Alongside GPU ordinals to job ID map, the exporter retrieves some other job metadata like job owner, -group, account, _etc_ to facilitate easy querying. These meta data are also gathered +group, account, _etc_ to facilitate easy querying. These meta data are also gathered from prolog scripts. A sample prolog script to get job meta data is as follows: - ```bash #!/bin/bash -# Need to use this path in --collector.slurm.job.props.path flag for ceems_exporter +# Need to use this path in --collector.slurm.job-props-path flag for ceems_exporter DEST=/run/slurmjobprops [ -e $DEST ] || mkdir -m 755 $DEST @@ -55,7 +66,7 @@ Similarly, sample prolog script to get GPU ordinals is as follows: ```bash #!/bin/bash -# Need to use this path in --collector.nvidia.gpu.job.map.path flag for ceems_exporter +# Need to use this path in --collector.slurm.gpu-job-map-path flag for ceems_exporter DEST=/run/gpujobmap @@ -70,90 +81,91 @@ done exit 0 ``` -At the end of each job, we must remove these files from `/run` file system to avoid -accumulation of these files. 
This can be configured using epilog scrips and sample +At the end of each job, we must remove these files from `/run` file system to avoid +accumulation of these files. This can be configured using epilog scrips and sample scripts can be found in the [repo](https://github.com/mahendrapaipuri/ceems/tree/main/etc/slurm/epilog.d). These prolog and epilog scripts must be configured to run at the start and end of each -job and operators can consult [SLURM docs](https://slurm.schedmd.com/prolog_epilog.html) +job and operators can consult [SLURM docs](https://slurm.schedmd.com/prolog_epilog.html) on more details configuring epilog and prolog scripts. Assuming the operators are using the above prolog scripts to get job meta data, CEEMS exporter must be configured with the following CLI flags: ```bash -ceems_exporter --collector.slum --collector.slurm.job.props.path=/run/slurmjobprops --collector.slurm.gpu.job.map.path=/run/gpujobmap +ceems_exporter --collector.slum --collector.slurm.job-props-path=/run/slurmjobprops --collector.slurm.gpu-job-map-path=/run/gpujobmap ``` -With above configuration, the exporter should export job meta data and GPU ordinal mapping + +With above configuration, the exporter should export job meta data and GPU ordinal mapping along with other metrics of slurm collector. :::important[IMPORTANT] -The CLI arguments `--collector.slurm.job.props.path` and `--collector.slurm.gpu.job.map.path` -are hidden and cannot be seen in `ceems_exporter --help` output. However, these arguments +The CLI arguments `--collector.slurm.job-props-path` and `--collector.slurm.gpu-job-map-path` +are hidden and cannot be seen in `ceems_exporter --help` output. However, these arguments exists in the exporter and can be used. ::: ## IPMI collector -Currently, collector supports FreeIPMI, OpenIMPI, IPMIUtils and Cray's [`capmc`](https://cray-hpe.github.io/docs-csm/en-10/operations/power_management/cray_advanced_platform_monitoring_and_control_capmc/) -framework. 
If one of these binaries exist on `PATH`, the exporter will automatically -detect it and parse the implementation's output to get power reading values. +Currently, collector supports FreeIPMI, OpenIMPI, IPMIUtils and Cray's [`capmc`](https://cray-hpe.github.io/docs-csm/en-10/operations/power_management/cray_advanced_platform_monitoring_and_control_capmc/) +framework. If one of these binaries exist on `PATH`, the exporter will automatically +detect it and parse the implementation's output to get power reading values. :::note[NOTE] -Current auto detection mode is only limited to `ipmi-dcmi` (FreeIPMI), `ipmitool` -(OpenIPMI), `ipmiutil` (IPMIUtils) and `capmc` (Cray) implementations. These binaries -must be on `PATH` for the exporter to detect them. If a custom IPMI command is used, -the command must output the power info in -[one of these formats](https://github.com/mahendrapaipuri/ceems/blob/c031e0e5b484c30ad8b6e2b68e35874441e9d167/pkg/collector/ipmi.go#L35-L92). -If that is not the case, operators must write a wrapper around the custom IPMI command +Current auto detection mode is only limited to `ipmi-dcmi` (FreeIPMI), `ipmitool` +(OpenIPMI), `ipmiutil` (IPMIUtils) and `capmc` (Cray) implementations. These binaries +must be on `PATH` for the exporter to detect them. If a custom IPMI command is used, +the command must output the power info in +[one of these formats](https://github.com/mahendrapaipuri/ceems/blob/c031e0e5b484c30ad8b6e2b68e35874441e9d167/pkg/collector/ipmi.go#L35-L92). +If that is not the case, operators must write a wrapper around the custom IPMI command to output the energy info in one of the supported formats. When a custom script is being -used, it is possible to configure it using CLI flag `--collector.ipmi.dcmi.cmd`. +used, it is possible to configure it using CLI flag `--collector.ipmi_dcmi.cmd`. ::: -Generally `ipmi` related commands are available for only `root`. 
Like in the case of -slurm collector, there are different ways to configure the privileges to execute -IPMI command. +Generally `ipmi` related commands are available for only `root`. Like in the case of +slurm collector, there are different ways to configure the privileges to execute +IPMI command. -- Admins can add a sudoers entry to let the user that runs the `ceems_exporter` to -execute only necessary command that reports the power usage. For instance, in the case of FreeIPMI +- Admins can add a sudoers entry to let the user that runs the `ceems_exporter` to +execute only necessary command that reports the power usage. For instance, in the case of FreeIPMI implementation, that sudoers entry will be -``` +```plain ceems ALL = NOPASSWD: /usr/sbin/ipmi-dcmi ``` -The exporter will automatically attempt to run the discovered IPMI command with `sudo` +The exporter will automatically attempt to run the discovered IPMI command with `sudo` prefix. -- Use linux capabilities to spawn a subprocess as `root` to execute just the `ipmi-dcmi` +- Use linux capabilities to spawn a subprocess as `root` to execute just the `ipmi-dcmi` command. This needs `CAP_SETUID` and `CAP_SETGID` capabilities in order to able use `setuid` and `setgid` syscalls. - Last approach is to run `ceems_exporter` as root. -We recommend to use either `sudo` or capabilities approach. More on the privileges +We recommend to use either `sudo` or capabilities approach. More on the privileges can be consulted from [Systemd](./systemd.md) section. :::important[IMPORTANT] -When the compute nodes have GPUs, it is important to verify what IPMI DCMI -power reading report exactly. Depending on the vendor's implementation, it might or -might not include the power consumption of GPUs. +When the compute nodes have GPUs, it is important to verify what IPMI DCMI +power reading report exactly. Depending on the vendor's implementation, it might or +might not include the power consumption of GPUs. 
::: ## RAPL collector -For the kernels that are `<5.3`, there is no special configuration to be done. If the -kernel version is `>=5.3`, RAPL metrics are only available for `root`. Three approaches +For the kernels that are `<5.3`, there is no special configuration to be done. If the +kernel version is `>=5.3`, RAPL metrics are only available for `root`. Three approaches can be envisioned here: -- Adding capability `CAP_DAC_READ_SEARCH` to the exporter process can give enough -privileges to read the energy counters. -- Another approach is to add a ACL rule on the `/sys/fs/class/powercap` +- Adding capability `CAP_DAC_READ_SEARCH` to the exporter process can give enough +privileges to read the energy counters. +- Another approach is to add an ACL rule on the `/sys/fs/class/powercap` directory to give read permissions to the user that is running `ceems_exporter`. - Running `ceems_exporter` as `root` user. @@ -161,16 +173,16 @@ We recommend the capabilities approach as it requires minimum configuration. ## Emissions collector -The only configuration needed for emissions collector is an API token for -[Electricity Maps](https://app.electricitymaps.com/map). For non commercial uses, -a [free tier token](https://www.electricitymaps.com/free-tier-api) can be requested. -This token must be passed using an environment variable `EMAPS_API_TOKEN` in the -systemd service file of the collector. +The only configuration needed for emissions collector is an API token for +[Electricity Maps](https://app.electricitymaps.com/map). For non commercial uses, +a [free tier token](https://www.electricitymaps.com/free-tier-api) can be requested. +This token must be passed using an environment variable `EMAPS_API_TOKEN` in the +systemd service file of the collector. :::tip[TIP] -This collector is not enabled by default as it is not needed to run on every compute node. -This collector can be run separately on a node that has internet access by disabling -rest of the collectors. 
+This collector is not enabled by default as it is not needed to run on every compute node. +This collector can be run separately on a node that has internet access by disabling +rest of the collectors. ::: diff --git a/website/docs/usage/ceems-exporter.md b/website/docs/usage/ceems-exporter.md index 44614437..ed465bc5 100644 --- a/website/docs/usage/ceems-exporter.md +++ b/website/docs/usage/ceems-exporter.md @@ -9,8 +9,7 @@ sidebar_position: 1 :::important[IMPORTANT] Currently CEEMS exporter supports only exporting SLURM job metrics. Consequently, CEEMS -support only SLURM resource manager. Adding support for -Openstack and libvirt is in next milestone. +supports only SLURM resource manager. Adding support for Openstack and libvirt is in the next milestone. ::: @@ -27,14 +26,14 @@ List of collectors that are enabled by default are: - `rapl`: RAPL energy counters - `ipmi_dcmi`: Power usage from IPMI DCMI -By default CEEMS exporter exposes metrics on all interfaces, port `9010` and +By default CEEMS exporter exposes metrics on all interfaces, port `9010` and at `/metrics` endpoint. This can be changed by setting `--web.listen-address` CLI flag ```bash ceems_exporter --web.listen-address="localhost:8010" ``` -Above command will run exporter only on `localhost` and on port `8010`. +Above command will run exporter only on `localhost` and on port `8010`. In order to enable SLURM collector, we need to add the following CLI flag @@ -42,56 +41,54 @@ In order to enable SLURM collector, we need to add the following CLI flag ceems_exporter --collector.slurm ``` -If there are GPUs on the compute nodes, it is necessary to tell the exporter the type -of GPU. Currently only NVIDIA and AMD GPUs are supported. +:::important[IMPORTANT] -```bash -ceems_exporter --collector.slurm --collector.slurm.gpu.type=amd -# or -ceems_exporter --collector.slurm --collector.slurm.gpu.type=nvidia -``` +Starting from `v0.3.0`, there is no need to configure the GPU type. 
The exporter will +automatically detect the supported GPU types: NVIDIA and AMD. + +::: -In order to disable default collectors, we need to add `no` prefix to the collector flag. +In order to disable default collectors, we need to add `no` prefix to the collector flag. The following command will disable IPMI and RAPL collectors: ```bash ceems_exporter --no-collector.ipmi_dcmi --no-collector.rapl ``` -If a custom IPMI DCMI implementation is used that does not give power usage in one +If a custom IPMI DCMI implementation is used that does not give power usage in one of the [one of these formats](https://github.com/mahendrapaipuri/ceems/blob/c031e0e5b484c30ad8b6e2b68e35874441e9d167/pkg/collector/ipmi.go#L35-L92) -supported by the exporter, it is necessary to write a wrapper that outputs the usage -in one of the supported format. This wrapper script can be provided to the exporter +supported by the exporter, it is necessary to write a wrapper that outputs the usage +in one of the supported formats. This wrapper script can be provided to the exporter using following command: ```bash -ceems_exporter --collector.ipmi.dcmi.cmd="/path/to/wrapper/script" +ceems_exporter --collector.ipmi_dcmi.cmd="/path/to/wrapper/script" ``` -By default no authentication is imposed on the exporter web server. In production this -is no advisable and it is possible to add basic auth and TLS to the exporter using -a web configuration file. More details on how to setup web configuration is discussed -in [Web configuration](../configuration/basic-auth.md) section. This file can be +By default no authentication is imposed on the exporter web server. In production this +is not advisable and it is possible to add basic auth and TLS to the exporter using +a web configuration file. More details on how to setup web configuration is discussed +in [Web configuration](../configuration/basic-auth.md) section. 
This file can be passed to exporter as a CLI argument as follows: ```bash ceems_exporter --web.config.file=/path/to/web/config/file ``` -The basic auth password is hashed inside the web configuration file just like in +The basic auth password is hashed inside the web configuration file just like in `/etc/passwd` file and hence, the chances of password leaks are minimal. :::important[IMPORTANT] -In all the cases, it is important that either exporter binary or exporter process must -have enough privileges to be able to export all the metrics. More info on the privileges -necessary for the exporter are discussed in [Configuration](../configuration/ceems-exporter.md) -section where as how to set privileges are briefed in [Systemd](../configuration/systemd.md) +In all the cases, it is important that either exporter binary or exporter process must +have enough privileges to be able to export all the metrics. More info on the privileges +necessary for the exporter are discussed in [Configuration](../configuration/ceems-exporter.md) +section whereas how to set privileges are briefed in [Systemd](../configuration/systemd.md) section. ::: -Once the exporter is running, by making a request to `/metrics` endpoint will give +Once the exporter is running, by making a request to `/metrics` endpoint will give following output: ```bash