diff --git a/pkg/collector/fixtures/nvidia-smi b/pkg/collector/fixtures/nvidia-smi index 2d8a77a8..bb25aa95 100755 --- a/pkg/collector/fixtures/nvidia-smi +++ b/pkg/collector/fixtures/nvidia-smi @@ -1,6 +1,6 @@ #!/bin/bash -echo """index, name, uuid +printf """index, name, uuid 0, Tesla V100-SXM2-32GB, GPU-f124aa59-d406-d45b-9481-8fcd694e6c9e 1, Tesla V100-SXM2-32GB, GPU-61a65011-6571-a6d2-5ab8-66cbb6f7f9c3 2, Tesla V100-SXM2-32GB, GPU-61a65011-6571-a6d2-5th8-66cbb6f7f9c3 diff --git a/pkg/collector/helper.go b/pkg/collector/helper.go index 768a615c..e4504446 100644 --- a/pkg/collector/helper.go +++ b/pkg/collector/helper.go @@ -118,7 +118,7 @@ func GetNvidiaGPUDevices(nvidiaSmiPath string, logger log.Logger) (map[int]Devic // Get all devices gpuDevices := map[int]Device{} devIndxInt := 0 - for _, line := range strings.Split(string(nvidiaSmiOutput), "\n") { + for _, line := range strings.Split(strings.TrimSpace(string(nvidiaSmiOutput)), "\n") { // Header line, empty line and newlines are ignored if line == "" || line == "\n" || strings.HasPrefix(line, "index") { continue diff --git a/pkg/collector/slurm.go b/pkg/collector/slurm.go index 46a579b1..c03dcbb6 100644 --- a/pkg/collector/slurm.go +++ b/pkg/collector/slurm.go @@ -486,7 +486,7 @@ func (c *slurmCollector) getJobProperties(metric *CgroupMetric, pids []uint64) { // it but just to be safe. This will have a small overhead as we need to check the // correct integer index for each device index. We can live with it as there are // typically 2/4/8 GPUs per node. - for i := 0; i <= len(c.nvidiaGPUDevs); i++ { + for i := 0; i < len(c.nvidiaGPUDevs); i++ { dev := c.nvidiaGPUDevs[i] gpuJobMapInfo := fmt.Sprintf("%s/%s", *gpuStatPath, dev.index)