From a82fcd7e32e39d514287a6d1c69b489d1dcc90f8 Mon Sep 17 00:00:00 2001 From: Patrik Lermon Date: Sat, 13 Apr 2024 16:05:00 +0200 Subject: [PATCH] fix: handle nvidia-smi non-zero exit status #55 --- GPUtil/GPUtil.py | 52 +++++++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/GPUtil/GPUtil.py b/GPUtil/GPUtil.py index 2d5375d..ef44648 100644 --- a/GPUtil/GPUtil.py +++ b/GPUtil/GPUtil.py @@ -30,6 +30,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +import encodings +import logging from subprocess import Popen, PIPE from distutils import spawn import os @@ -67,19 +69,29 @@ def safeFloatCast(strNumber): def getGPUs(): if platform.system() == "Windows": - # If the platform is Windows and nvidia-smi - # could not be found from the environment path, + # If the platform is Windows and nvidia-smi + # could not be found from the environment path, # try to find it from system drive with default installation path nvidia_smi = spawn.find_executable('nvidia-smi') if nvidia_smi is None: nvidia_smi = "%s\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe" % os.environ['systemdrive'] else: nvidia_smi = "nvidia-smi" - + # Get ID, processing and memory utilization for all GPUs try: + output = "" + output_stderr = "" p = Popen([nvidia_smi,"--query-gpu=index,uuid,utilization.gpu,memory.total,memory.used,memory.free,driver_version,name,gpu_serial,display_active,display_mode,temperature.gpu", "--format=csv,noheader,nounits"], stdout=PIPE) - stdout, stderror = p.communicate() + stdout, stderr = p.communicate() + if stdout: + output = stdout.decode(encodings.utf_8.getregentry().name) + if stderr: + output_stderr = stderr.decode(encodings.utf_8.getregentry().name) + if p.returncode != 0: + nvidia_smi_error_message = f"{output} {output_stderr}".strip() + logging.error("%s error: %s", nvidia_smi, nvidia_smi_error_message) + return [] except: return [] output = stdout.decode('UTF-8') @@ -227,7 +239,7 @@ def showUtilization(all=False, attrList=None, useOldCode=False): {'attr':'memoryFree','name':'Memory free','suffix':'MB','precision':0}], [{'attr':'display_mode','name':'Display mode'}, {'attr':'display_active','name':'Display active'}]] - + else: if (useOldCode): print(' ID GPU MEM') @@ -240,7 +252,7 @@ def showUtilization(all=False, attrList=None, useOldCode=False): {'attr':'load','name':'GPU','suffix':'%','transform': lambda x: x*100,'precision':0}, {'attr':'memoryUtil','name':'MEM','suffix':'%','transform': lambda x: x*100,'precision':0}], ] - + if (not useOldCode): if (attrList is not None): headerString = '' @@ -251,15 +263,15 @@ def showUtilization(all=False, attrList=None, useOldCode=False): headerString = headerString + '| ' + attrDict['name'] + ' ' headerWidth = len(attrDict['name']) minWidth = len(attrDict['name']) - + attrPrecision = '.' + str(attrDict['precision']) if ('precision' in attrDict.keys()) else '' attrSuffix = str(attrDict['suffix']) if ('suffix' in attrDict.keys()) else '' attrTransform = attrDict['transform'] if ('transform' in attrDict.keys()) else lambda x : x for gpu in GPUs: attr = getattr(gpu,attrDict['attr']) - + attr = attrTransform(attr) - + if (isinstance(attr,float)): attrStr = ('{0:' + attrPrecision + 'f}').format(attr) elif (isinstance(attr,int)): @@ -271,20 +283,20 @@ def showUtilization(all=False, attrList=None, useOldCode=False): attrStr = attr.encode('ascii','ignore') else: raise TypeError('Unhandled object type (' + str(type(attr)) + ') for attribute \'' + attrDict['name'] + '\'') - + attrStr += attrSuffix - + minWidth = max(minWidth,len(attrStr)) - + headerString += ' '*max(0,minWidth-headerWidth) - + minWidthStr = str(minWidth - len(attrSuffix)) - + for gpuIdx,gpu in enumerate(GPUs): attr = getattr(gpu,attrDict['attr']) - + attr = attrTransform(attr) - + if (isinstance(attr,float)): attrStr = ('{0:'+ minWidthStr + attrPrecision + 'f}').format(attr) elif (isinstance(attr,int)): @@ -296,15 +308,15 @@ def showUtilization(all=False, attrList=None, useOldCode=False): attrStr = ('{0:' + minWidthStr + 's}').format(attr.encode('ascii','ignore')) else: raise TypeError('Unhandled object type (' + str(type(attr)) + ') for attribute \'' + attrDict['name'] + '\'') - + attrStr += attrSuffix - + GPUstrings[gpuIdx] += '| ' + attrStr + ' ' - + headerString = headerString + '|' for gpuIdx,gpu in enumerate(GPUs): GPUstrings[gpuIdx] += '|' - + headerSpacingString = '-' * len(headerString) print(headerString) print(headerSpacingString)