Skip to content

Commit

Permalink
fix: handle nvidia-smi non-zero exit status anderskm#55
Browse files Browse the repository at this point in the history
  • Loading branch information
plerecit committed Apr 13, 2024
1 parent 42ef071 commit a82fcd7
Showing 1 changed file with 32 additions and 20 deletions.
52 changes: 32 additions & 20 deletions GPUtil/GPUtil.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import encodings
import logging
from subprocess import Popen, PIPE
from distutils import spawn
import os
Expand Down Expand Up @@ -67,19 +69,29 @@ def safeFloatCast(strNumber):

def getGPUs():
if platform.system() == "Windows":
# If the platform is Windows and nvidia-smi
# could not be found from the environment path,
# If the platform is Windows and nvidia-smi
# could not be found from the environment path,
# try to find it from system drive with default installation path
nvidia_smi = spawn.find_executable('nvidia-smi')
if nvidia_smi is None:
nvidia_smi = "%s\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe" % os.environ['systemdrive']
else:
nvidia_smi = "nvidia-smi"

# Get ID, processing and memory utilization for all GPUs
try:
output = ""
output_stderr = ""
p = Popen([nvidia_smi,"--query-gpu=index,uuid,utilization.gpu,memory.total,memory.used,memory.free,driver_version,name,gpu_serial,display_active,display_mode,temperature.gpu", "--format=csv,noheader,nounits"], stdout=PIPE)
stdout, stderror = p.communicate()
stdout, stderr = p.communicate()
if stdout:
output = stdout.decode(encodings.utf_8.getregentry().name)
if stderr:
output_stderr = stderr.decode(encodings.utf_8.getregentry().name)
if p.returncode != 0:
nvidia_smi_error_message = f"{output} {output_stderr}".strip()
logging.error("%s error: %s", nvidia_smi, nvidia_smi_error_message)
return []
except:
return []
output = stdout.decode('UTF-8')
Expand Down Expand Up @@ -227,7 +239,7 @@ def showUtilization(all=False, attrList=None, useOldCode=False):
{'attr':'memoryFree','name':'Memory free','suffix':'MB','precision':0}],
[{'attr':'display_mode','name':'Display mode'},
{'attr':'display_active','name':'Display active'}]]

else:
if (useOldCode):
print(' ID GPU MEM')
Expand All @@ -240,7 +252,7 @@ def showUtilization(all=False, attrList=None, useOldCode=False):
{'attr':'load','name':'GPU','suffix':'%','transform': lambda x: x*100,'precision':0},
{'attr':'memoryUtil','name':'MEM','suffix':'%','transform': lambda x: x*100,'precision':0}],
]

if (not useOldCode):
if (attrList is not None):
headerString = ''
Expand All @@ -251,15 +263,15 @@ def showUtilization(all=False, attrList=None, useOldCode=False):
headerString = headerString + '| ' + attrDict['name'] + ' '
headerWidth = len(attrDict['name'])
minWidth = len(attrDict['name'])

attrPrecision = '.' + str(attrDict['precision']) if ('precision' in attrDict.keys()) else ''
attrSuffix = str(attrDict['suffix']) if ('suffix' in attrDict.keys()) else ''
attrTransform = attrDict['transform'] if ('transform' in attrDict.keys()) else lambda x : x
for gpu in GPUs:
attr = getattr(gpu,attrDict['attr'])

attr = attrTransform(attr)

if (isinstance(attr,float)):
attrStr = ('{0:' + attrPrecision + 'f}').format(attr)
elif (isinstance(attr,int)):
Expand All @@ -271,20 +283,20 @@ def showUtilization(all=False, attrList=None, useOldCode=False):
attrStr = attr.encode('ascii','ignore')
else:
raise TypeError('Unhandled object type (' + str(type(attr)) + ') for attribute \'' + attrDict['name'] + '\'')

attrStr += attrSuffix

minWidth = max(minWidth,len(attrStr))

headerString += ' '*max(0,minWidth-headerWidth)

minWidthStr = str(minWidth - len(attrSuffix))

for gpuIdx,gpu in enumerate(GPUs):
attr = getattr(gpu,attrDict['attr'])

attr = attrTransform(attr)

if (isinstance(attr,float)):
attrStr = ('{0:'+ minWidthStr + attrPrecision + 'f}').format(attr)
elif (isinstance(attr,int)):
Expand All @@ -296,15 +308,15 @@ def showUtilization(all=False, attrList=None, useOldCode=False):
attrStr = ('{0:' + minWidthStr + 's}').format(attr.encode('ascii','ignore'))
else:
raise TypeError('Unhandled object type (' + str(type(attr)) + ') for attribute \'' + attrDict['name'] + '\'')

attrStr += attrSuffix

GPUstrings[gpuIdx] += '| ' + attrStr + ' '

headerString = headerString + '|'
for gpuIdx,gpu in enumerate(GPUs):
GPUstrings[gpuIdx] += '|'

headerSpacingString = '-' * len(headerString)
print(headerString)
print(headerSpacingString)
Expand Down

0 comments on commit a82fcd7

Please sign in to comment.