Skip to content

Commit a82fcd7

Browse files
committed
fix: handle nvidia-smi non-zero exit status anderskm#55
1 parent 42ef071 commit a82fcd7

File tree

1 file changed

+32
-20
lines changed

1 file changed

+32
-20
lines changed

GPUtil/GPUtil.py

Lines changed: 32 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,8 @@
3030
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
3131
# SOFTWARE.
3232

33+
import encodings
34+
import logging
3335
from subprocess import Popen, PIPE
3436
from distutils import spawn
3537
import os
@@ -67,19 +69,29 @@ def safeFloatCast(strNumber):
6769

6870
def getGPUs():
6971
if platform.system() == "Windows":
70-
# If the platform is Windows and nvidia-smi
71-
# could not be found from the environment path,
72+
# If the platform is Windows and nvidia-smi
73+
# could not be found from the environment path,
7274
# try to find it from system drive with default installation path
7375
nvidia_smi = spawn.find_executable('nvidia-smi')
7476
if nvidia_smi is None:
7577
nvidia_smi = "%s\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe" % os.environ['systemdrive']
7678
else:
7779
nvidia_smi = "nvidia-smi"
78-
80+
7981
# Get ID, processing and memory utilization for all GPUs
8082
try:
83+
output = ""
84+
output_stderr = ""
8185
p = Popen([nvidia_smi,"--query-gpu=index,uuid,utilization.gpu,memory.total,memory.used,memory.free,driver_version,name,gpu_serial,display_active,display_mode,temperature.gpu", "--format=csv,noheader,nounits"], stdout=PIPE)
82-
stdout, stderror = p.communicate()
86+
stdout, stderr = p.communicate()
87+
if stdout:
88+
output = stdout.decode(encodings.utf_8.getregentry().name)
89+
if stderr:
90+
output_stderr = stderr.decode(encodings.utf_8.getregentry().name)
91+
if p.returncode != 0:
92+
nvidia_smi_error_message = f"{output} {output_stderr}".strip()
93+
logging.error("%s error: %s", nvidia_smi, nvidia_smi_error_message)
94+
return []
8395
except:
8496
return []
8597
output = stdout.decode('UTF-8')
@@ -227,7 +239,7 @@ def showUtilization(all=False, attrList=None, useOldCode=False):
227239
{'attr':'memoryFree','name':'Memory free','suffix':'MB','precision':0}],
228240
[{'attr':'display_mode','name':'Display mode'},
229241
{'attr':'display_active','name':'Display active'}]]
230-
242+
231243
else:
232244
if (useOldCode):
233245
print(' ID GPU MEM')
@@ -240,7 +252,7 @@ def showUtilization(all=False, attrList=None, useOldCode=False):
240252
{'attr':'load','name':'GPU','suffix':'%','transform': lambda x: x*100,'precision':0},
241253
{'attr':'memoryUtil','name':'MEM','suffix':'%','transform': lambda x: x*100,'precision':0}],
242254
]
243-
255+
244256
if (not useOldCode):
245257
if (attrList is not None):
246258
headerString = ''
@@ -251,15 +263,15 @@ def showUtilization(all=False, attrList=None, useOldCode=False):
251263
headerString = headerString + '| ' + attrDict['name'] + ' '
252264
headerWidth = len(attrDict['name'])
253265
minWidth = len(attrDict['name'])
254-
266+
255267
attrPrecision = '.' + str(attrDict['precision']) if ('precision' in attrDict.keys()) else ''
256268
attrSuffix = str(attrDict['suffix']) if ('suffix' in attrDict.keys()) else ''
257269
attrTransform = attrDict['transform'] if ('transform' in attrDict.keys()) else lambda x : x
258270
for gpu in GPUs:
259271
attr = getattr(gpu,attrDict['attr'])
260-
272+
261273
attr = attrTransform(attr)
262-
274+
263275
if (isinstance(attr,float)):
264276
attrStr = ('{0:' + attrPrecision + 'f}').format(attr)
265277
elif (isinstance(attr,int)):
@@ -271,20 +283,20 @@ def showUtilization(all=False, attrList=None, useOldCode=False):
271283
attrStr = attr.encode('ascii','ignore')
272284
else:
273285
raise TypeError('Unhandled object type (' + str(type(attr)) + ') for attribute \'' + attrDict['name'] + '\'')
274-
286+
275287
attrStr += attrSuffix
276-
288+
277289
minWidth = max(minWidth,len(attrStr))
278-
290+
279291
headerString += ' '*max(0,minWidth-headerWidth)
280-
292+
281293
minWidthStr = str(minWidth - len(attrSuffix))
282-
294+
283295
for gpuIdx,gpu in enumerate(GPUs):
284296
attr = getattr(gpu,attrDict['attr'])
285-
297+
286298
attr = attrTransform(attr)
287-
299+
288300
if (isinstance(attr,float)):
289301
attrStr = ('{0:'+ minWidthStr + attrPrecision + 'f}').format(attr)
290302
elif (isinstance(attr,int)):
@@ -296,15 +308,15 @@ def showUtilization(all=False, attrList=None, useOldCode=False):
296308
attrStr = ('{0:' + minWidthStr + 's}').format(attr.encode('ascii','ignore'))
297309
else:
298310
raise TypeError('Unhandled object type (' + str(type(attr)) + ') for attribute \'' + attrDict['name'] + '\'')
299-
311+
300312
attrStr += attrSuffix
301-
313+
302314
GPUstrings[gpuIdx] += '| ' + attrStr + ' '
303-
315+
304316
headerString = headerString + '|'
305317
for gpuIdx,gpu in enumerate(GPUs):
306318
GPUstrings[gpuIdx] += '|'
307-
319+
308320
headerSpacingString = '-' * len(headerString)
309321
print(headerString)
310322
print(headerSpacingString)

0 commit comments

Comments
 (0)