Skip to content

Commit

Permalink
Make rocm_smi.py handle disappearing PIDs
Browse files Browse the repository at this point in the history
rocm_smi.py gets process information in two different places.
If the process disappears between those two reads, a crash
occurs.

This fix gracefully returns in this scenario.
Reading the process name from /proc instead of calling `ps`
through Python's subprocess module was considered, but it has
the drawback of not being able to read the names of processes
not owned by the caller.

Change-Id: If812c4641f00da37e99defb0740f670107c8a797
  • Loading branch information
Chris Freehill authored and Chris Freehill committed Dec 13, 2020
1 parent 0ee670d commit 4b49d2d
Showing 1 changed file with 12 additions and 5 deletions.
17 changes: 12 additions & 5 deletions python_smi_tools/rocm_smi.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,22 +243,29 @@ def getMemInfo(device, memType):
return (memUsed, memTotal)


def getName(pid):
def getProcessName(pid):
    """ Get the process name of a specific pid

    The name is obtained by running ``ps -p <pid> -o comm=``. If the PID is
    invalid, the process has already exited, or ``ps`` cannot be run, the
    string 'UNKNOWN' is returned instead of raising.

    @param pid: Process ID of a program to be parsed
    @return: The command name reported by ps, or 'UNKNOWN'
    """
    pid = int(pid)
    if pid < 1:
        logging.debug('PID must be greater than 0')
        return 'UNKNOWN'

    try:
        # Pass an argument list rather than a shell string: no shell is
        # spawned and nothing is left to be interpreted by one.
        rawName = subprocess.check_output(['ps', '-p', str(pid), '-o', 'comm='])
    except (subprocess.CalledProcessError, OSError) as e:
        # ps exits non-zero when the process disappeared between the caller
        # learning about the PID and this lookup; OSError covers ps itself
        # being unavailable. Either way this is not fatal for the caller.
        logging.debug('Unable to get process name for PID %d: %s', pid, e)
        return 'UNKNOWN'

    # Decode the raw bytes instead of slicing the repr of a bytes object
    # (b'...\n'), which depends on the Python version and on the quoting
    # chosen by repr().
    pName = rawName.decode('utf-8', 'replace').strip()
    if not pName:
        return 'UNKNOWN'

    return pName


Expand Down Expand Up @@ -1539,7 +1546,7 @@ def showPids():
cuOccupancy = proc.cu_occupancy
else:
logging.debug('Unable to fetch process info by PID')
dataArray.append([pid, getName(pid), str(gpuNumber), str(vramUsage), str(sdmaUsage), str(cuOccupancy)])
dataArray.append([pid, getProcessName(pid), str(gpuNumber), str(vramUsage), str(sdmaUsage), str(cuOccupancy)])
printLog(None, 'KFD process information:', None)
print2DArray(dataArray)
printLogSpacer()
Expand Down

0 comments on commit 4b49d2d

Please sign in to comment.