forked from dusty-nv/jetson-containers
-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
72 lines (62 loc) · 2.87 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/env python3
import os
import numpy as np
from cuda import cuda, cudart, nvrtc
def _cudaGetErrorEnum(error):
    """Look up a printable name for a CUDA driver, runtime or NVRTC status.

    The lookup is dispatched on the Python type of ``error``:

    * ``cuda.CUresult``      -> ``cuda.cuGetErrorName``
    * ``cudart.cudaError_t`` -> ``cudart.cudaGetErrorName``
    * ``nvrtc.nvrtcResult``  -> ``nvrtc.nvrtcGetErrorString``

    Returns:
        The second element of the tuple returned by the matching lookup
        call, or the literal ``"<unknown>"`` when the driver-API lookup
        itself does not report ``CUDA_SUCCESS``.

    Raises:
        RuntimeError: ``error`` is not one of the three status types above.
    """
    # Driver API: the lookup can fail on its own, so inspect its status too.
    if isinstance(error, cuda.CUresult):
        lookup_status, label = cuda.cuGetErrorName(error)
        if lookup_status == cuda.CUresult.CUDA_SUCCESS:
            return label
        return "<unknown>"

    # Runtime API: only the second tuple element is used.
    if isinstance(error, cudart.cudaError_t):
        runtime_reply = cudart.cudaGetErrorName(error)
        return runtime_reply[1]

    # NVRTC: only the second tuple element is used.
    if isinstance(error, nvrtc.nvrtcResult):
        nvrtc_reply = nvrtc.nvrtcGetErrorString(error)
        return nvrtc_reply[1]

    raise RuntimeError('Unknown error type: {}'.format(error))
def checkCudaErrors(result):
    """Unwrap the ``(status, *values)`` tuple returned by a binding call.

    Args:
        result: Indexable sequence whose first element exposes a ``.value``
            attribute (falsy on success); any remaining elements are the
            call's outputs.

    Returns:
        ``None`` when there is no output, the single output when there is
        exactly one, otherwise ``result[1:]`` with every output.

    Raises:
        RuntimeError: the status' ``.value`` is truthy.  The message carries
            that value and the name resolved by ``_cudaGetErrorEnum``.
    """
    status = result[0]
    if status.value:
        raise RuntimeError("CUDA error code={}({})".format(status.value, _cudaGetErrorEnum(status)))

    # Shape the return value by how many outputs follow the status.
    total = len(result)
    if total == 1:
        return None
    if total == 2:
        return result[1]
    return result[1:]
class KernelHelper:
    """Compile CUDA source at runtime with NVRTC and load it as a module.

    After construction ``self.module`` holds the handle returned by
    ``cuda.cuModuleLoadData``; use :meth:`getFunction` to look up kernels.
    """

    def __init__(self, code, devID):
        """Compile ``code`` for device ``devID`` and load the result.

        Args:
            code: Source text of the program (``str``); it is encoded and
                handed to NVRTC under the name ``sourceCode.cu``.
            devID: Device ordinal whose compute capability selects the
                ``--gpu-architecture`` value.

        Raises:
            RuntimeError: ``CUDA_HOME`` is not set, or any CUDA / NVRTC call
                other than the compilation itself reports an error.
            SystemExit: compilation failed; the NVRTC log and the error are
                printed first and the exit code is ``-1``.
        """
        # Validate the environment before any NVRTC state exists, so an
        # early failure leaves nothing behind that would need cleaning up.
        cuda_home = os.getenv('CUDA_HOME')
        if cuda_home is None:
            raise RuntimeError('Environment variable CUDA_HOME is not defined')
        include_dirs = os.path.join(cuda_home, 'include')

        prog = checkCudaErrors(nvrtc.nvrtcCreateProgram(str.encode(code), b'sourceCode.cu', 0, [], []))
        try:
            # Initialize CUDA
            checkCudaErrors(cudart.cudaFree(0))

            major = checkCudaErrors(cudart.cudaDeviceGetAttribute(
                cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, devID))
            minor = checkCudaErrors(cudart.cudaDeviceGetAttribute(
                cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMinor, devID))

            # Direct CUBIN generation needs NVRTC 11.1 or newer.  Compare the
            # full (major, minor) pair: looking at the minor number alone
            # misclassifies x.0 releases of later majors and pre-11 releases.
            nvrtc_major, nvrtc_minor = checkCudaErrors(nvrtc.nvrtcVersion())
            use_cubin = (nvrtc_major, nvrtc_minor) >= (11, 1)
            prefix = 'sm' if use_cubin else 'compute'
            arch_arg = bytes(f'--gpu-architecture={prefix}_{major}{minor}', 'ascii')

            opts = [
                b'--fmad=true',
                arch_arg,
                '--include-path={}'.format(include_dirs).encode('UTF-8'),
                b'--std=c++11',
                b'-default-device',
            ]
            print(code)
            print('nvcc flags:', opts)

            try:
                checkCudaErrors(nvrtc.nvrtcCompileProgram(prog, len(opts), opts))
            except RuntimeError as err:
                logSize = checkCudaErrors(nvrtc.nvrtcGetProgramLogSize(prog))
                log = b' ' * logSize
                checkCudaErrors(nvrtc.nvrtcGetProgramLog(prog, log))
                # Never let an undecodable byte in the log hide the real error.
                print(log.decode(errors='replace'))
                print(err)
                # Same effect as exit(-1) without depending on the helper
                # that the `site` module injects into builtins.
                raise SystemExit(-1)

            if use_cubin:
                dataSize = checkCudaErrors(nvrtc.nvrtcGetCUBINSize(prog))
                data = b' ' * dataSize
                checkCudaErrors(nvrtc.nvrtcGetCUBIN(prog, data))
            else:
                dataSize = checkCudaErrors(nvrtc.nvrtcGetPTXSize(prog))
                data = b' ' * dataSize
                checkCudaErrors(nvrtc.nvrtcGetPTX(prog, data))
        finally:
            # The compiled image now lives in `data`; release the NVRTC
            # program on every path.  The status is deliberately not checked
            # so that a cleanup failure cannot mask an in-flight exception.
            nvrtc.nvrtcDestroyProgram(prog)

        self.module = checkCudaErrors(cuda.cuModuleLoadData(np.char.array(data)))

    def getFunction(self, name):
        """Return the handle of kernel ``name`` from the loaded module.

        Args:
            name: Kernel name forwarded unchanged to
                ``cuda.cuModuleGetFunction`` (presumably ``bytes`` -- confirm
                against callers).

        Raises:
            RuntimeError: the lookup reports an error.
        """
        return checkCudaErrors(cuda.cuModuleGetFunction(self.module, name))