Skip to content

Commit

Permalink
[RUNTIME] dlopen loads libcuda.so.1 instead of libcuda.so (triton-lang#3872)
Browse files Browse the repository at this point in the history

This is consistent with PyTorch:

> As `libcuda.so` is only installed in dev environments (i.e. when
the CUDA Toolkit is installed), while `libcuda.so.1` is part of the
NVIDIA driver.


pytorch/pytorch@3be0e1c
  • Loading branch information
Jokeren authored May 10, 2024
1 parent 987dd04 commit 84b8e5f
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 9 deletions.
6 changes: 3 additions & 3 deletions third_party/nvidia/backend/driver.c
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,9 @@ typedef CUresult (*cuOccupancyMaxActiveClusters_t)(
#define defineGetFunctionHandle(name, symbolName) \
static symbolName##_t name() { \
/* Open the shared library */ \
void *libHandle = dlopen("libcuda.so", RTLD_LAZY); \
void *libHandle = dlopen("libcuda.so.1", RTLD_LAZY); \
if (!libHandle) { \
PyErr_SetString(PyExc_RuntimeError, "Failed to open libcuda.so"); \
PyErr_SetString(PyExc_RuntimeError, "Failed to open libcuda.so.1"); \
return NULL; \
} \
/* Clear any existing error */ \
Expand All @@ -158,7 +158,7 @@ typedef CUresult (*cuOccupancyMaxActiveClusters_t)(
const char *err = dlerror(); \
if (err) { \
PyErr_SetString(PyExc_RuntimeError, \
"Failed to retrieve " #symbolName " from libcuda.so"); \
"Failed to retrieve " #symbolName " from libcuda.so.1"); \
dlclose(libHandle); \
return NULL; \
} \
Expand Down
12 changes: 6 additions & 6 deletions third_party/nvidia/backend/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,19 @@ def libcuda_dirs():
libs = subprocess.check_output(["/sbin/ldconfig", "-p"]).decode()
# each line looks like the following:
# libcuda.so.1 (libc6,x86-64) => /lib/x86_64-linux-gnu/libcuda.so.1
locs = [line.split()[-1] for line in libs.splitlines() if "libcuda.so" in line]
locs = [line.split()[-1] for line in libs.splitlines() if "libcuda.so.1" in line]
dirs = [os.path.dirname(loc) for loc in locs]
env_ld_library_path = os.getenv("LD_LIBRARY_PATH")
if env_ld_library_path and not dirs:
dirs = [dir for dir in env_ld_library_path.split(":") if os.path.exists(os.path.join(dir, "libcuda.so"))]
dirs = [dir for dir in env_ld_library_path.split(":") if os.path.exists(os.path.join(dir, "libcuda.so.1"))]
msg = 'libcuda.so cannot found!\n'
if locs:
msg += 'Possible files are located at %s.' % str(locs)
msg += 'Please create a symlink of libcuda.so to any of the files.'
else:
msg += 'Please make sure GPU is set up and then run "/sbin/ldconfig"'
msg += ' (requires sudo) to refresh the linker cache.'
assert any(os.path.exists(os.path.join(path, 'libcuda.so')) for path in dirs), msg
assert any(os.path.exists(os.path.join(path, 'libcuda.so.1')) for path in dirs), msg
return dirs


Expand Down Expand Up @@ -174,9 +174,9 @@ def format_of(ty):
static cuLaunchKernelEx_t getLaunchKernelExHandle() {{
// Open the shared library
void* handle = dlopen("libcuda.so", RTLD_LAZY);
void* handle = dlopen("libcuda.so.1", RTLD_LAZY);
if (!handle) {{
PyErr_SetString(PyExc_RuntimeError, "Failed to open libcuda.so");
PyErr_SetString(PyExc_RuntimeError, "Failed to open libcuda.so.1");
return NULL;
}}
// Clear any existing error
Expand All @@ -185,7 +185,7 @@ def format_of(ty):
// Check for errors
const char *dlsym_error = dlerror();
if (dlsym_error) {{
PyErr_SetString(PyExc_RuntimeError, "Failed to retrieve cuLaunchKernelEx from libcuda.so");
PyErr_SetString(PyExc_RuntimeError, "Failed to retrieve cuLaunchKernelEx from libcuda.so.1");
return NULL;
}}
return cuLaunchKernelExHandle;
Expand Down

0 comments on commit 84b8e5f

Please sign in to comment.