From 84b8e5fe1617bd1d39b6449099e682787abaa546 Mon Sep 17 00:00:00 2001 From: Keren Zhou Date: Thu, 9 May 2024 22:33:50 -0400 Subject: [PATCH] [RUNTIME] `dlopen` loads `libcuda.so.1` instead of `libcuda.so` (#3872) This is consistent with pytorch > As `libcuda.so` is only installed on dev environment (i.e. when CUDAToolkit is installed), while `libcuda.so.1` is part of NVIDIA driver. https://github.com/pytorch/pytorch/commit/3be0e1cd587ece8fa54a3a4da8ae68225b9cbb9b --- third_party/nvidia/backend/driver.c | 6 +++--- third_party/nvidia/backend/driver.py | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/third_party/nvidia/backend/driver.c b/third_party/nvidia/backend/driver.c index abbd1638ed40..1ae6bd52c47a 100644 --- a/third_party/nvidia/backend/driver.c +++ b/third_party/nvidia/backend/driver.c @@ -146,9 +146,9 @@ typedef CUresult (*cuOccupancyMaxActiveClusters_t)( #define defineGetFunctionHandle(name, symbolName) \ static symbolName##_t name() { \ /* Open the shared library */ \ - void *libHandle = dlopen("libcuda.so", RTLD_LAZY); \ + void *libHandle = dlopen("libcuda.so.1", RTLD_LAZY); \ if (!libHandle) { \ - PyErr_SetString(PyExc_RuntimeError, "Failed to open libcuda.so"); \ + PyErr_SetString(PyExc_RuntimeError, "Failed to open libcuda.so.1"); \ return NULL; \ } \ /* Clear any existing error */ \ @@ -158,7 +158,7 @@ typedef CUresult (*cuOccupancyMaxActiveClusters_t)( const char *err = dlerror(); \ if (err) { \ PyErr_SetString(PyExc_RuntimeError, \ - "Failed to retrieve " #symbolName " from libcuda.so"); \ + "Failed to retrieve " #symbolName " from libcuda.so.1"); \ dlclose(libHandle); \ return NULL; \ } \ diff --git a/third_party/nvidia/backend/driver.py b/third_party/nvidia/backend/driver.py index 6f04141b59fb..90f71138bcd9 100644 --- a/third_party/nvidia/backend/driver.py +++ b/third_party/nvidia/backend/driver.py @@ -24,11 +24,11 @@ def libcuda_dirs(): libs = subprocess.check_output(["/sbin/ldconfig", "-p"]).decode() # each line looks like the following: # libcuda.so.1 (libc6,x86-64) => /lib/x86_64-linux-gnu/libcuda.so.1 - locs = [line.split()[-1] for line in libs.splitlines() if "libcuda.so" in line] + locs = [line.split()[-1] for line in libs.splitlines() if "libcuda.so.1" in line] dirs = [os.path.dirname(loc) for loc in locs] env_ld_library_path = os.getenv("LD_LIBRARY_PATH") if env_ld_library_path and not dirs: - dirs = [dir for dir in env_ld_library_path.split(":") if os.path.exists(os.path.join(dir, "libcuda.so"))] + dirs = [dir for dir in env_ld_library_path.split(":") if os.path.exists(os.path.join(dir, "libcuda.so.1"))] msg = 'libcuda.so cannot found!\n' if locs: msg += 'Possible files are located at %s.' % str(locs) @@ -36,7 +36,7 @@ def libcuda_dirs(): else: msg += 'Please make sure GPU is set up and then run "/sbin/ldconfig"' msg += ' (requires sudo) to refresh the linker cache.' - assert any(os.path.exists(os.path.join(path, 'libcuda.so')) for path in dirs), msg + assert any(os.path.exists(os.path.join(path, 'libcuda.so.1')) for path in dirs), msg return dirs @@ -174,9 +174,9 @@ def format_of(ty): static cuLaunchKernelEx_t getLaunchKernelExHandle() {{ // Open the shared library - void* handle = dlopen("libcuda.so", RTLD_LAZY); + void* handle = dlopen("libcuda.so.1", RTLD_LAZY); if (!handle) {{ - PyErr_SetString(PyExc_RuntimeError, "Failed to open libcuda.so"); + PyErr_SetString(PyExc_RuntimeError, "Failed to open libcuda.so.1"); return NULL; }} // Clear any existing error @@ -185,7 +185,7 @@ def format_of(ty): // Check for errors const char *dlsym_error = dlerror(); if (dlsym_error) {{ - PyErr_SetString(PyExc_RuntimeError, "Failed to retrieve cuLaunchKernelEx from libcuda.so"); + PyErr_SetString(PyExc_RuntimeError, "Failed to retrieve cuLaunchKernelEx from libcuda.so.1"); return NULL; }} return cuLaunchKernelExHandle;