From 84b8e5fe1617bd1d39b6449099e682787abaa546 Mon Sep 17 00:00:00 2001
From: Keren Zhou <kerenzhou@openai.com>
Date: Thu, 9 May 2024 22:33:50 -0400
Subject: [PATCH] [RUNTIME] `dlopen` loads `libcuda.so.1` instead of
 `libcuda.so` (#3872)

This is consistent with PyTorch, which made the same change for the following reason:

> As `libcuda.so` is only installed on dev environment (i.e. when
CUDAToolkit is installed), while `libcuda.so.1` is part of NVIDIA
driver.


https://github.com/pytorch/pytorch/commit/3be0e1cd587ece8fa54a3a4da8ae68225b9cbb9b
---
 third_party/nvidia/backend/driver.c  |  6 +++---
 third_party/nvidia/backend/driver.py | 12 ++++++------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/third_party/nvidia/backend/driver.c b/third_party/nvidia/backend/driver.c
index abbd1638ed40..1ae6bd52c47a 100644
--- a/third_party/nvidia/backend/driver.c
+++ b/third_party/nvidia/backend/driver.c
@@ -146,9 +146,9 @@ typedef CUresult (*cuOccupancyMaxActiveClusters_t)(
 #define defineGetFunctionHandle(name, symbolName)                              \
   static symbolName##_t name() {                                               \
     /* Open the shared library */                                              \
-    void *libHandle = dlopen("libcuda.so", RTLD_LAZY);                         \
+    void *libHandle = dlopen("libcuda.so.1", RTLD_LAZY);                       \
     if (!libHandle) {                                                          \
-      PyErr_SetString(PyExc_RuntimeError, "Failed to open libcuda.so");        \
+      PyErr_SetString(PyExc_RuntimeError, "Failed to open libcuda.so.1");      \
       return NULL;                                                             \
     }                                                                          \
     /* Clear any existing error */                                             \
@@ -158,7 +158,7 @@ typedef CUresult (*cuOccupancyMaxActiveClusters_t)(
     const char *err = dlerror();                                               \
     if (err) {                                                                 \
       PyErr_SetString(PyExc_RuntimeError,                                      \
-                      "Failed to retrieve " #symbolName " from libcuda.so");   \
+                      "Failed to retrieve " #symbolName " from libcuda.so.1"); \
       dlclose(libHandle);                                                      \
       return NULL;                                                             \
     }                                                                          \
diff --git a/third_party/nvidia/backend/driver.py b/third_party/nvidia/backend/driver.py
index 6f04141b59fb..90f71138bcd9 100644
--- a/third_party/nvidia/backend/driver.py
+++ b/third_party/nvidia/backend/driver.py
@@ -24,11 +24,11 @@ def libcuda_dirs():
     libs = subprocess.check_output(["/sbin/ldconfig", "-p"]).decode()
     # each line looks like the following:
     # libcuda.so.1 (libc6,x86-64) => /lib/x86_64-linux-gnu/libcuda.so.1
-    locs = [line.split()[-1] for line in libs.splitlines() if "libcuda.so" in line]
+    locs = [line.split()[-1] for line in libs.splitlines() if "libcuda.so.1" in line]
     dirs = [os.path.dirname(loc) for loc in locs]
     env_ld_library_path = os.getenv("LD_LIBRARY_PATH")
     if env_ld_library_path and not dirs:
-        dirs = [dir for dir in env_ld_library_path.split(":") if os.path.exists(os.path.join(dir, "libcuda.so"))]
+        dirs = [dir for dir in env_ld_library_path.split(":") if os.path.exists(os.path.join(dir, "libcuda.so.1"))]
     msg = 'libcuda.so cannot found!\n'
     if locs:
         msg += 'Possible files are located at %s.' % str(locs)
@@ -36,7 +36,7 @@ def libcuda_dirs():
     else:
         msg += 'Please make sure GPU is set up and then run "/sbin/ldconfig"'
         msg += ' (requires sudo) to refresh the linker cache.'
-    assert any(os.path.exists(os.path.join(path, 'libcuda.so')) for path in dirs), msg
+    assert any(os.path.exists(os.path.join(path, 'libcuda.so.1')) for path in dirs), msg
     return dirs
 
 
@@ -174,9 +174,9 @@ def format_of(ty):
 
 static cuLaunchKernelEx_t getLaunchKernelExHandle() {{
   // Open the shared library
-  void* handle = dlopen("libcuda.so", RTLD_LAZY);
+  void* handle = dlopen("libcuda.so.1", RTLD_LAZY);
   if (!handle) {{
-    PyErr_SetString(PyExc_RuntimeError, "Failed to open libcuda.so");
+    PyErr_SetString(PyExc_RuntimeError, "Failed to open libcuda.so.1");
     return NULL;
   }}
   // Clear any existing error
@@ -185,7 +185,7 @@ def format_of(ty):
   // Check for errors
   const char *dlsym_error = dlerror();
   if (dlsym_error) {{
-    PyErr_SetString(PyExc_RuntimeError, "Failed to retrieve cuLaunchKernelEx from libcuda.so");
+    PyErr_SetString(PyExc_RuntimeError, "Failed to retrieve cuLaunchKernelEx from libcuda.so.1");
     return NULL;
   }}
   return cuLaunchKernelExHandle;