From 213d849a4987ba4edb028a3ef4a4acfa469bc55c Mon Sep 17 00:00:00 2001
From: Benson Ma <bensonma415@meta.com>
Date: Wed, 1 Jan 2025 00:12:24 -0800
Subject: [PATCH] CUDA 12.6 support, pt 2 (#3533)

Summary:
X-link: https://github.com/facebookresearch/FBGEMM/pull/615

Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/3533

Reviewed By: spcyppt

Differential Revision: D67725264

Pulled By: q10

fbshipit-source-id: 3f7206f47781f0d4916a808017743639f8c1e5af
---
 .github/scripts/fbgemm_gpu_build.bash         |  5 ++
 .github/scripts/utils_base.bash               | 33 ++++++++
 .github/scripts/utils_build.bash              | 23 +-----
 .github/scripts/utils_cuda.bash               | 76 ++++++++++++++-----
 .../fbgemm_gpu_ci_genai_generic_infra.yml     |  4 +-
 5 files changed, 102 insertions(+), 39 deletions(-)

diff --git a/.github/scripts/fbgemm_gpu_build.bash b/.github/scripts/fbgemm_gpu_build.bash
index 2ba89eef50..5d81ab999c 100644
--- a/.github/scripts/fbgemm_gpu_build.bash
+++ b/.github/scripts/fbgemm_gpu_build.bash
@@ -106,6 +106,8 @@ __configure_fbgemm_gpu_build_nvcc () {
   echo "[BUILD] Setting NVCC flags ..."
   # shellcheck disable=SC2086
   print_exec conda env config vars set ${env_prefix} NVCC_PREPEND_FLAGS=\"${nvcc_prepend_flags}\"
+  # shellcheck disable=SC2086
+  print_exec conda run ${env_prefix} printenv NVCC_PREPEND_FLAGS
 
   echo "[BUILD] Setting CUDA build args ..."
   # shellcheck disable=SC2206
@@ -302,6 +304,9 @@ __configure_fbgemm_gpu_build () {
     __configure_fbgemm_gpu_build_cuda "${fbgemm_variant_targets}"
   fi
 
+  # shellcheck disable=SC2086
+  print_exec conda run ${env_prefix} c++ --version
+
   # Set other compiler flags as needed
   if print_exec "conda run ${env_prefix} c++ --version | grep -i clang"; then
     __configure_fbgemm_gpu_build_clang
diff --git a/.github/scripts/utils_base.bash b/.github/scripts/utils_base.bash
index 39a88f7a69..976befa4e2 100644
--- a/.github/scripts/utils_base.bash
+++ b/.github/scripts/utils_base.bash
@@ -265,3 +265,36 @@ test_library_symbol () {
     return 1
   fi
 }
+
+# Symlink cc/gcc -> clang and c++/g++ -> clang++ in env $1; returns 1 on failure
+set_clang_symlinks () {
+  local env_name="$1"
+  if [ "$env_name" == "" ]; then
+    echo "Usage: ${FUNCNAME[0]} ENV_NAME"
+    echo "Example(s):"
+    echo "    ${FUNCNAME[0]} build_env"
+    return 1
+  fi
+
+  # Declare, then assign: `local x=$(cmd)` would mask the exit status of cmd
+  local env_prefix cc_path cxx_path
+  env_prefix=$(env_name_or_prefix "${env_name}")
+  # shellcheck disable=SC2086
+  cc_path=$(conda run ${env_prefix} which clang) || cc_path=""
+  # shellcheck disable=SC2086
+  cxx_path=$(conda run ${env_prefix} which clang++) || cxx_path=""
+  # An empty path makes dirname print ".", so links would land in the cwd
+  if [ -z "${cc_path}" ] || [ -z "${cxx_path}" ]; then
+    echo "[INSTALL] clang and/or clang++ not found in ${env_name}"
+    return 1
+  fi
+
+  # Set the symlinks, override if needed.  NOTE: pointing c++ at clang++ can
+  # break the test runtime, since torch dynamo passes gcc-specific flags, e.g.
+  #   clang-16: error: unknown argument: '-fno-tree-loop-vectorize'
+  # As such, clang is installed only during the build step.
+  print_exec ln -sf "${cc_path}" "$(dirname "$cc_path")/cc"
+  print_exec ln -sf "${cc_path}" "$(dirname "$cc_path")/gcc"
+  print_exec ln -sf "${cxx_path}" "$(dirname "$cxx_path")/c++"
+  print_exec ln -sf "${cxx_path}" "$(dirname "$cxx_path")/g++"
+}
diff --git a/.github/scripts/utils_build.bash b/.github/scripts/utils_build.bash
index 7fded1eb2e..9fc88e449e 100644
--- a/.github/scripts/utils_build.bash
+++ b/.github/scripts/utils_build.bash
@@ -182,6 +182,7 @@ __remove_gcc_activation_scripts () {
   if [[ "$BUILD_CUDA_VERSION" =~ ^12.6.*$ ]]; then
       echo "[INSTALL] Removing GCC package activation scripts ..."
       local conda_prefix=$(conda run ${env_prefix} printenv CONDA_PREFIX)
+      print_exec ls -la ${conda_prefix}/etc/conda/activate.d
       print_exec rm -rf ${conda_prefix}/etc/conda/activate.d/activate-gcc_linux-${COMPILER_ARCHNAME}.sh
       print_exec rm -rf ${conda_prefix}/etc/conda/activate.d/activate-gxx_linux-${COMPILER_ARCHNAME}.sh
   fi
@@ -208,25 +209,7 @@ __conda_install_clang () {
   # The compilers are visible in the PATH as `clang` and `clang++`, so symlinks
   # will need to be created
   echo "[INSTALL] Setting the C/C++ compiler symlinks ..."
-  # shellcheck disable=SC2155,SC2086
-  local cc_path=$(conda run ${env_prefix} which clang)
-  # shellcheck disable=SC2155,SC2086
-  local cxx_path=$(conda run ${env_prefix} which clang++)
-
-  # Set the symlinks, override if needed
-  #
-  # NOTE: Setting the symlink CONDA_PREFIX/bin/c++ to point to clang++ can mess
-  # up the runtime for tests, since torch dynamo makes compilation calls with
-  # gcc-specific compiler flags, effectively making gcc a hard dependency:
-  #
-  #   clang-16: error: unknown argument: '-fno-tree-loop-vectorize'
-  #
-  # As such, clang is installed only during the build step, where we are
-  # exercising building FBGEMM in clang.
-  print_exec ln -sf "${cc_path}" "$(dirname "$cc_path")/cc"
-  print_exec ln -sf "${cc_path}" "$(dirname "$cc_path")/gcc"
-  print_exec ln -sf "${cxx_path}" "$(dirname "$cxx_path")/c++"
-  print_exec ln -sf "${cxx_path}" "$(dirname "$cxx_path")/g++"
+  set_clang_symlinks "${env_name}"
 
   # Remove the Conda activations scripts for gcc; see comments in the method for details
   __remove_gcc_activation_scripts
@@ -369,7 +352,7 @@ install_build_tools () {
     scikit-build \
     wheel) || return 1
 
-  echo "[INSTALL] Adding symlink librhash.so.0, which is needed by Cmake ..."
+  echo "[INSTALL] Adding symlink librhash.so.0, which is needed by CMake ..."
   # shellcheck disable=SC2155,SC2086
   local conda_prefix=$(conda run ${env_prefix} printenv CONDA_PREFIX)
   (print_exec ln -s "${conda_prefix}/lib/librhash.so" "${conda_prefix}/lib/librhash.so.0") || return 1
diff --git a/.github/scripts/utils_cuda.bash b/.github/scripts/utils_cuda.bash
index 81bd6e3332..40f906aadb 100644
--- a/.github/scripts/utils_cuda.bash
+++ b/.github/scripts/utils_cuda.bash
@@ -49,18 +49,54 @@ __set_cuda_symlinks_envvars () {
 
   echo "[INSTALL] Setting environment variable NVML_LIB_PATH ..."
   # shellcheck disable=SC2155
-  local nvml_lib_path=$(find "${conda_prefix}" -name libnvidia-ml.so | head -n1)
+  local libnvml_path=$(find "${conda_prefix}" -name libnvidia-ml.so | head -n1)
   # shellcheck disable=SC2086
-  print_exec conda env config vars set ${env_prefix} NVML_LIB_PATH="${nvml_lib_path}"
+  print_exec conda env config vars set ${env_prefix} NVML_LIB_PATH="${libnvml_path}"
+
+  if [ "$ADD_LIBCUDA_SYMLINK" == "1" ]; then
+    echo "[INSTALL] Setting up symlink to libnvidia-ml.so.1"
+    print_exec ln "${libnvml_path}" -s "${conda_prefix}/lib/libnvidia-ml.so.1"
+  fi
 
   echo "[INSTALL] Setting environment variable CUDA_INCLUDE_DIRS ..."
   # shellcheck disable=SC2086
   print_exec conda env config vars set ${env_prefix} CUDA_INCLUDE_DIRS=\""${conda_prefix}/include/:${new_cuda_home}/include/"\"
+
+  # Ensure that the CUDA headers are properly installed
+  (test_filepath "${env_name}" cuda_runtime.h) || return 1
+  # Ensure that the libraries are properly installed
+  (test_filepath "${env_name}" libcuda.so) || return 1
+  (test_filepath "${env_name}" libnvToolsExt.so) || return 1
+  (test_filepath "${env_name}" libnvidia-ml.so) || return 1
+
+  # Ensure that nvcc is properly installed
+  (test_binpath "${env_name}" nvcc) || return 1
 }
 
 __set_nvcc_prepend_flags () {
+  # shellcheck disable=SC2155,SC2086
+  local conda_prefix=$(conda run ${env_prefix} printenv CONDA_PREFIX)
+
+  # If clang is available, but CUDA was installed through conda-forge, the
+  # cc/c++ symlinks will be reset to gcc/g++, so fix this first
+  # shellcheck disable=SC2155,SC2086
+  if conda run ${env_prefix} clang --version; then
+    echo "[INSTALL] Resetting compiler symlinks to clang ..."
+    set_clang_symlinks "${env_name}"
+  fi
+
+  # The NVCC activation scripts append `-ccbin=${CXX}` to NVCC_PREPEND_FLAGS,
+  # which overrides whatever `-ccbin` flag we set manually, so remove this
+  # unwanted hook
+  print_exec ls -la "${conda_prefix}/etc/conda/activate.d"
+  if [[ "$BUILD_CUDA_VERSION" =~ ^12.6.*$ ]]; then
+    echo "[INSTALL] Removing the -ccbin=CXX hook from NVCC activation scripts ..."
+    print_exec sed -i '/-ccbin=/d' "${conda_prefix}/etc/conda/activate.d/*cuda-nvcc_activate.sh"
+  fi
+
   local nvcc_prepend_flags=(
-    # Allow for the use of newer compilers than what the current CUDA SDK supports
+    # Allow for the use of newer compilers than what the current CUDA SDK
+    # supports
     -allow-unsupported-compiler
   )
 
@@ -144,25 +180,31 @@ install_cuda () {
 
   # shellcheck disable=SC2155
   local env_prefix=$(env_name_or_prefix "${env_name}")
-
-  # Install CUDA packages
   echo "[INSTALL] Installing CUDA ${cuda_version} ..."
-  # shellcheck disable=SC2086
-  (exec_with_retries 3 conda install --force-reinstall ${env_prefix} -c "nvidia/label/cuda-${cuda_version}" -y \
-    cuda) || return 1
+
+  # NOTE: Currently, CUDA 12.6 cannot be installed using the nvidia/label/cuda-*
+  # conda channels, because we run into the following error:
+  #
+  #   LibMambaUnsatisfiableError: Encountered problems while solving:
+  #     - nothing provides __win needed by cuda-12.6.3-0
+  #
+  # For now, we only use conda-forge for installing 12.6, but it is likely that
+  # in the future, we will be using conda-forge for installing all CUDA versions
+  # (except for versions 11.8 and below, which are only available through
+  # nvidia/label/cuda-*)
+  if [[ "$BUILD_CUDA_VERSION" =~ ^12.6.*$ ]]; then
+    # shellcheck disable=SC2086
+    (exec_with_retries 3 conda install --force-reinstall ${env_prefix} -c conda-forge --override-channels -y \
+      cuda=${cuda_version}) || return 1
+  else
+    # shellcheck disable=SC2086
+    (exec_with_retries 3 conda install --force-reinstall ${env_prefix} -c "nvidia/label/cuda-${cuda_version}" -y \
+      cuda) || return 1
+  fi
 
   # Set the symlinks and environment variables not covered by conda install
   __set_cuda_symlinks_envvars
 
-  # Ensure that nvcc is properly installed
-  (test_binpath "${env_name}" nvcc) || return 1
-  # Ensure that the CUDA headers are properly installed
-  (test_filepath "${env_name}" cuda_runtime.h) || return 1
-  # Ensure that the libraries are properly installed
-  (test_filepath "${env_name}" libcuda.so) || return 1
-  (test_filepath "${env_name}" libnvToolsExt.so) || return 1
-  (test_filepath "${env_name}" libnvidia-ml.so) || return 1
-
   # Set the NVCC prepend flags depending on gcc or clang
   __set_nvcc_prepend_flags
 
diff --git a/.github/workflows/fbgemm_gpu_ci_genai_generic_infra.yml b/.github/workflows/fbgemm_gpu_ci_genai_generic_infra.yml
index d23b53c72d..d028d23474 100644
--- a/.github/workflows/fbgemm_gpu_ci_genai_generic_infra.yml
+++ b/.github/workflows/fbgemm_gpu_ci_genai_generic_infra.yml
@@ -61,7 +61,7 @@ jobs:
           { arch: x86, instance: "ubuntu-latest" },
         ]
         python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13" ]
-        cuda-version: [ "11.8.0", "12.4.1" ]
+        cuda-version: [ "11.8.0", "12.4.1", "12.6.3" ]
         compiler: [ "gcc", "clang" ]
 
     steps:
@@ -149,7 +149,7 @@ jobs:
           { arch: x86, instance: "ubuntu-latest" },
         ]
         python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13" ]
-        cuda-version: [ "11.8.0", "12.4.1" ]
+        cuda-version: [ "11.8.0", "12.4.1", "12.6.3" ]
         compiler: [ "gcc", "clang" ]
     needs: build_artifact