Modularize CMake build [2/N] (#3392)

Summary: X-link: facebookresearch/FBGEMM#496 - Migrate the building of `fbgemm_gpu_py` over to `gpu_cpp_library()` Pull Request resolved: #3392 Reviewed By: leitian Differential Revision: D66382655 Pulled By: q10 fbshipit-source-id: bd820125867734f9521f0ccec2084eb0163159ca
pytorch · Nov 23, 2024 · 9a94515 · 9a94515
1 parent f110630
commit 9a94515
Show file tree

Hide file tree

Showing 13 changed files with 177 additions and 234 deletions.
diff --git a/.github/scripts/fbgemm_gpu_test.bash b/.github/scripts/fbgemm_gpu_test.bash
@@ -498,7 +498,8 @@ test_fbgemm_gpu_setup_and_pip_install () {
     )
   elif [ "$variant_type" == "rocm" ]; then
     local variant_versions=(
-      6.0.2
+      6.1.2
+      6.2.4
     )
   elif [ "$variant_type" == "cpu" ]; then
     local variant_versions=(

diff --git a/.github/scripts/utils_pip.bash b/.github/scripts/utils_pip.bash
@@ -42,7 +42,7 @@ __export_package_variant_info () {
   local package_variant_type_version="$1"
 
   local FALLBACK_VERSION_CUDA="12.4.1"
-  local FALLBACK_VERSION_ROCM="6.0.2"
+  local FALLBACK_VERSION_ROCM="6.2.4"
 
   if [ "$package_variant_type_version" == "cuda" ]; then
     # If "cuda", default to latest CUDA
@@ -205,7 +205,7 @@ install_from_pytorch_pip () {
     echo "    ${FUNCNAME[0]} build_env torch 1.11.0 cpu                       # Install the CPU variant, specific version from release channel"
     echo "    ${FUNCNAME[0]} build_env torch release cpu                      # Install the CPU variant, latest version from release channel"
     echo "    ${FUNCNAME[0]} build_env fbgemm_gpu test/0.8.0 cuda/12.4.0      # Install the CUDA 12.4 variant, specific version from test channel"
-    echo "    ${FUNCNAME[0]} build_env fbgemm_gpu nightly rocm/6.1            # Install the ROCM 6.1 variant, latest version from nightly channel"
+    echo "    ${FUNCNAME[0]} build_env fbgemm_gpu nightly rocm/6.2            # Install the ROCM 6.2 variant, latest version from nightly channel"
     echo "    ${FUNCNAME[0]} build_env pytorch_triton 1.11.0                  # Install specific version from release channel"
     echo "    ${FUNCNAME[0]} build_env pytorch_triton release                 # Install latest version from release channel"
     echo "    ${FUNCNAME[0]} build_env pytorch_triton test/0.8.0              # Install specific version from test channel"
@@ -250,7 +250,7 @@ download_from_pytorch_pip () {
     echo "    ${FUNCNAME[0]} build_env torch 1.11.0 cpu                       # Download the CPU variant, specific version from release channel"
     echo "    ${FUNCNAME[0]} build_env torch release cpu                      # Download the CPU variant, latest version from release channel"
     echo "    ${FUNCNAME[0]} build_env fbgemm_gpu test/0.8.0 cuda/12.4.0      # Download the CUDA 12.4 variant, specific version from test channel"
-    echo "    ${FUNCNAME[0]} build_env fbgemm_gpu nightly rocm/6.1            # Download the ROCM 6.1 variant, latest version from nightly channel"
+    echo "    ${FUNCNAME[0]} build_env fbgemm_gpu nightly rocm/6.2            # Download the ROCM 6.2 variant, latest version from nightly channel"
     return 1
   else
     echo "################################################################################"

diff --git a/.github/scripts/utils_pytorch.bash b/.github/scripts/utils_pytorch.bash
@@ -113,7 +113,7 @@ install_pytorch_pip () {
     echo "    ${FUNCNAME[0]} build_env test/2.1.0 cpu     # Install the CPU variant for a specific version"
     echo "    ${FUNCNAME[0]} build_env release cpu        # Install the CPU variant, latest release version"
     echo "    ${FUNCNAME[0]} build_env test cuda/12.4.0   # Install the CUDA 12.4 variant, latest test version"
-    echo "    ${FUNCNAME[0]} build_env nightly rocm/6.1   # Install the ROCM 6.1 variant, latest nightly version"
+    echo "    ${FUNCNAME[0]} build_env nightly rocm/6.2   # Install the ROCM 6.2 variant, latest nightly version"
     return 1
   else
     echo "################################################################################"

diff --git a/.github/workflows/fbgemm_gpu_ci_rocm.yml b/.github/workflows/fbgemm_gpu_ci_rocm.yml
@@ -66,7 +66,7 @@ jobs:
         ]
         container-image: [ "ubuntu:22.04" ]
         python-version: [ "3.9", "3.10", "3.11", "3.12" ]
-        rocm-version: [ "6.1", "6.2" ]
+        rocm-version: [ "6.1.2", "6.2.4" ]
         compiler: [ "gcc", "clang" ]
 
     steps:
@@ -147,7 +147,7 @@ jobs:
         ]
         # ROCm machines are limited, so we only test a subset of Python versions
         python-version: [ "3.12" ]
-        rocm-version: [ "6.2" ]
+        rocm-version: [ "6.2.4" ]
         compiler: [ "gcc", "clang" ]
     needs: build_artifact
 

diff --git a/.github/workflows/fbgemm_gpu_pip.yml b/.github/workflows/fbgemm_gpu_pip.yml
@@ -186,7 +186,7 @@ jobs:
         ]
         # ROCm machines are limited, so we only test a subset of Python versions
         python-version: [ "3.11", "3.12" ]
-        rocm-version: [ "6.2" ]
+        rocm-version: [ "6.1.2", "6.2.4" ]
 
     steps:
     - name: Setup Build Container

diff --git a/cmake/modules/GpuCppLibrary.cmake b/cmake/modules/GpuCppLibrary.cmake
@@ -9,7 +9,7 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/modules/Utilities.cmake)
 function(prepare_target_sources)
     # This function does the following:
     #   1. Take all the specified project sources for a target
-    #   1. Filter the files out based on CPU-only, CUDA, and HIP build modes
+    #   1. Filter files out based on CPU-only, CUDA, and HIP build modes
     #   1. Bucketize them into sets of CXX, CU, and HIP files
     #   1. Apply common source file properties for each bucket
     #   1. Merge the buckets back into a single list of sources
@@ -36,7 +36,12 @@ function(prepare_target_sources)
     ############################################################################
 
     # Add the CPU CXX sources
-    set(${args_PREFIX}_sources_cpp ${args_CPU_SRCS})
+    # Keep only the .cpp entries from CPU_SRCS.  The backslash is doubled
+    # because CMake consumes one level of escaping inside a quoted argument:
+    # "\." would reach the regex engine as a bare "." (any character).
+    LIST_FILTER(
+        INPUT ${args_CPU_SRCS}
+        OUTPUT cpu_sources_cpp
+        REGEX "^.+\\.cpp$"
+    )
+    set(${args_PREFIX}_sources_cpp ${cpu_sources_cpp})
 
     # For GPU mode, add the CXX sources from GPU_SRCS
     if(NOT FBGEMM_CPU_ONLY)
@@ -127,37 +132,6 @@ function(prepare_target_sources)
     set(${args_PREFIX}_sources ${${args_PREFIX}_sources_combined} PARENT_SCOPE)
 endfunction()
 
-function(prepare_hipified_target_sources)
-    # This function does the following:
-    #   1. Take all the specified target sources
-    #   1. Look up their equivalent HIPified files if applicable (presumes that hipify() already been run)
-    #   1. Apply source file properties
-    #   1. Update the HIP include directories
-
-    set(flags)
-    set(singleValueArgs PREFIX)
-    set(multiValueArgs SRCS INCLUDE_DIRS)
-
-    cmake_parse_arguments(
-        args
-        "${flags}" "${singleValueArgs}" "${multiValueArgs}"
-        ${ARGN})
-
-    get_hipified_list("${args_SRCS}" args_SRCS)
-
-    set_source_files_properties(${args_SRCS}
-                                PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
-
-    # Add include directories
-    hip_include_directories("${args_INCLUDE_DIRS}")
-
-    ############################################################################
-    # Set the Output Variable(s)
-    ############################################################################
-
-    set(${args_PREFIX}_sources_hipified ${args_SRCS} PARENT_SCOPE)
-endfunction()
-
 function(gpu_cpp_library)
     # This function does the following:
     #   1. Take all the target sources and select relevant sources based on build type (CPU-only, CUDA, HIP)
@@ -174,6 +148,7 @@ function(gpu_cpp_library)
         GPU_SRCS            # Sources common to both CUDA and HIP builds.  .CU files specified here will be HIPified when building a HIP target
         CUDA_SPECIFIC_SRCS  # Sources available only for CUDA build
         HIP_SPECIFIC_SRCS   # Sources available only for HIP build
+        OTHER_SRCS          # Sources from third-party libraries
         GPU_FLAGS           # Compile flags for GPU builds
         INCLUDE_DIRS        # Include directories for compilation
     )
@@ -204,12 +179,16 @@ function(gpu_cpp_library)
 
     set(lib_name ${args_PREFIX}_py)
     if(USE_ROCM)
-        # Fetch the HIPified sources
-        prepare_hipified_target_sources(
-            PREFIX ${args_PREFIX}
-            SRCS ${lib_sources}
-            INCLUDE_DIRS ${args_INCLUDE_DIRS})
-        set(lib_sources_hipified ${${args_PREFIX}_sources_hipified})
+        # Fetch the equivalent HIPified sources if available.
+        # This presumes that hipify() has already been run.
+        get_hipified_list("${lib_sources}" lib_sources_hipified)
+
+        # Set properties for the HIPified sources
+        set_source_files_properties(${lib_sources_hipified}
+                                    PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
+
+        # Set the include directories for HIP
+        hip_include_directories("${args_INCLUDE_DIRS}")
 
         # Create the HIP library
         hip_add_library(${lib_name} SHARED
@@ -223,7 +202,8 @@ function(gpu_cpp_library)
         target_include_directories(${lib_name} PUBLIC
             ${FBGEMM_HIP_INCLUDE}
             ${ROCRAND_INCLUDE}
-            ${ROCM_SMI_INCLUDE})
+            ${ROCM_SMI_INCLUDE}
+            ${args_INCLUDE_DIRS})
 
     else()
         # Create the C++/CUDA library
@@ -296,6 +276,9 @@ function(gpu_cpp_library)
         "HIP_SPECIFIC_SRCS"
         "${args_HIP_SPECIFIC_SRCS}"
         " "
+        "OTHER_SRCS:"
+        "${args_OTHER_SRCS}"
+        " "
         "GPU_FLAGS:"
         "${args_GPU_FLAGS}"
         " "

diff --git a/cmake/modules/Utilities.cmake b/cmake/modules/Utilities.cmake
@@ -40,6 +40,26 @@ function(LIST_FILTER)
   set(${args_OUTPUT} ${${args_OUTPUT}} PARENT_SCOPE)
 endfunction()
 
+
+# Prefix every path in INPUT with "<PREFIX>/" and hand the resulting list back
+# to the caller.
+#
+# Arguments:
+#   PREFIX  Path prefix that is joined to each entry with a "/"
+#   INPUT   List of file paths to be prefixed
+#   OUTPUT  Name of the caller-scope variable that receives the result
+#
+# Example:
+#   prepend_filepaths(PREFIX ${CMAKE_CURRENT_SOURCE_DIR} INPUT a.cpp b.cu OUTPUT srcs)
+function(prepend_filepaths)
+  set(flags)
+  set(singleValueArgs PREFIX OUTPUT)
+  set(multiValueArgs INPUT)
+
+  cmake_parse_arguments(
+    args
+    "${flags}" "${singleValueArgs}" "${multiValueArgs}"
+    ${ARGN})
+
+  # Without OUTPUT there is nowhere to return the result; fail with a clear
+  # message instead of the cryptic error raised by an argument-less set()
+  if(NOT DEFINED args_OUTPUT)
+    message(FATAL_ERROR "prepend_filepaths: the OUTPUT argument is required")
+  endif()
+
+  # Accumulate into a private local rather than into the caller-named
+  # variable, so that an OUTPUT name that happens to match a local (e.g. the
+  # loop variable) cannot corrupt the result
+  set(prefixed_filepaths)
+
+  foreach(filepath ${args_INPUT})
+    list(APPEND prefixed_filepaths "${args_PREFIX}/${filepath}")
+  endforeach()
+
+  set(${args_OUTPUT} ${prefixed_filepaths} PARENT_SCOPE)
+endfunction()
+
 function(add_to_package)
   set(flags)
   set(singleValueArgs DESTINATION)

diff --git a/fbgemm_gpu/CMakeLists.txt b/fbgemm_gpu/CMakeLists.txt
@@ -108,6 +108,68 @@ set(fbgemm_sources_include_directories
   ${NCCL_INCLUDE_DIRS})
 
 
+################################################################################
+# TBE Code Generation
+################################################################################
+
+set(CMAKE_CODEGEN_DIR ${CMAKE_CURRENT_SOURCE_DIR}/codegen)
+
+# Run one code generation script at configure time.
+#
+# SCRIPT is the path of a Python script; it is invoked through
+# PYTHON_EXECUTABLE with --opensource, plus --is_rocm when USE_ROCM is set.
+# Configuration is aborted if the script does not exit with status 0.
+#
+# NOTE: this is a macro, so rocm_flag and gen_script_result are set in the
+# caller's scope.
+macro(RUN_GEN_SCRIPT SCRIPT)
+  if(USE_ROCM)
+    set(rocm_flag --is_rocm)
+  endif()
+
+  BLOCK_PRINT(
+    "Running code generation script ..."
+    "${PYTHON_EXECUTABLE} ${SCRIPT} --opensource ${rocm_flag}"
+  )
+
+  execute_process(
+    COMMAND "${PYTHON_EXECUTABLE}" ${SCRIPT} "--opensource" ${rocm_flag}
+    RESULT_VARIABLE gen_script_result)
+
+  # execute_process() does not stop the configure step on its own when the
+  # child process fails, so the exit status has to be checked explicitly
+  if(NOT "${gen_script_result}" STREQUAL "0")
+    message(FATAL_ERROR
+      "Code generation script failed (${gen_script_result}): ${SCRIPT}")
+  endif()
+endmacro()
+
+# Invoke each generator script under ${CMAKE_CODEGEN_DIR}/genscript in turn
+foreach(gen_script IN ITEMS
+    "${CMAKE_CODEGEN_DIR}/genscript/generate_backward_split.py"
+    "${CMAKE_CODEGEN_DIR}/genscript/generate_embedding_optimizer.py"
+    "${CMAKE_CODEGEN_DIR}/genscript/generate_forward_quantized.py"
+    "${CMAKE_CODEGEN_DIR}/genscript/generate_forward_split.py"
+    "${CMAKE_CODEGEN_DIR}/genscript/generate_index_select.py"
+)
+    RUN_GEN_SCRIPT(${gen_script})
+endforeach()
+
+
+################################################################################
+# HIP Code Generation
+################################################################################
+
+if(USE_ROCM)
+  set(include_dirs_for_hipification
+    # Header roots passed to hipify() as HEADER_INCLUDE_DIR; every directory must be listed for headers to be properly HIPified
+    ${CMAKE_CURRENT_SOURCE_DIR}/include
+    ${CMAKE_CURRENT_SOURCE_DIR}/src
+    ${CMAKE_CURRENT_SOURCE_DIR}
+    ${CMAKE_CURRENT_SOURCE_DIR}/experimental/gen_ai)
+
+  # HIPify all .CU and .CUH sources under PROJECT_SOURCE_DIR (presumably the `/fbgemm_gpu` directory -- TODO confirm it matches CMAKE_CURRENT_SOURCE_DIR)
+  # .H sources are not automatically HIPified, so they need #ifdef USE_ROCM guards
+  hipify(
+    CUDA_SOURCE_DIR
+      ${PROJECT_SOURCE_DIR}
+    HEADER_INCLUDE_DIR
+      ${include_dirs_for_hipification})
+
+  BLOCK_PRINT(
+    "HIPify Sources"
+    " "
+    "CUDA_SOURCE_DIR:"
+    "${PROJECT_SOURCE_DIR}"
+    " "
+    "HEADER_INCLUDE_DIR:"
+    "${include_dirs_for_hipification}"
+  )
+endif()
+
+
 ################################################################################
 # Build FBGEMM_GPU (Main) Module
 ################################################################################
@@ -131,6 +193,7 @@ if(NOT FBGEMM_CPU_ONLY)
 endif()
 
 if(NOT FBGEMM_CPU_ONLY AND NOT USE_ROCM)
-  # TODO: Re-enable gen_ai for ROCm after enabling build support for ROCm 6.2
+  # TODO: Re-enable gen_ai for ROCm once ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp
+  # lands into latest ROCm
   add_subdirectory(experimental/gen_ai)
 endif()