Skip to content

Commit

Permalink
Modularize CMake build [2/N] (#3392)
Browse files Browse the repository at this point in the history
Summary:
X-link: facebookresearch/FBGEMM#496

- Migrate the building of `fbgemm_gpu_py` over to `gpu_cpp_library()`

Pull Request resolved: #3392

Reviewed By: leitian

Differential Revision: D66382655

Pulled By: q10

fbshipit-source-id: bd820125867734f9521f0ccec2084eb0163159ca
  • Loading branch information
q10 authored and facebook-github-bot committed Nov 23, 2024
1 parent f110630 commit 9a94515
Show file tree
Hide file tree
Showing 13 changed files with 177 additions and 234 deletions.
3 changes: 2 additions & 1 deletion .github/scripts/fbgemm_gpu_test.bash
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,8 @@ test_fbgemm_gpu_setup_and_pip_install () {
)
elif [ "$variant_type" == "rocm" ]; then
local variant_versions=(
6.0.2
6.1.2
6.2.4
)
elif [ "$variant_type" == "cpu" ]; then
local variant_versions=(
Expand Down
6 changes: 3 additions & 3 deletions .github/scripts/utils_pip.bash
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ __export_package_variant_info () {
local package_variant_type_version="$1"

local FALLBACK_VERSION_CUDA="12.4.1"
local FALLBACK_VERSION_ROCM="6.0.2"
local FALLBACK_VERSION_ROCM="6.2.4"

if [ "$package_variant_type_version" == "cuda" ]; then
# If "cuda", default to latest CUDA
Expand Down Expand Up @@ -205,7 +205,7 @@ install_from_pytorch_pip () {
echo " ${FUNCNAME[0]} build_env torch 1.11.0 cpu # Install the CPU variant, specific version from release channel"
echo " ${FUNCNAME[0]} build_env torch release cpu # Install the CPU variant, latest version from release channel"
echo " ${FUNCNAME[0]} build_env fbgemm_gpu test/0.8.0 cuda/12.4.0 # Install the CUDA 12.4 variant, specific version from test channel"
echo " ${FUNCNAME[0]} build_env fbgemm_gpu nightly rocm/6.1 # Install the ROCM 6.1 variant, latest version from nightly channel"
echo " ${FUNCNAME[0]} build_env fbgemm_gpu nightly rocm/6.2 # Install the ROCM 6.2 variant, latest version from nightly channel"
echo " ${FUNCNAME[0]} build_env pytorch_triton 1.11.0 # Install specific version from release channel"
echo " ${FUNCNAME[0]} build_env pytorch_triton release # Install latest version from release channel"
echo " ${FUNCNAME[0]} build_env pytorch_triton test/0.8.0 # Install specific version from test channel"
Expand Down Expand Up @@ -250,7 +250,7 @@ download_from_pytorch_pip () {
echo " ${FUNCNAME[0]} build_env torch 1.11.0 cpu # Download the CPU variant, specific version from release channel"
echo " ${FUNCNAME[0]} build_env torch release cpu # Download the CPU variant, latest version from release channel"
echo " ${FUNCNAME[0]} build_env fbgemm_gpu test/0.8.0 cuda/12.4.0 # Download the CUDA 12.4 variant, specific version from test channel"
echo " ${FUNCNAME[0]} build_env fbgemm_gpu nightly rocm/6.1 # Download the ROCM 6.1 variant, latest version from nightly channel"
echo " ${FUNCNAME[0]} build_env fbgemm_gpu nightly rocm/6.2 # Download the ROCM 6.2 variant, latest version from nightly channel"
return 1
else
echo "################################################################################"
Expand Down
2 changes: 1 addition & 1 deletion .github/scripts/utils_pytorch.bash
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ install_pytorch_pip () {
echo " ${FUNCNAME[0]} build_env test/2.1.0 cpu # Install the CPU variant for a specific version"
echo " ${FUNCNAME[0]} build_env release cpu # Install the CPU variant, latest release version"
echo " ${FUNCNAME[0]} build_env test cuda/12.4.0 # Install the CUDA 12.4 variant, latest test version"
echo " ${FUNCNAME[0]} build_env nightly rocm/6.1 # Install the ROCM 6.1 variant, latest nightly version"
echo " ${FUNCNAME[0]} build_env nightly rocm/6.2 # Install the ROCM 6.2 variant, latest nightly version"
return 1
else
echo "################################################################################"
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/fbgemm_gpu_ci_rocm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ jobs:
]
container-image: [ "ubuntu:22.04" ]
python-version: [ "3.9", "3.10", "3.11", "3.12" ]
rocm-version: [ "6.1", "6.2" ]
rocm-version: [ "6.1.2", "6.2.4" ]
compiler: [ "gcc", "clang" ]

steps:
Expand Down Expand Up @@ -147,7 +147,7 @@ jobs:
]
# ROCm machines are limited, so we only test a subset of Python versions
python-version: [ "3.12" ]
rocm-version: [ "6.2" ]
rocm-version: [ "6.2.4" ]
compiler: [ "gcc", "clang" ]
needs: build_artifact

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/fbgemm_gpu_pip.yml
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ jobs:
]
# ROCm machines are limited, so we only test a subset of Python versions
python-version: [ "3.11", "3.12" ]
rocm-version: [ "6.2" ]
rocm-version: [ "6.1.2", "6.2.4" ]

steps:
- name: Setup Build Container
Expand Down
63 changes: 23 additions & 40 deletions cmake/modules/GpuCppLibrary.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/modules/Utilities.cmake)
function(prepare_target_sources)
# This function does the following:
# 1. Take all the specified project sources for a target
# 1. Filter the files out based on CPU-only, CUDA, and HIP build modes
# 1. Filter files out based on CPU-only, CUDA, and HIP build modes
# 1. Bucketize them into sets of CXX, CU, and HIP files
# 1. Apply common source file properties for each bucket
# 1. Merge the buckets back into a single list of sources
Expand All @@ -36,7 +36,12 @@ function(prepare_target_sources)
############################################################################

# Add the CPU CXX sources
set(${args_PREFIX}_sources_cpp ${args_CPU_SRCS})
LIST_FILTER(
INPUT ${args_CPU_SRCS}
OUTPUT cpu_sources_cpp
REGEX "^.+\.cpp$"
)
set(${args_PREFIX}_sources_cpp ${cpu_sources_cpp})

# For GPU mode, add the CXX sources from GPU_SRCS
if(NOT FBGEMM_CPU_ONLY)
Expand Down Expand Up @@ -127,37 +132,6 @@ function(prepare_target_sources)
set(${args_PREFIX}_sources ${${args_PREFIX}_sources_combined} PARENT_SCOPE)
endfunction()

function(prepare_hipified_target_sources)
  # Resolves the HIPified counterparts of a set of target sources.
  #
  # Keyword arguments:
  #   PREFIX       - Prefix of the output variable name
  #   SRCS         - Source files to look up
  #   INCLUDE_DIRS - Directories passed on to hip_include_directories()
  #
  # Steps:
  #   - Pass SRCS through get_hipified_list() (presumes that hipify() has
  #     already been run)
  #   - Set HIP_SOURCE_PROPERTY_FORMAT on the resulting files
  #   - Hand INCLUDE_DIRS to hip_include_directories()
  #   - Export the resulting list to the caller as <PREFIX>_sources_hipified

  cmake_parse_arguments(
    hip_args
    ""                    # Flags (none)
    "PREFIX"              # Single-value keywords
    "SRCS;INCLUDE_DIRS"   # Multi-value keywords
    ${ARGN})

  # The source list is replaced in place by the list that
  # get_hipified_list() writes back
  get_hipified_list("${hip_args_SRCS}" hip_args_SRCS)

  set_source_files_properties(
    ${hip_args_SRCS}
    PROPERTIES
      HIP_SOURCE_PROPERTY_FORMAT 1)

  # Add include directories
  hip_include_directories("${hip_args_INCLUDE_DIRS}")

  ############################################################################
  # Set the Output Variable(s)
  ############################################################################

  set(${hip_args_PREFIX}_sources_hipified ${hip_args_SRCS} PARENT_SCOPE)
endfunction()

function(gpu_cpp_library)
# This function does the following:
# 1. Take all the target sources and select relevant sources based on build type (CPU-only, CUDA, HIP)
Expand All @@ -174,6 +148,7 @@ function(gpu_cpp_library)
GPU_SRCS # Sources common to both CUDA and HIP builds. .CU files specified here will be HIPified when building a HIP target
CUDA_SPECIFIC_SRCS # Sources available only for CUDA build
HIP_SPECIFIC_SRCS # Sources available only for HIP build
OTHER_SRCS # Sources from third-party libraries
GPU_FLAGS # Compile flags for GPU builds
INCLUDE_DIRS # Include directories for compilation
)
Expand Down Expand Up @@ -204,12 +179,16 @@ function(gpu_cpp_library)

set(lib_name ${args_PREFIX}_py)
if(USE_ROCM)
# Fetch the HIPified sources
prepare_hipified_target_sources(
PREFIX ${args_PREFIX}
SRCS ${lib_sources}
INCLUDE_DIRS ${args_INCLUDE_DIRS})
set(lib_sources_hipified ${${args_PREFIX}_sources_hipified})
# Fetch the equivalent HIPified sources if available.
# This presumes that hipify() has already been run.
get_hipified_list("${lib_sources}" lib_sources_hipified)

# Set properties for the HIPified sources
set_source_files_properties(${lib_sources_hipified}
PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)

# Set the include directories for HIP
hip_include_directories("${args_INCLUDE_DIRS}")

# Create the HIP library
hip_add_library(${lib_name} SHARED
Expand All @@ -223,7 +202,8 @@ function(gpu_cpp_library)
target_include_directories(${lib_name} PUBLIC
${FBGEMM_HIP_INCLUDE}
${ROCRAND_INCLUDE}
${ROCM_SMI_INCLUDE})
${ROCM_SMI_INCLUDE}
${args_INCLUDE_DIRS})

else()
# Create the C++/CUDA library
Expand Down Expand Up @@ -296,6 +276,9 @@ function(gpu_cpp_library)
"HIP_SPECIFIC_SRCS"
"${args_HIP_SPECIFIC_SRCS}"
" "
"OTHER_SRCS:"
"${args_OTHER_SRCS}"
" "
"GPU_FLAGS:"
"${args_GPU_FLAGS}"
" "
Expand Down
20 changes: 20 additions & 0 deletions cmake/modules/Utilities.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,26 @@ function(LIST_FILTER)
set(${args_OUTPUT} ${${args_OUTPUT}} PARENT_SCOPE)
endfunction()


function(prepend_filepaths)
  # Prefixes every entry of INPUT with "<PREFIX>/" and returns the resulting
  # list to the caller through the variable named by OUTPUT.
  #
  # Keyword arguments:
  #   PREFIX - Path segment placed in front of each entry
  #   INPUT  - File paths to be prefixed
  #   OUTPUT - Name of the caller-scope variable that receives the result
  #            (required)
  #
  # Example:
  #   prepend_filepaths(PREFIX src INPUT a.cpp b.cu OUTPUT srcs)
  #   # srcs is now "src/a.cpp;src/b.cu"

  set(flags)
  set(singleValueArgs PREFIX OUTPUT)
  set(multiValueArgs INPUT)

  cmake_parse_arguments(
    args
    "${flags}" "${singleValueArgs}" "${multiValueArgs}"
    ${ARGN})

  # Without OUTPUT there is no variable to write to; fail with a clear message
  # instead of the obscure error that an argument-less set() would raise
  if(NOT args_OUTPUT)
    message(FATAL_ERROR "prepend_filepaths: the OUTPUT argument is required")
  endif()

  # Accumulate into a local list so that the result cannot be influenced by a
  # same-named variable inherited from the caller's scope
  set(prefixed_filepaths)

  foreach(filepath IN LISTS args_INPUT)
    list(APPEND prefixed_filepaths "${args_PREFIX}/${filepath}")
  endforeach()

  set(${args_OUTPUT} ${prefixed_filepaths} PARENT_SCOPE)
endfunction()

function(add_to_package)
set(flags)
set(singleValueArgs DESTINATION)
Expand Down
65 changes: 64 additions & 1 deletion fbgemm_gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,68 @@ set(fbgemm_sources_include_directories
${NCCL_INCLUDE_DIRS})


################################################################################
# TBE Code Generation
################################################################################

set(CMAKE_CODEGEN_DIR ${CMAKE_CURRENT_SOURCE_DIR}/codegen)

macro(RUN_GEN_SCRIPT SCRIPT)
  # Runs one code generation script at configure time.
  #
  # Arguments:
  #   SCRIPT - Path of the Python script to execute
  #
  # The script is invoked with `--opensource`, plus `--is_rocm` when USE_ROCM
  # is set.  Configuration is aborted if the script cannot be started or exits
  # with a non-zero status, so that a broken generator is reported here rather
  # than going unnoticed.
  #
  # NOTE: This is a macro, so `rocm_flag` and `gen_script_exit_code` are set
  # in the caller's scope.

  if(USE_ROCM)
    set(rocm_flag --is_rocm)
  endif()

  BLOCK_PRINT(
    "Running code generation script ..."
    "${PYTHON_EXECUTABLE} ${SCRIPT} --opensource ${rocm_flag}"
  )

  execute_process(
    COMMAND "${PYTHON_EXECUTABLE}" ${SCRIPT} "--opensource" ${rocm_flag}
    RESULT_VARIABLE gen_script_exit_code)

  # RESULT_VARIABLE holds the exit code, or an error string when the process
  # could not be run at all; anything other than "0" counts as a failure
  if(NOT "${gen_script_exit_code}" STREQUAL "0")
    message(FATAL_ERROR
      "Code generation script failed (result: ${gen_script_exit_code}): ${SCRIPT}")
  endif()
endmacro()

# Run every generator script found under ${CMAKE_CODEGEN_DIR}/genscript, in
# the order listed below
foreach(generator_name
    generate_backward_split
    generate_embedding_optimizer
    generate_forward_quantized
    generate_forward_split
    generate_index_select)
  RUN_GEN_SCRIPT(${CMAKE_CODEGEN_DIR}/genscript/${generator_name}.py)
endforeach()


################################################################################
# HIP Code Generation
################################################################################

if(USE_ROCM)
  # Header directories handed to hipify().  All of them need to be listed for
  # the headers to be HIPified properly.
  set(include_dirs_for_hipification
    "${CMAKE_CURRENT_SOURCE_DIR}/include"
    "${CMAKE_CURRENT_SOURCE_DIR}/src"
    "${CMAKE_CURRENT_SOURCE_DIR}"
    "${CMAKE_CURRENT_SOURCE_DIR}/experimental/gen_ai")

  # HIPify the .CU and .CUH sources under PROJECT_SOURCE_DIR.  .H sources are
  # not HIPified automatically, so they need `#ifdef USE_ROCM` guards instead.
  hipify(
    CUDA_SOURCE_DIR ${PROJECT_SOURCE_DIR}
    HEADER_INCLUDE_DIR ${include_dirs_for_hipification})

  # Log the inputs that were passed to hipify()
  BLOCK_PRINT(
    "HIPify Sources"
    " "
    "CUDA_SOURCE_DIR:"
    "${PROJECT_SOURCE_DIR}"
    " "
    "HEADER_INCLUDE_DIR:"
    "${include_dirs_for_hipification}")
endif()


################################################################################
# Build FBGEMM_GPU (Main) Module
################################################################################
Expand All @@ -131,6 +193,7 @@ if(NOT FBGEMM_CPU_ONLY)
endif()

if(NOT FBGEMM_CPU_ONLY AND NOT USE_ROCM)
# TODO: Re-enable gen_ai for ROCm after enabling build support for ROCm 6.2
# TODO: Re-enable gen_ai for ROCm once ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp
# lands into latest ROCm
add_subdirectory(experimental/gen_ai)
endif()
Loading

0 comments on commit 9a94515

Please sign in to comment.