Skip to content

Commit

Permalink
Modularize CMake Build [3/N] (#3408)
Browse files Browse the repository at this point in the history
Summary:
X-link: facebookresearch/FBGEMM#499

- Remove duplicated CMake instructions now that `gpu_cpp_library()` is in place
- Add support for building using target dependencies in `gpu_cpp_library()`

Pull Request resolved: #3408

Reviewed By: leitian

Differential Revision: D66408320

Pulled By: q10

fbshipit-source-id: 6446db396ed65eb9b535c92e9fd88cb37e83b391
  • Loading branch information
q10 authored and facebook-github-bot committed Nov 25, 2024
1 parent 0505ed8 commit 2cac703
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 85 deletions.
63 changes: 46 additions & 17 deletions cmake/modules/GpuCppLibrary.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/modules/Utilities.cmake)

function(prepare_target_sources)
# This function does the following:
#
# 1. Take all the specified project sources for a target
# 1. Filter files out based on CPU-only, CUDA, and HIP build modes
# 1. Bucketize them into sets of CXX, CU, and HIP files
Expand Down Expand Up @@ -134,14 +135,20 @@ endfunction()

function(gpu_cpp_library)
# This function does the following:
#
# 1. Take all the target sources and select relevant sources based on build type (CPU-only, CUDA, HIP)
# 1. Apply source file properties as needed
# 1. HIPify files as needed
# 1. Build the .SO file
# 1. Fetch the HIPified versions of the files as needed (presumes that `hipify()` has already been run)
# 1. Build the library artifact, either as STATIC (archive) or MODULE (.SO file)
#
# Building as STATIC allows the target to be linked to other library targets:
# https://www.reddit.com/r/cpp_questions/comments/120p0ey/how_to_create_a_composite_shared_library_out_of
# https://github.com/ROCm/hipDNN/blob/master/Examples/hipdnn-training/cmake/FindHIP.cmake

set(flags)
set(singleValueArgs
PREFIX # Desired name prefix for the library target
PREFIX # Desired name for the library target (and by extension, the prefix for naming intermediate targets)
TYPE # Target type, e.g., MODULE, OBJECT. See https://cmake.org/cmake/help/latest/command/add_library.html
)
set(multiValueArgs
CPU_SRCS # Sources for CPU-only build
Expand All @@ -151,6 +158,7 @@ function(gpu_cpp_library)
OTHER_SRCS # Sources from third-party libraries
GPU_FLAGS # Compile flags for GPU builds
INCLUDE_DIRS # Include directories for compilation
DEPS # Target dependencies, i.e. built STATIC targets
)

cmake_parse_arguments(
Expand All @@ -162,6 +170,8 @@ function(gpu_cpp_library)
# Prepare CXX and CU sources
############################################################################

# Take all the sources, and filter them into CPU and GPU buckets depending
# on the source type and build mode
prepare_target_sources(
PREFIX ${args_PREFIX}
CPU_SRCS ${args_CPU_SRCS}
Expand All @@ -172,15 +182,25 @@ function(gpu_cpp_library)
INCLUDE_DIRS ${args_INCLUDE_DIRS})
set(lib_sources ${${args_PREFIX}_sources})

############################################################################
# Prepare Target Deps
############################################################################

# Convert target dependency references into CMake target-dependent expressions
# See https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html#id34
set(target_deps)
foreach(dep ${args_DEPS})
list(APPEND target_deps "$<TARGET_OBJECTS:${dep}>")
endforeach()

############################################################################
# Build the Library
############################################################################

set(lib_name ${args_PREFIX}_py)
set(lib_name ${args_PREFIX})
if(USE_ROCM)
# Fetch the equivalent HIPified sources if available.
# This presumes that hipify() has already been run.
# This presumes that `hipify()` has already been run.
get_hipified_list("${lib_sources}" lib_sources_hipified)

# Set properties for the HIPified sources
Expand All @@ -191,9 +211,10 @@ function(gpu_cpp_library)
hip_include_directories("${args_INCLUDE_DIRS}")

# Create the HIP library
hip_add_library(${lib_name} SHARED
hip_add_library(${lib_name} ${args_TYPE}
${lib_sources_hipified}
${args_OTHER_SRCS}
${target_deps}
${FBGEMM_HIP_HCC_LIBRARIES}
HIPCC_OPTIONS
${HIP_HCC_FLAGS})
Expand All @@ -206,10 +227,11 @@ function(gpu_cpp_library)
${args_INCLUDE_DIRS})

else()
# Create the C++/CUDA library
add_library(${lib_name} MODULE
# Create the CPU-only / CUDA library
add_library(${lib_name} ${args_TYPE}
${lib_sources}
${args_OTHER_SRCS})
${args_OTHER_SRCS}
${target_deps})
endif()

############################################################################
Expand All @@ -221,9 +243,14 @@ function(gpu_cpp_library)
${TORCH_INCLUDE_DIRS}
${NCCL_INCLUDE_DIRS})

# Remove `lib` from the output artifact name, i.e. `libfoo.so` -> `foo.so`
set_target_properties(${lib_name}
PROPERTIES PREFIX "")
# Set additional target properties
set_target_properties(${lib_name} PROPERTIES
# Remove `lib` prefix from the output artifact name, e.g. `libfoo.so` -> `foo.so`
PREFIX ""
# Enforce -fPIC even for STATIC libraries, since they are meant to be
# integrated into other libraries down the line
# https://stackoverflow.com/questions/3961446/why-does-gcc-not-implicitly-supply-the-fpic-flag-when-compiling-static-librarie
POSITION_INDEPENDENT_CODE ON)

# Link to PyTorch
target_link_libraries(${lib_name}
Expand All @@ -236,7 +263,7 @@ function(gpu_cpp_library)
target_link_libraries(${lib_name} ${NVML_LIB_PATH})
endif()

# Silence warnings (in asmjit)
# Silence compiler warnings (in asmjit)
target_compile_options(${lib_name} PRIVATE
-Wno-deprecated-anon-enum-enum-conversion
-Wno-deprecated-declarations)
Expand All @@ -251,18 +278,17 @@ function(gpu_cpp_library)
WORKING_DIRECTORY ${OUTPUT_DIR}
COMMAND bash ${FBGEMM}/.github/scripts/fbgemm_gpu_postbuild.bash)

# Run the post-build steps AFTER the build itself
# Set the post-build steps to run AFTER the build completes
add_dependencies(${lib_name}_postbuild ${lib_name})

############################################################################
# Set the Output Variable(s)
############################################################################

# PREFIX = `foo` --> Target Library = `foo_py`
set(${args_PREFIX}_py ${lib_name} PARENT_SCOPE)
set(${args_PREFIX} ${lib_name} PARENT_SCOPE)

BLOCK_PRINT(
"GPU CPP Library Target: ${args_PREFIX}"
"GPU CPP Library Target: ${args_PREFIX} (${args_TYPE})"
" "
"CPU_SRCS:"
"${args_CPU_SRCS}"
Expand Down Expand Up @@ -291,6 +317,9 @@ function(gpu_cpp_library)
"HIPified Source Files:"
"${lib_sources_hipified}"
" "
"Target Dependencies:"
"${target_deps}"
" "
"Output Library:"
"${lib_name}"
)
Expand Down
5 changes: 4 additions & 1 deletion fbgemm_gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,10 @@ if(USE_ROCM)
${CMAKE_CURRENT_SOURCE_DIR}/experimental/gen_ai)

# HIPify all .CU and .CUH sources under the current directory (`/fbgemm_gpu`)
# .H sources are not automatically HIPified, so they need #ifdef USE_ROCM guards
#
# Note that .H sources are not automatically HIPified, so if they reference
# CUDA-specific code, e.g. `#include <c10/cuda/CUDAStream.h>`, they will need
# to be updated with `#ifdef USE_ROCM` guards.
hipify(
CUDA_SOURCE_DIR
${PROJECT_SOURCE_DIR}
Expand Down
89 changes: 24 additions & 65 deletions fbgemm_gpu/FbgemmGpu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -308,53 +308,6 @@ list(APPEND gen_defused_optim_py_files
${CMAKE_BINARY_DIR}/optimizer_args.py)


################################################################################
# FBGEMM_GPU Generated Sources
################################################################################

if(CXX_AVX2_FOUND)
set_source_files_properties(${gen_cpu_source_files}
PROPERTIES COMPILE_OPTIONS "${AVX2_FLAGS}")
else()
set_source_files_properties(${gen_cpu_source_files}
PROPERTIES COMPILE_OPTIONS "-fopenmp")
endif()

set_source_files_properties(${gen_cpu_source_files}
PROPERTIES INCLUDE_DIRECTORIES
"${fbgemm_sources_include_directories}")

set_source_files_properties(${gen_gpu_host_source_files}
PROPERTIES INCLUDE_DIRECTORIES
"${fbgemm_sources_include_directories}")

set_source_files_properties(${gen_gpu_kernel_source_files}
PROPERTIES INCLUDE_DIRECTORIES
"${fbgemm_sources_include_directories}")

set_source_files_properties(${gen_gpu_kernel_source_files}
PROPERTIES COMPILE_OPTIONS
"${TORCH_CUDA_OPTIONS}")

set_source_files_properties(${gen_defused_optim_source_files}
PROPERTIES INCLUDE_DIRECTORIES
"${fbgemm_sources_include_directories}")

if(NOT FBGEMM_CPU_ONLY)
set(fbgemm_gpu_sources_gen
${gen_gpu_kernel_source_files}
${gen_gpu_host_source_files}
${gen_cpu_source_files}
${gen_defused_optim_source_files})
else()
set(fbgemm_gpu_sources_gen
${gen_cpu_source_files}
# To force generate_embedding_optimizer to generate Python files
${gen_defused_optim_py_files}
)
endif()


################################################################################
# FBGEMM (not FBGEMM_GPU) Sources
################################################################################
Expand Down Expand Up @@ -437,7 +390,7 @@ set(fbgemm_gpu_sources_cpu_static
src/sparse_ops/sparse_async_cumsum.cpp
src/sparse_ops/sparse_ops_cpu.cpp
src/sparse_ops/sparse_ops_meta.cpp
src/embedding_inplace_ops/embedding_inplace_update_cpu.cpp
# src/embedding_inplace_ops/embedding_inplace_update_cpu.cpp
src/split_embeddings_cache/linearize_cache_indices.cpp
src/split_embeddings_cache/lfu_cache_populate_byte.cpp
src/split_embeddings_cache/lru_cache_populate_byte.cpp
Expand All @@ -459,7 +412,7 @@ if(NOT FBGEMM_CPU_ONLY)
src/sparse_ops/sparse_ops_gpu.cpp
src/split_embeddings_utils/split_embeddings_utils.cpp
src/metric_ops/metric_ops_host.cpp
src/embedding_inplace_ops/embedding_inplace_update_gpu.cpp
# src/embedding_inplace_ops/embedding_inplace_update_gpu.cpp
src/input_combine_ops/input_combine_gpu.cpp
codegen/training/index_select/batch_index_select_dim0_host.cpp)

Expand All @@ -478,7 +431,7 @@ if(NOT FBGEMM_CPU_ONLY)
codegen/utils/embedding_bounds_check_v1.cu
codegen/utils/embedding_bounds_check_v2.cu
codegen/inference/embedding_forward_quantized_split_lookup.cu
src/embedding_inplace_ops/embedding_inplace_update.cu
# src/embedding_inplace_ops/embedding_inplace_update.cu
src/histogram_binning_calibration_ops.cu
src/input_combine_ops/input_combine.cu
src/intraining_embedding_pruning_ops/intraining_embedding_pruning.cu
Expand Down Expand Up @@ -552,7 +505,7 @@ endif()


################################################################################
# FBGEMM_GPU HIP Code Generation
# FBGEMM_GPU Generated Sources Organized
################################################################################

set(fbgemm_gpu_sources_cpu_gen
Expand Down Expand Up @@ -580,36 +533,42 @@ endif()
# FBGEMM_GPU C++ Modules
################################################################################

# Test target to demonstrate that target deps work as intended
gpu_cpp_library(
PREFIX
fbgemm_gpu
embedding_inplace_ops
TYPE
STATIC
INCLUDE_DIRS
${fbgemm_sources_include_directories}
CPU_SRCS
${fbgemm_gpu_sources_cpu_static}
${fbgemm_gpu_sources_cpu_gen}
src/embedding_inplace_ops/embedding_inplace_update_cpu.cpp
GPU_SRCS
${fbgemm_gpu_sources_gpu_static}
${fbgemm_gpu_sources_gpu_gen}
OTHER_SRCS
${asmjit_sources}
${fbgemm_sources}
src/embedding_inplace_ops/embedding_inplace_update_gpu.cpp
src/embedding_inplace_ops/embedding_inplace_update.cu
GPU_FLAGS
${TORCH_CUDA_OPTIONS})

# TODO: Test target, need to properly integrate into FBGEMM_GPU main build
gpu_cpp_library(
PREFIX
embedding_inplace_ops
fbgemm_gpu_py
TYPE
MODULE
INCLUDE_DIRS
${fbgemm_sources_include_directories}
CPU_SRCS
src/embedding_inplace_ops/embedding_inplace_update_cpu.cpp
${fbgemm_gpu_sources_cpu_static}
${fbgemm_gpu_sources_cpu_gen}
GPU_SRCS
src/embedding_inplace_ops/embedding_inplace_update_gpu.cpp
src/embedding_inplace_ops/embedding_inplace_update.cu
${fbgemm_gpu_sources_gpu_static}
${fbgemm_gpu_sources_gpu_gen}
OTHER_SRCS
${asmjit_sources}
${fbgemm_sources}
GPU_FLAGS
${TORCH_CUDA_OPTIONS})
${TORCH_CUDA_OPTIONS}
DEPS
embedding_inplace_ops)


################################################################################
Expand Down
4 changes: 3 additions & 1 deletion fbgemm_gpu/experimental/example/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ set(experimental_example_python_source_files

gpu_cpp_library(
PREFIX
fbgemm_gpu_experimental_example
fbgemm_gpu_experimental_example_py
TYPE
MODULE
INCLUDE_DIRS
${fbgemm_sources_include_directories}
GPU_SRCS
Expand Down
4 changes: 3 additions & 1 deletion fbgemm_gpu/experimental/gen_ai/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,9 @@ file(GLOB_RECURSE experimental_gen_ai_python_source_files

gpu_cpp_library(
PREFIX
fbgemm_gpu_experimental_gen_ai
fbgemm_gpu_experimental_gen_ai_py
TYPE
MODULE
INCLUDE_DIRS
${fbgemm_sources_include_directories}
${CMAKE_CURRENT_SOURCE_DIR}/src/quantize
Expand Down

0 comments on commit 2cac703

Please sign in to comment.