Skip to content

Commit

Permalink
building dynamic library for specified GPU architectures.
Browse files Browse the repository at this point in the history
  • Loading branch information
lcy-seso committed Jan 15, 2025
1 parent f689b82 commit d285859
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 6 deletions.
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ endif()

# Make the project's helper modules under cmake/ visible to include().
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")

# Cache entry through which a user can hand in CUDA device architectures.
# It defaults to the empty string.
set(USER_CUDA_ARCH_LIST "" CACHE STRING
    "User-specified cuda device architectures")

# Load the `generic` module, then descend into the csrc subdirectory.
include(generic)

add_subdirectory(csrc)
20 changes: 17 additions & 3 deletions cmake/generic.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,26 @@ find_package(Torch REQUIRED)
# Report the Torch include directories and add them to the directory-scoped
# include path. The list is expanded inside the quoted string on purpose:
# message() concatenates separate arguments without any separator, so an
# unquoted multi-entry list would be printed as one run-together path.
message(STATUS "Torch include include_directories: ${TORCH_INCLUDE_DIRS}")
include_directories(${TORCH_INCLUDE_DIRS})

# Select the GPU architectures that device code is generated for.
#
# * USER_CUDA_ARCH_LIST is non-empty: it is used verbatim as
#   CMAKE_CUDA_ARCHITECTURES, so it must already be in CMake's format
#   (for example "80;86").
# * Otherwise: let CMake detect the GPUs of the build machine, to avoid
#   generating device code for all possible architectures.
#
# CMAKE_CUDA_ARCHITECTURES only accepts an architecture list, one of the
# special values all / all-major / native, or a false value. "ON" is none of
# these, so auto-detection is requested with "native", which needs
# CMake >= 3.24. Older CMake versions fall back to OFF, i.e. no architecture
# flags are passed and the compiler default is used.
#
# NOTE(review): setup.py passes -DCUDA_ARCH_LIST=..., not
# -DUSER_CUDA_ARCH_LIST=... -- confirm which variable name is intended.
if(USER_CUDA_ARCH_LIST)
  message(STATUS "User specified CUDA architectures: ${USER_CUDA_ARCH_LIST}")
  set(CMAKE_CUDA_ARCHITECTURES "${USER_CUDA_ARCH_LIST}")
elseif(CMAKE_VERSION VERSION_GREATER_EQUAL "3.24")
  message(STATUS "No user specified CUDA architectures, cmake will detect the "
                 "current CUDA architecture.")
  set(CMAKE_CUDA_ARCHITECTURES native)
else()
  message(STATUS "No user specified CUDA architectures and this CMake version "
                 "cannot detect them; no architecture flags will be passed.")
  set(CMAKE_CUDA_ARCHITECTURES OFF)
endif()

# Append nvcc's "--Werror all-warnings" option to the CUDA compile flags.
string(APPEND CMAKE_CUDA_FLAGS " --Werror all-warnings")

# Turn CUDA_PROPAGATE_HOST_FLAGS off so that host compiler flags are not
# forwarded to the device compiler.
set(CUDA_PROPAGATE_HOST_FLAGS OFF)

# Echo the architecture list and the resulting CUDA flags at configure time.
# NOTE(review): this prints CUDA_ARCH_LIST, while the architecture selection
# reads USER_CUDA_ARCH_LIST -- confirm which variable is meant here.
message(STATUS "my cuda architecture list: ${CUDA_ARCH_LIST}")
message(STATUS "cmake cuda flags: ${CMAKE_CUDA_FLAGS}")

# FIXME(ying): -std=c++17 has to be set explicitly here; otherwise, linking
# against torchlibs will raise errors. It seems that the host compilation
# options are not passed to torchlibs.
Expand All @@ -64,6 +76,8 @@ set(CUDA_NVCC_FLAGS
-U__CUDA_NO_BFLOAT162_CONVERSIONS__)
# Add --use_fast_math to the nvcc flag list, then log the complete list.
list(APPEND CUDA_NVCC_FLAGS --use_fast_math)

message(STATUS "NVCC FLAGS = ${CUDA_NVCC_FLAGS}")

if(${CUDA_VERSION_MAJOR} VERSION_GREATER_EQUAL "11")
add_definitions("-DENABLE_BF16")
message("CUDA_VERSION ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} "
Expand Down
11 changes: 8 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------

import os
import subprocess
from pathlib import Path
Expand Down Expand Up @@ -56,6 +55,10 @@ def build_extension(self, ext: CMakeExtension) -> None:
) if self.debug is None else self.debug
cfg = "Debug" if debug else "Release"

# Set CUDA_ARCH_LIST to build the dynamic shared library
# for the specified GPU architectures.
arch_list = os.environ.get("TORCH_CUDA_ARCH_LIST", None)

parallel_level = os.environ.get("CMAKE_BUILD_PARALLEL_LEVEL", None)
if parallel_level is not None:
self.parallel = int(parallel_level)
Expand All @@ -72,9 +75,11 @@ def build_extension(self, ext: CMakeExtension) -> None:
"-DCMAKE_BUILD_TYPE=%s" % cfg,
"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}".format(
cfg.upper(), extdir
), "-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY_{}={}".format(
),
"-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY_{}={}".format(
cfg.upper(), self.build_temp
)
),
"-DCUDA_ARCH_LIST={}".format(arch_list) if arch_list else "",
]

# Adding CMake arguments set as environment variable
Expand Down

0 comments on commit d285859

Please sign in to comment.