Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added support for NVIDIA cards up to Ampere (CUDA 11), removed Fermi (CUDA 3.2) #7020

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions Makefile.config.example
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,21 @@ CUDA_DIR := /usr/local/cuda
# CUDA_DIR := /usr

# CUDA architecture setting: going with all of them.
# For CUDA < 6.0, comment the *_50 through *_61 lines for compatibility.
# For CUDA < 8.0, comment the *_60 and *_61 lines for compatibility.
# For CUDA >= 9.0, comment the *_20 and *_21 lines for compatibility.
CUDA_ARCH := -gencode arch=compute_20,code=sm_20 \
-gencode arch=compute_20,code=sm_21 \
# For CUDA < 6.0, comment all the lines starting from *_50 for compatibility.
# For CUDA < 8.0, comment all the lines starting from *_60 for compatibility.
CUDA_ARCH := \
-gencode arch=compute_30,code=sm_30 \
-gencode arch=compute_35,code=sm_35 \
-gencode arch=compute_50,code=sm_50 \
-gencode arch=compute_52,code=sm_52 \
-gencode arch=compute_60,code=sm_60 \
-gencode arch=compute_61,code=sm_61 \
-gencode arch=compute_61,code=compute_61
-gencode arch=compute_62,code=sm_62 \
-gencode arch=compute_70,code=sm_70 \
-gencode arch=compute_72,code=sm_72 \
-gencode arch=compute_75,code=sm_75 \
-gencode arch=compute_80,code=sm_80 \
-gencode arch=compute_86,code=sm_86

# BLAS choice:
# atlas for ATLAS (default)
Expand Down
39 changes: 29 additions & 10 deletions cmake/Cuda.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,20 @@ endif()

# Known NVIDIA GPU architectures Caffe can be compiled for.
# This list will be used for CUDA_ARCH_NAME = All option
# NOTE(review): this assignment is replaced by the set(Caffe_known_gpu_archs ...)
# at the end of this section; it looks like the pre-change value (including the
# Fermi "20 21(20)" entries) -- confirm it is meant to be dropped.
set(Caffe_known_gpu_archs "20 21(20) 30 35 50 60 61")
# Per-generation architecture lists. Each variable holds a single
# space-separated string of architecture numbers; the CUDA version in
# parentheses is the one noted for that generation.
# Kepler (CUDA >= 5)
set(KEPLER "30 35 37")
# Maxwell (CUDA >= 6)
set(MAXWELL "50 52 53")
# Pascal (CUDA >= 8)
set(PASCAL "60 61 62")
# Volta (CUDA >= 9)
set(VOLTA "70 72")
# Turing (CUDA >= 10)
set(TURING "75")
# Ampere (CUDA >= 11)
set(AMPERE "80 86")

# All generations joined into one space-separated string.
set(Caffe_known_gpu_archs "${KEPLER} ${MAXWELL} ${PASCAL} ${VOLTA} ${TURING} ${AMPERE}")

################################################################################################
# A function for automatic detection of GPUs installed (if autodetection is enabled)
Expand Down Expand Up @@ -56,7 +69,7 @@ endfunction()
# caffe_select_nvcc_arch_flags(out_variable)
function(caffe_select_nvcc_arch_flags out_variable)
# List of arch names
set(__archs_names "Fermi" "Kepler" "Maxwell" "Pascal" "All" "Manual")
set(__archs_names "Kepler (CUDA >= 5)" "Maxwell (CUDA >= 6)" "Pascal (CUDA >= 8)" "Volta (CUDA >= 9)" "Turing (CUDA >= 10)" "Ampere (CUDA >= 11)" "All" "Manual")
set(__archs_name_default "All")
if(NOT CMAKE_CROSSCOMPILING)
list(APPEND __archs_names "Auto")
Expand All @@ -83,14 +96,20 @@ function(caffe_select_nvcc_arch_flags out_variable)
unset(CUDA_ARCH_PTX CACHE)
endif()

if(${CUDA_ARCH_NAME} STREQUAL "Fermi")
set(__cuda_arch_bin "20 21(20)")
elseif(${CUDA_ARCH_NAME} STREQUAL "Kepler")
set(__cuda_arch_bin "30 35")
elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell")
set(__cuda_arch_bin "50")
elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal")
set(__cuda_arch_bin "60 61")
if (${CUDA_ARCH_NAME} STREQUAL "Fermi (3.2 <= CUDA <= 8)" AND NOT WIN32)
set(__cuda_arch_bin ${FERMI})
elseif (${CUDA_ARCH_NAME} STREQUAL "Kepler (CUDA >= 5)")
set(__cuda_arch_bin ${KEPLER})
elseif (${CUDA_ARCH_NAME} STREQUAL "Maxwell (CUDA >= 6)")
set(__cuda_arch_bin ${MAXWELL})
elseif (${CUDA_ARCH_NAME} STREQUAL "Pascal (CUDA >= 8)")
set(__cuda_arch_bin ${PASCAL})
elseif (${CUDA_ARCH_NAME} STREQUAL "Volta (CUDA >= 9)")
set(__cuda_arch_bin ${VOLTA})
elseif (${CUDA_ARCH_NAME} STREQUAL "Turing (CUDA >= 10)")
set(__cuda_arch_bin ${TURING})
elseif (${CUDA_ARCH_NAME} STREQUAL "Ampere (CUDA >= 11)")
set(__cuda_arch_bin ${AMPERE})
elseif(${CUDA_ARCH_NAME} STREQUAL "All")
set(__cuda_arch_bin ${Caffe_known_gpu_archs})
elseif(${CUDA_ARCH_NAME} STREQUAL "Auto")
Expand Down