Skip to content

Commit

Permalink
Make G2G as default during the compilation
Browse files Browse the repository at this point in the history
  • Loading branch information
alazzaro committed Dec 9, 2024
1 parent e216532 commit 0f960f1
Show file tree
Hide file tree
Showing 6 changed files with 1 addition and 42 deletions.
8 changes: 0 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -120,13 +120,7 @@ set_property(CACHE WITH_GPU PROPERTY STRINGS ${SUPPORTED_CUDA_ARCHITECTURES}

option(WITH_CUDA_PROFILING "Enable profiling within CUDA" OFF)
option(WITH_HIP_PROFILING "Enable profiling within HIP" OFF)
option(WITH_G2G "Enable GPU aware MPI within CUDA/HIP backends" OFF)

if (WITH_G2G AND ((NOT USE_ACCEL) OR ((NOT USE_ACCEL MATCHES "cuda")
AND (NOT USE_ACCEL MATCHES "hip"))))
message(
FATAL_ERROR "GPU aware MPI can only be enabled for HIP/CUDA GPU backends")
endif ()
# =================================================================================================
# LANGUAGES AND TESTING
enable_language(Fortran)
Expand Down Expand Up @@ -274,7 +268,6 @@ if (USE_ACCEL MATCHES "cuda")
message(STATUS "Kernel parameters: " ${WITH_GPU_PARAMS})
message(STATUS "GPU architecture number: " ${ACC_ARCH_NUMBER})
message(STATUS "GPU profiling enabled: " ${WITH_CUDA_PROFILING})
message(STATUS "GPU aware MPI enabled: " ${WITH_G2G})
endif ()

if (USE_ACCEL MATCHES "hip")
Expand Down Expand Up @@ -319,7 +312,6 @@ if (USE_ACCEL MATCHES "hip")
message(STATUS "Kernel parameters: " ${WITH_GPU_PARAMS})
message(STATUS "GPU architecture number: " ${ACC_ARCH_NUMBER})
message(STATUS "GPU profiling enabled: " ${WITH_HIP_PROFILING})
message(STATUS "GPU aware MPI enabled: " ${WITH_G2G})

# =================================== BLAS on GPU backend
find_package(hipblas CONFIG REQUIRED HINTS ${ROCM_PATH})
Expand Down
1 change: 0 additions & 1 deletion docs/guide/2-user-guide/1-installation/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ make
-DUSE_ACCEL=<opencl|cuda|hip>
-DWITH_CUDA_PROFILING=<OFF|ON>
-DWITH_HIP_PROFILING=<OFF|ON>
-DWITH_G2G=<OFF|ON>
-DWITH_C_API=<ON|OFF>
-DWITH_EXAMPLES=<ON|OFF>
-DWITH_GPU=<P100|K20X|K40|K80|V100|Mi50|Mi100|Mi250>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,3 @@ Assumed square matrix with 20x20 matrix with 5x5 blocks and a 2x2 processor grid
| `__CUDA_PROFILING` | Enable the NVIDIA Tools Extension; requires linking with `-lnvToolsExt` | Fortran, C, C++ |
| `__CUDA` | Enable CUDA acceleration | C, C++ |
| `__HIP` | Enable HIP acceleration | C, C++ |
| `__DBCSR_ACC_G2G` | Enable GPU Aware MPI in CUDA and HIP backends | Fortran, C, C++ |
12 changes: 0 additions & 12 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -284,18 +284,6 @@ if (USE_ACCEL)
$<$<BOOL:${WITH_HIP_PROFILING}>:roctx64>
$<$<BOOL:${WITH_HIP_PROFILING}>:roctracer64>
$<$<STREQUAL:${USE_ACCEL},opencl>:OpenCL::OpenCL>)

if (WITH_G2G)
target_compile_definitions(
dbcsr
PRIVATE __DBCSR_ACC_G2G
$<$<STREQUAL:${USE_ACCEL},cuda>:__CUDA>
$<$<STREQUAL:${USE_ACCEL},cuda>:ARCH_NUMBER=${ACC_ARCH_NUMBER}>
$<$<STREQUAL:${USE_ACCEL},hip>:__HIP>
$<$<STREQUAL:${USE_ACCEL},hip>:ARCH_NUMBER=${ACC_ARCH_NUMBER}>
$<$<BOOL:${WITH_CUDA_PROFILING}>:__CUDA_PROFILING>
$<$<BOOL:${WITH_HIP_PROFILING}>:__HIP_PROFILING>)
endif ()
endif ()

# =================================================================================================
Expand Down
14 changes: 1 addition & 13 deletions src/core/dbcsr_config.F
Original file line number Diff line number Diff line change
Expand Up @@ -177,9 +177,7 @@ MODULE dbcsr_config
SET_PARAMETER_DEFAULT(USE_MEMPOOLS_CPU, CONF_PAR_LOGICAL, .FALSE.)
SET_PARAMETER_DEFAULT(USE_MPI_ALLOCATOR, CONF_PAR_LOGICAL, .FALSE.)
SET_PARAMETER_DEFAULT(TAS_SPLIT_FACTOR, CONF_PAR_REAL, 1.0_real_8)
#if defined(__DBCSR_ACC_G2G)
SET_PARAMETER_DEFAULT(USE_ACC_G2G, CONF_PAR_LOGICAL, .TRUE.)
#endif
SET_PARAMETER_DEFAULT(USE_ACC_G2G, CONF_PAR_LOGICAL, .FALSE.)
END TYPE dbcsr_config_type

TYPE(dbcsr_config_type), PROTECTED, SAVE :: dbcsr_cfg = dbcsr_config_type() ! defaults
Expand Down Expand Up @@ -414,11 +412,7 @@ SUBROUTINE dbcsr_set_config( &
CALL dbcsr_cfg%accdrv_binning_binsize%set(accdrv_binning_binsize)
CALL dbcsr_cfg%use_mempools_cpu%set(use_mempools_cpu)
CALL dbcsr_cfg%tas_split_factor%set(tas_split_factor)
#if defined(__DBCSR_ACC_G2G)
CALL dbcsr_cfg%use_acc_g2g%set(use_acc_g2g)
#else
MARK_USED(use_acc_g2g)
#endif

IF (0 == nthreads) THEN
nthreads = 1
Expand Down Expand Up @@ -517,11 +511,7 @@ SUBROUTINE dbcsr_get_default_config( &
IF (PRESENT(use_mempools_cpu)) use_mempools_cpu = dbcsr_cfg%use_mempools_cpu%defval
IF (PRESENT(nstacks)) nstacks = dbcsr_cfg%n_stacks%defval
IF (PRESENT(tas_split_factor)) tas_split_factor = dbcsr_cfg%tas_split_factor%defval
#if defined(__DBCSR_ACC_G2G)
IF (PRESENT(use_acc_g2g)) use_acc_g2g = dbcsr_cfg%use_acc_g2g%defval
#else
MARK_USED(use_acc_g2g)
#endif

END SUBROUTINE dbcsr_get_default_config

Expand Down Expand Up @@ -650,11 +640,9 @@ SUBROUTINE dbcsr_print_config(unit_nr)
WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
"DBCSR| ACC: Min. flop for processing", dbcsr_cfg%accdrv_min_flop_process%val, &
dbcsr_cfg%accdrv_min_flop_process%print_source()
#if defined(__DBCSR_ACC_G2G)
WRITE (UNIT=unit_nr, FMT='(1X,A,T80,L1,A4)') &
"DBCSR| ACC: Use G2G algorithm", dbcsr_cfg%use_acc_g2g%val, &
dbcsr_cfg%use_acc_g2g%print_source()
#endif
IF (dbcsr_cfg%accdrv_stack_sort%val) THEN
WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
"DBCSR| ACC: Min. flop for sorting", dbcsr_cfg%accdrv_min_flop_sort%val, &
Expand Down
7 changes: 0 additions & 7 deletions src/mm/dbcsr_mm.F
Original file line number Diff line number Diff line change
Expand Up @@ -906,7 +906,6 @@ SUBROUTINE dbcsr_multiply_generic(transa, transb, &
flop=my_flop, keep_product_data=keep_product_data)
ELSE
data_type = dbcsr_get_data_type(product_matrix)
#if defined (__DBCSR_ACC_G2G)
IF (data_type .NE. dbcsr_type_real_8 .OR. (.NOT. dbcsr_cfg%use_acc_g2g%val)) THEN
! If G2G is enabled, norms have to be calculated on the GPU.
! Since the norms kernel expects only real_8 type data, we
Expand All @@ -921,12 +920,6 @@ SUBROUTINE dbcsr_multiply_generic(transa, transb, &
filter_eps=filter_eps, &
flop=my_flop, keep_product_data=keep_product_data)
END IF
#else
CALL multiply_cannon(m2s_left, m2s_right, product_matrix, &
retain_sparsity=retain_sparsity, &
filter_eps=filter_eps, &
flop=my_flop, keep_product_data=keep_product_data)
#endif
CALL dbcsr_finalize(product_matrix, reshuffle=PRESENT(filter_eps) .AND. .NOT. keep_sparsity)
END IF
!
Expand Down

0 comments on commit 0f960f1

Please sign in to comment.