From 0f960f1d9e0bdf6b819730e9314875473da3cac6 Mon Sep 17 00:00:00 2001 From: Alfio Lazzaro Date: Mon, 9 Dec 2024 21:03:20 +0100 Subject: [PATCH] Make G2G as default during the compilation --- CMakeLists.txt | 8 -------- docs/guide/2-user-guide/1-installation/index.md | 1 - .../3-programming/1-overview/index.md | 1 - src/CMakeLists.txt | 12 ------------ src/core/dbcsr_config.F | 14 +------------- src/mm/dbcsr_mm.F | 7 ------- 6 files changed, 1 insertion(+), 42 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6b3f9f569d0..e819bbc23a0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -120,13 +120,7 @@ set_property(CACHE WITH_GPU PROPERTY STRINGS ${SUPPORTED_CUDA_ARCHITECTURES} option(WITH_CUDA_PROFILING "Enable profiling within CUDA" OFF) option(WITH_HIP_PROFILING "Enable profiling within HIP" OFF) -option(WITH_G2G "Enable GPU aware MPI within CUDA/HIP backends" OFF) -if (WITH_G2G AND ((NOT USE_ACCEL) OR ((NOT USE_ACCEL MATCHES "cuda") - AND (NOT USE_ACCEL MATCHES "hip")))) - message( - FATAL_ERROR "GPU aware MPI can only be enabled for HIP/CUDA GPU backends") -endif () # ================================================================================================= # LANGUAGES AND TESTING enable_language(Fortran) @@ -274,7 +268,6 @@ if (USE_ACCEL MATCHES "cuda") message(STATUS "Kernel parameters: " ${WITH_GPU_PARAMS}) message(STATUS "GPU architecture number: " ${ACC_ARCH_NUMBER}) message(STATUS "GPU profiling enabled: " ${WITH_CUDA_PROFILING}) - message(STATUS "GPU aware MPI enabled: " ${WITH_G2G}) endif () if (USE_ACCEL MATCHES "hip") @@ -319,7 +312,6 @@ if (USE_ACCEL MATCHES "hip") message(STATUS "Kernel parameters: " ${WITH_GPU_PARAMS}) message(STATUS "GPU architecture number: " ${ACC_ARCH_NUMBER}) message(STATUS "GPU profiling enabled: " ${WITH_HIP_PROFILING}) - message(STATUS "GPU aware MPI enabled: " ${WITH_G2G}) # =================================== BLAS on GPU backend find_package(hipblas CONFIG REQUIRED HINTS ${ROCM_PATH}) diff --git a/docs/guide/2-user-guide/1-installation/index.md b/docs/guide/2-user-guide/1-installation/index.md index 1c091558026..a3f1d362052 100644 --- a/docs/guide/2-user-guide/1-installation/index.md +++ b/docs/guide/2-user-guide/1-installation/index.md @@ -70,7 +70,6 @@ make -DUSE_ACCEL= -DWITH_CUDA_PROFILING= -DWITH_HIP_PROFILING= --DWITH_G2G= -DWITH_C_API= -DWITH_EXAMPLES= -DWITH_GPU= diff --git a/docs/guide/3-developer-guide/3-programming/1-overview/index.md b/docs/guide/3-developer-guide/3-programming/1-overview/index.md index 27f6bda40d0..d55b9b3f30f 100644 --- a/docs/guide/3-developer-guide/3-programming/1-overview/index.md +++ b/docs/guide/3-developer-guide/3-programming/1-overview/index.md @@ -55,4 +55,3 @@ Assumed square matrix with 20x20 matrix with 5x5 blocks and a 2x2 processor grid | `__CUDA_PROFILING` | To turn on Nvidia Tools Extensions. It requires to link `-lnvToolsExt` | Fortran, C, C++ | | `__CUDA` | Enable CUDA acceleration | C, C++ | | `__HIP` | Enable HIP acceleration | C, C++ | -| `__DBCSR_ACC_G2G` | Enable GPU Aware MPI in CUDA and HIP backends | Fortran, C, C++ | diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3f64deea382..31b89858369 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -284,18 +284,6 @@ if (USE_ACCEL) $<$:roctx64> $<$:roctracer64> $<$:OpenCL::OpenCL>) - - if (WITH_G2G) - target_compile_definitions( - dbcsr - PRIVATE __DBCSR_ACC_G2G - $<$:__CUDA> - $<$:ARCH_NUMBER=${ACC_ARCH_NUMBER}> - $<$:__HIP> - $<$:ARCH_NUMBER=${ACC_ARCH_NUMBER}> - $<$:__CUDA_PROFILING> - $<$:__HIP_PROFILING>) - endif () endif () # ================================================================================================= diff --git a/src/core/dbcsr_config.F b/src/core/dbcsr_config.F index 0655a1cc81a..c6d06708390 100644 --- a/src/core/dbcsr_config.F +++ b/src/core/dbcsr_config.F @@ -177,9 +177,7 @@ MODULE dbcsr_config SET_PARAMETER_DEFAULT(USE_MEMPOOLS_CPU, CONF_PAR_LOGICAL, .FALSE.) SET_PARAMETER_DEFAULT(USE_MPI_ALLOCATOR, CONF_PAR_LOGICAL, .FALSE.) SET_PARAMETER_DEFAULT(TAS_SPLIT_FACTOR, CONF_PAR_REAL, 1.0_real_8) -#if defined(__DBCSR_ACC_G2G) - SET_PARAMETER_DEFAULT(USE_ACC_G2G, CONF_PAR_LOGICAL, .TRUE.) -#endif + SET_PARAMETER_DEFAULT(USE_ACC_G2G, CONF_PAR_LOGICAL, .FALSE.) END TYPE dbcsr_config_type TYPE(dbcsr_config_type), PROTECTED, SAVE :: dbcsr_cfg = dbcsr_config_type() ! defaults @@ -414,11 +412,7 @@ SUBROUTINE dbcsr_set_config( & CALL dbcsr_cfg%accdrv_binning_binsize%set(accdrv_binning_binsize) CALL dbcsr_cfg%use_mempools_cpu%set(use_mempools_cpu) CALL dbcsr_cfg%tas_split_factor%set(tas_split_factor) -#if defined(__DBCSR_ACC_G2G) CALL dbcsr_cfg%use_acc_g2g%set(use_acc_g2g) -#else - MARK_USED(use_acc_g2g) -#endif IF (0 == nthreads) THEN nthreads = 1 @@ -517,11 +511,7 @@ SUBROUTINE dbcsr_get_default_config( & IF (PRESENT(use_mempools_cpu)) use_mempools_cpu = dbcsr_cfg%use_mempools_cpu%defval IF (PRESENT(nstacks)) nstacks = dbcsr_cfg%n_stacks%defval IF (PRESENT(tas_split_factor)) tas_split_factor = dbcsr_cfg%tas_split_factor%defval -#if defined(__DBCSR_ACC_G2G) IF (PRESENT(use_acc_g2g)) use_acc_g2g = dbcsr_cfg%use_acc_g2g%defval -#else - MARK_USED(use_acc_g2g) -#endif END SUBROUTINE dbcsr_get_default_config @@ -650,11 +640,9 @@ SUBROUTINE dbcsr_print_config(unit_nr) WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') & "DBCSR| ACC: Min. flop for processing", dbcsr_cfg%accdrv_min_flop_process%val, & dbcsr_cfg%accdrv_min_flop_process%print_source() -#if defined(__DBCSR_ACC_G2G) WRITE (UNIT=unit_nr, FMT='(1X,A,T80,L1,A4)') & "DBCSR| ACC: Use G2G algorithm", dbcsr_cfg%use_acc_g2g%val, & dbcsr_cfg%use_acc_g2g%print_source() -#endif IF (dbcsr_cfg%accdrv_stack_sort%val) THEN WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') & "DBCSR| ACC: Min. flop for sorting", dbcsr_cfg%accdrv_min_flop_sort%val, & diff --git a/src/mm/dbcsr_mm.F b/src/mm/dbcsr_mm.F index b3d475310e6..8dd9da1eaee 100644 --- a/src/mm/dbcsr_mm.F +++ b/src/mm/dbcsr_mm.F @@ -906,7 +906,6 @@ SUBROUTINE dbcsr_multiply_generic(transa, transb, & flop=my_flop, keep_product_data=keep_product_data) ELSE data_type = dbcsr_get_data_type(product_matrix) -#if defined (__DBCSR_ACC_G2G) IF (data_type .NE. dbcsr_type_real_8 .OR. (.NOT. dbcsr_cfg%use_acc_g2g%val)) THEN ! If G2G is enabled, norms have to be calculated on the GPU. ! Since the norms kernel expects only real_8 type data, we @@ -921,12 +920,6 @@ SUBROUTINE dbcsr_multiply_generic(transa, transb, & filter_eps=filter_eps, & flop=my_flop, keep_product_data=keep_product_data) END IF -#else - CALL multiply_cannon(m2s_left, m2s_right, product_matrix, & - retain_sparsity=retain_sparsity, & - filter_eps=filter_eps, & - flop=my_flop, keep_product_data=keep_product_data) -#endif CALL dbcsr_finalize(product_matrix, reshuffle=PRESENT(filter_eps) .AND. .NOT. keep_sparsity) END IF !