Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ACC Setdevice reordering #595

Merged
merged 3 commits into from
Mar 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/acc/PACKAGE
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"description": "Generic accelerator API",
"archive": "libdbcsr",
"requires": ["../base", "cuda", "hip", "opencl", "libsmm_acc"]
"requires": ["../base", "../core", "cuda", "hip", "opencl", "libsmm_acc"]
}
14 changes: 13 additions & 1 deletion src/acc/dbcsr_acc_init.F
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ MODULE dbcsr_acc_init
#if defined (__DBCSR_ACC)
USE ISO_C_BINDING, ONLY: C_INT, C_CHAR, C_PTR, C_NULL_PTR, C_NULL_CHAR, C_ASSOCIATED
#endif
USE dbcsr_acc_device, ONLY: dbcsr_acc_set_active_device
USE dbcsr_config, ONLY: get_accdrv_active_device_id
#include "base/dbcsr_base_uses.f90"

IMPLICIT NONE
Expand Down Expand Up @@ -47,9 +49,15 @@ SUBROUTINE acc_init()
DBCSR_ABORT("__DBCSR_ACC not compiled in.")
#else
INTEGER :: istat
! Set active device first
CALL dbcsr_acc_set_active_device(get_accdrv_active_device_id())
!$OMP PARALLEL DEFAULT(NONE) PRIVATE(istat)
!$OMP MASTER
istat = acc_interface_drv_init()
IF (istat /= 0) &
DBCSR_ABORT("acc_init failed")
!$OMP END MASTER
!$OMP END PARALLEL
#endif
END SUBROUTINE acc_init

Expand All @@ -58,10 +66,14 @@ SUBROUTINE acc_finalize()
#if ! defined (__DBCSR_ACC)
DBCSR_ABORT("__DBCSR_ACC not compiled in.")
#else
INTEGER :: istat
INTEGER :: istat
!$OMP PARALLEL DEFAULT(NONE) PRIVATE(istat)
!$OMP MASTER
istat = acc_interface_drv_finalize()
IF (istat /= 0) &
DBCSR_ABORT("acc_finalize failed")
!$OMP END MASTER
!$OMP END PARALLEL
#endif
END SUBROUTINE acc_finalize

Expand Down
31 changes: 10 additions & 21 deletions src/core/dbcsr_lib.F
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,8 @@ MODULE dbcsr_lib

!! Routines that affect the DBCSR library as a whole
USE dbcsr_acc_init, ONLY: acc_finalize, acc_init
USE dbcsr_acc_device, ONLY: dbcsr_acc_get_ndevices, dbcsr_acc_set_active_device
USE dbcsr_config, ONLY: get_accdrv_active_device_id, &
set_accdrv_active_device_id, &
USE dbcsr_acc_device, ONLY: dbcsr_acc_get_ndevices
USE dbcsr_config, ONLY: set_accdrv_active_device_id, &
reset_accdrv_active_device_id, &
dbcsr_set_config, &
has_acc
Expand Down Expand Up @@ -204,24 +203,18 @@ SUBROUTINE dbcsr_init_lib_pre(mp_comm, io_unit, accdrv_active_device_id)

! Initialize Acc and set active device
IF (has_acc) THEN
!$OMP PARALLEL
!$OMP MASTER
CALL acc_init()
!$OMP END MASTER
!$OMP END PARALLEL
IF (dbcsr_acc_get_ndevices() > 0) THEN
IF (PRESENT(accdrv_active_device_id)) THEN
CALL set_accdrv_active_device_id(accdrv_active_device_id)
ELSE
! Use round-robin assignment per rank
CALL set_accdrv_active_device_id(MOD(mynode, dbcsr_acc_get_ndevices()))
END IF
IF (PRESENT(accdrv_active_device_id)) THEN
CALL set_accdrv_active_device_id(accdrv_active_device_id)
ELSEIF (dbcsr_acc_get_ndevices() > 0) THEN
! Use round-robin assignment per rank
CALL set_accdrv_active_device_id(MOD(mynode, dbcsr_acc_get_ndevices()))
ELSE
DBCSR_ABORT("dbcsr_init_lib: No recongnized GPU devices")
END IF
CALL acc_init()
END IF

#if defined(__DBCSR_ACC)
CALL dbcsr_acc_set_active_device(get_accdrv_active_device_id())

! Checks related to DBCSR's GPU backend: check consistency in threading level
libsmm_acc_thread_safe = libsmm_acc_is_thread_safe() ! 0: not threaded, 1: threaded
dbcsr_thread_safe = 0 ! not threaded
Expand Down Expand Up @@ -310,11 +303,7 @@ SUBROUTINE dbcsr_finalize_lib()
! Reset Acc ID
CALL reset_accdrv_active_device_id()
IF (has_acc) THEN
!$OMP PARALLEL
!$OMP MASTER
CALL acc_finalize()
!$OMP END MASTER
!$OMP END PARALLEL
END IF

! Check the number of communicators
Expand Down
16 changes: 8 additions & 8 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -241,14 +241,14 @@ if (USE_ACCEL MATCHES "cuda|hip")
$<$<STREQUAL:${USE_ACCEL},cuda>:CUDA::cuda_driver> libsmm_acc)
endforeach ()

add_test(NAME libsmm_acc_unittest_multiply
COMMAND libsmm_acc_unittest_multiply)
add_test(NAME libsmm_acc_unittest_transpose
COMMAND libsmm_acc_unittest_transpose)
add_test(NAME libsmm_acc_timer_multiply-autotuned
COMMAND libsmm_acc_timer_multiply autotuned)
add_test(NAME libsmm_acc_timer_multiply-predicted
COMMAND libsmm_acc_timer_multiply predicted)
# Disabled for the moment: these tests are not parallelized and are very slow.
# See https://github.com/cp2k/dbcsr/issues/427
#
# add_test(NAME libsmm_acc_unittest_multiply
#          COMMAND libsmm_acc_unittest_multiply)
# add_test(NAME libsmm_acc_unittest_transpose
#          COMMAND libsmm_acc_unittest_transpose)
# add_test(NAME libsmm_acc_timer_multiply-autotuned
#          COMMAND libsmm_acc_timer_multiply autotuned)
# add_test(NAME libsmm_acc_timer_multiply-predicted
#          COMMAND libsmm_acc_timer_multiply predicted)

endif ()

Expand Down