From 1b2228608549fde18ffe567947d607ea5c9f68c4 Mon Sep 17 00:00:00 2001 From: Lukas Mosimann Date: Tue, 6 Aug 2024 04:38:29 -0700 Subject: [PATCH 1/4] fix openacc if already on device --- src/trans/gpu/internal/trgtol_mod.F90 | 19 +++++++++++++------ src/trans/gpu/internal/trltog_mod.F90 | 18 ++++++++++++------ 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/src/trans/gpu/internal/trgtol_mod.F90 b/src/trans/gpu/internal/trgtol_mod.F90 index 92ce4a89d..8aeec5c6c 100755 --- a/src/trans/gpu/internal/trgtol_mod.F90 +++ b/src/trans/gpu/internal/trgtol_mod.F90 @@ -120,7 +120,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION USE OPENACC_EXT, ONLY: EXT_ACC_ARR_DESC, EXT_ACC_PASS, EXT_ACC_CREATE, & & EXT_ACC_DELETE - USE OPENACC, ONLY: ACC_HANDLE_KIND + USE OPENACC, ONLY: ACC_IS_PRESENT USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS IMPLICIT NONE @@ -176,6 +176,8 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, TYPE(EXT_ACC_ARR_DESC) :: ACC_POINTERS(5) ! at most 5 copyins... INTEGER(KIND=JPIM) :: ACC_POINTERS_CNT = 0 + LOGICAL :: LUPDATE_PGP, LUPDATE_PGPUV, LUPDATE_PGP2, LUPDATE_PGP3A, LUPDATE_PGP3B + TYPE(MPI_COMM) :: LOCAL_COMM TYPE(MPI_REQUEST) :: IREQUEST(2*NPROC) @@ -343,54 +345,59 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, IF (PRESENT(PGP)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP) + LUPDATE_PGP = .NOT. ACC_IS_PRESENT(PGP) ENDIF IF (PRESENT(PGPUV)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGPUV) + LUPDATE_PGPUV = .NOT. ACC_IS_PRESENT(PGPUV) ENDIF IF (PRESENT(PGP2)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP2) + LUPDATE_PGP2 = .NOT. ACC_IS_PRESENT(PGP2) ENDIF IF (PRESENT(PGP3A)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP3A) + LUPDATE_PGP3A = .NOT. ACC_IS_PRESENT(PGP3A) ENDIF IF (PRESENT(PGP3B)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP3B) + LUPDATE_PGP3B = .NOT. ACC_IS_PRESENT(PGP3B) ENDIF IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_CREATE(ACC_POINTERS(1:ACC_POINTERS_CNT),STREAM=1_ACC_HANDLE_KIND) !$ACC WAIT(1) - IF (PRESENT(PGP)) THEN + IF (PRESENT(PGP) .AND. LUPDATE_PGP) THEN #ifdef OMPGPU #endif #ifdef ACCGPU !$ACC UPDATE DEVICE(PGP) #endif ENDIF - IF (PRESENT(PGPUV)) THEN + IF (PRESENT(PGPUV) .AND. LUPDATE_PGPUV) THEN #ifdef OMPGPU #endif #ifdef ACCGPU !$ACC UPDATE DEVICE(PGPUV) #endif ENDIF - IF (PRESENT(PGP2)) THEN + IF (PRESENT(PGP2) .AND. LUPDATE_PGP2) THEN #ifdef OMPGPU #endif #ifdef ACCGPU !$ACC UPDATE DEVICE(PGP2) #endif ENDIF - IF (PRESENT(PGP3A)) THEN + IF (PRESENT(PGP3A) .AND. LUPDATE_PGP3A) THEN #ifdef OMPGPU #endif #ifdef ACCGPU !$ACC UPDATE DEVICE(PGP3A) #endif ENDIF - IF (PRESENT(PGP3B)) THEN + IF (PRESENT(PGP3B) .AND. LUPDATE_PGP3B) THEN #ifdef OMPGPU #endif #ifdef ACCGPU diff --git a/src/trans/gpu/internal/trltog_mod.F90 b/src/trans/gpu/internal/trltog_mod.F90 index 242f701bf..7f863eacf 100755 --- a/src/trans/gpu/internal/trltog_mod.F90 +++ b/src/trans/gpu/internal/trltog_mod.F90 @@ -121,7 +121,7 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, USE ISO_C_BINDING, ONLY: C_SIZE_T USE OPENACC_EXT, ONLY: EXT_ACC_ARR_DESC, EXT_ACC_PASS, EXT_ACC_CREATE, & & EXT_ACC_DELETE - USE OPENACC, ONLY: ACC_HANDLE_KIND + USE OPENACC, ONLY: ACC_IS_PRESENT IMPLICIT NONE @@ -190,6 +190,7 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, TYPE(MPI_COMM) :: LOCAL_COMM TYPE(MPI_REQUEST) :: IREQUEST(NPROC*2) + LOGICAL :: LUPDATE_PGP, LUPDATE_PGPUV, LUPDATE_PGP2, LUPDATE_PGP3A, LUPDATE_PGP3B #ifdef PARKINDTRANS_SINGLE #define TRLTOG_DTYPE MPI_FLOAT @@ -490,22 +491,27 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, IF (PRESENT(PGP)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP) + LUPDATE_PGP = .NOT. ACC_IS_PRESENT(PGP) ENDIF IF (PRESENT(PGPUV)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGPUV) + LUPDATE_PGPUV = .NOT. ACC_IS_PRESENT(PGPUV) ENDIF IF (PRESENT(PGP2)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP2) + LUPDATE_PGP2 = .NOT. ACC_IS_PRESENT(PGP2) ENDIF IF (PRESENT(PGP3A)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP3A) + LUPDATE_PGP3A = .NOT. ACC_IS_PRESENT(PGP3A) ENDIF IF (PRESENT(PGP3B)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP3B) + LUPDATE_PGP3B = .NOT. ACC_IS_PRESENT(PGP3B) ENDIF IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_CREATE(ACC_POINTERS(1:ACC_POINTERS_CNT),STREAM=1_ACC_HANDLE_KIND) #ifdef OMPGPU @@ -857,35 +863,35 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, !$ACC END DATA ! PGPUV !$ACC END DATA ! PGP #endif - IF (PRESENT(PGP)) THEN + IF (PRESENT(PGP) .AND. LUPDATE_PGP) THEN #ifdef OMPGPU #endif #ifdef ACCGPU !$ACC UPDATE HOST(PGP) #endif ENDIF - IF (PRESENT(PGPUV)) THEN + IF (PRESENT(PGPUV) .AND. LUPDATE_PGPUV) THEN #ifdef OMPGPU #endif #ifdef ACCGPU !$ACC UPDATE HOST(PGPUV) #endif ENDIF - IF (PRESENT(PGP2)) THEN + IF (PRESENT(PGP2) .AND. LUPDATE_PGP2) THEN #ifdef OMPGPU #endif #ifdef ACCGPU !$ACC UPDATE HOST(PGP2) #endif ENDIF - IF (PRESENT(PGP3A)) THEN + IF (PRESENT(PGP3A) .AND. LUPDATE_PGP3A) THEN #ifdef OMPGPU #endif #ifdef ACCGPU !$ACC UPDATE HOST(PGP3A) #endif ENDIF - IF (PRESENT(PGP3B)) THEN + IF (PRESENT(PGP3B) .AND. LUPDATE_PGP3B) THEN #ifdef OMPGPU #endif #ifdef ACCGPU From 1449faf7d6d5007e60400b9d847463caf6034d3c Mon Sep 17 00:00:00 2001 From: Lukas Mosimann Date: Tue, 6 Aug 2024 05:01:04 -0700 Subject: [PATCH 2/4] fix omp --- src/trans/gpu/internal/trgtol_mod.F90 | 25 ++++++++++++++++++++----- src/trans/gpu/internal/trltog_mod.F90 | 25 ++++++++++++++++++++----- 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/src/trans/gpu/internal/trgtol_mod.F90 b/src/trans/gpu/internal/trgtol_mod.F90 index 8aeec5c6c..18d4a15a2 100755 --- a/src/trans/gpu/internal/trgtol_mod.F90 +++ b/src/trans/gpu/internal/trgtol_mod.F90 @@ -342,62 +342,77 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, ENDIF CALL GSTATS(412,0) ACC_POINTERS_CNT = 0 + LUPDATE_PGP = .FALSE. IF (PRESENT(PGP)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP) +#ifdef ACCGPU LUPDATE_PGP = .NOT. ACC_IS_PRESENT(PGP) +#endif ENDIF + LUPDATE_PGPUV = .FALSE. IF (PRESENT(PGPUV)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGPUV) +#ifdef ACCGPU LUPDATE_PGPUV = .NOT. ACC_IS_PRESENT(PGPUV) +#endif ENDIF + LUPDATE_PGP2 = .FALSE. IF (PRESENT(PGP2)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP2) +#ifdef ACCGPU LUPDATE_PGP2 = .NOT. ACC_IS_PRESENT(PGP2) +#endif ENDIF + LUPDATE_PGP3A = .FALSE. IF (PRESENT(PGP3A)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP3A) +#ifdef ACCGPU LUPDATE_PGP3A = .NOT. ACC_IS_PRESENT(PGP3A) +#endif ENDIF + LUPDATE_PGP3B = .FALSE. IF (PRESENT(PGP3B)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP3B) +#ifdef ACCGPU LUPDATE_PGP3B = .NOT. ACC_IS_PRESENT(PGP3B) +#endif ENDIF IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_CREATE(ACC_POINTERS(1:ACC_POINTERS_CNT),STREAM=1_ACC_HANDLE_KIND) !$ACC WAIT(1) - IF (PRESENT(PGP) .AND. LUPDATE_PGP) THEN + IF (LUPDATE_PGP) THEN #ifdef OMPGPU #endif #ifdef ACCGPU !$ACC UPDATE DEVICE(PGP) #endif ENDIF - IF (PRESENT(PGPUV) .AND. LUPDATE_PGPUV) THEN + IF (LUPDATE_PGPUV) THEN #ifdef OMPGPU #endif #ifdef ACCGPU !$ACC UPDATE DEVICE(PGPUV) #endif ENDIF - IF (PRESENT(PGP2) .AND. LUPDATE_PGP2) THEN + IF (LUPDATE_PGP2) THEN #ifdef OMPGPU #endif #ifdef ACCGPU !$ACC UPDATE DEVICE(PGP2) #endif ENDIF - IF (PRESENT(PGP3A) .AND. LUPDATE_PGP3A) THEN + IF (LUPDATE_PGP3A) THEN #ifdef OMPGPU #endif #ifdef ACCGPU !$ACC UPDATE DEVICE(PGP3A) #endif ENDIF - IF (PRESENT(PGP3B) .AND. LUPDATE_PGP3B) THEN + IF (LUPDATE_PGP3B) THEN #ifdef OMPGPU #endif #ifdef ACCGPU diff --git a/src/trans/gpu/internal/trltog_mod.F90 b/src/trans/gpu/internal/trltog_mod.F90 index 7f863eacf..555e99d43 100755 --- a/src/trans/gpu/internal/trltog_mod.F90 +++ b/src/trans/gpu/internal/trltog_mod.F90 @@ -488,30 +488,45 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, #endif ACC_POINTERS_CNT = 0 + LUPDATE_PGP = .FALSE. IF (PRESENT(PGP)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP) +#ifdef ACCGPU LUPDATE_PGP = .NOT. ACC_IS_PRESENT(PGP) +#endif ENDIF + LUPDATE_PGPUV = .FALSE. IF (PRESENT(PGPUV)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGPUV) +#ifdef ACCGPU LUPDATE_PGPUV = .NOT. ACC_IS_PRESENT(PGPUV) +#endif ENDIF + LUPDATE_PGP2 = .FALSE. IF (PRESENT(PGP2)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP2) +#ifdef ACCGPU LUPDATE_PGP2 = .NOT. ACC_IS_PRESENT(PGP2) +#endif ENDIF + LUPDATE_PGP3A = .FALSE. IF (PRESENT(PGP3A)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP3A) +#ifdef ACCGPU LUPDATE_PGP3A = .NOT. ACC_IS_PRESENT(PGP3A) +#endif ENDIF + LUPDATE_PGP3B = .FALSE. IF (PRESENT(PGP3B)) THEN ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP3B) +#ifdef ACCGPU LUPDATE_PGP3B = .NOT. ACC_IS_PRESENT(PGP3B) +#endif ENDIF IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_CREATE(ACC_POINTERS(1:ACC_POINTERS_CNT),STREAM=1_ACC_HANDLE_KIND) #ifdef OMPGPU @@ -863,35 +878,35 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, !$ACC END DATA ! PGPUV !$ACC END DATA ! PGP #endif - IF (PRESENT(PGP) .AND. LUPDATE_PGP) THEN + IF (LUPDATE_PGP) THEN #ifdef OMPGPU #endif #ifdef ACCGPU !$ACC UPDATE HOST(PGP) #endif ENDIF - IF (PRESENT(PGPUV) .AND. LUPDATE_PGPUV) THEN + IF (LUPDATE_PGPUV) THEN #ifdef OMPGPU #endif #ifdef ACCGPU !$ACC UPDATE HOST(PGPUV) #endif ENDIF - IF (PRESENT(PGP2) .AND. LUPDATE_PGP2) THEN + IF (LUPDATE_PGP2) THEN #ifdef OMPGPU #endif #ifdef ACCGPU !$ACC UPDATE HOST(PGP2) #endif ENDIF - IF (PRESENT(PGP3A) .AND. LUPDATE_PGP3A) THEN + IF (LUPDATE_PGP3A) THEN #ifdef OMPGPU #endif #ifdef ACCGPU !$ACC UPDATE HOST(PGP3A) #endif ENDIF - IF (PRESENT(PGP3B) .AND. LUPDATE_PGP3B) THEN + IF (LUPDATE_PGP3B) THEN #ifdef OMPGPU #endif #ifdef ACCGPU From 881a43152825b26d972b5545d0bee38f5463e12e Mon Sep 17 00:00:00 2001 From: lukasm91 Date: Thu, 15 Aug 2024 07:42:06 +0200 Subject: [PATCH 3/4] Update src/trans/gpu/internal/trgtol_mod.F90 --- src/trans/gpu/internal/trgtol_mod.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/trans/gpu/internal/trgtol_mod.F90 b/src/trans/gpu/internal/trgtol_mod.F90 index 18d4a15a2..a3124c05e 100755 --- a/src/trans/gpu/internal/trgtol_mod.F90 +++ b/src/trans/gpu/internal/trgtol_mod.F90 @@ -120,7 +120,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION USE OPENACC_EXT, ONLY: EXT_ACC_ARR_DESC, EXT_ACC_PASS, EXT_ACC_CREATE, & & EXT_ACC_DELETE - USE OPENACC, ONLY: ACC_IS_PRESENT + USE OPENACC, ONLY: ACC_IS_PRESENT, ACC_HANDLE_KIND USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS IMPLICIT NONE From 1b4beea4368cc16cb3ad36933632e433047d88b0 Mon Sep 17 00:00:00 2001 From: lukasm91 Date: Mon, 19 Aug 2024 10:22:06 +0200 Subject: [PATCH 4/4] Update src/trans/gpu/internal/trltog_mod.F90 Co-authored-by: Sam Hatfield --- src/trans/gpu/internal/trltog_mod.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/trans/gpu/internal/trltog_mod.F90 b/src/trans/gpu/internal/trltog_mod.F90 index 555e99d43..24d32767e 100755 --- a/src/trans/gpu/internal/trltog_mod.F90 +++ b/src/trans/gpu/internal/trltog_mod.F90 @@ -121,7 +121,7 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, USE ISO_C_BINDING, ONLY: C_SIZE_T USE OPENACC_EXT, ONLY: EXT_ACC_ARR_DESC, EXT_ACC_PASS, EXT_ACC_CREATE, & & EXT_ACC_DELETE - USE OPENACC, ONLY: ACC_IS_PRESENT + USE OPENACC, ONLY: ACC_HANDLE_KIND, ACC_IS_PRESENT IMPLICIT NONE