diff --git a/src/programs/ectrans-benchmark.F90 b/src/programs/ectrans-benchmark.F90 index a4dc1bc28..d8c5da838 100644 --- a/src/programs/ectrans-benchmark.F90 +++ b/src/programs/ectrans-benchmark.F90 @@ -21,25 +21,6 @@ program ectrans_benchmark ! This test performs spectral to real and real to spectral transforms repeated in ! timed loop. ! -! 1) One "surface" field is always transformed: -! zspsc2(1,1:nspec2) <-> zgmvs(1:nproma,1:1,1:ngbplk) -! -! 2) A Multiple "3d" fields are transformed and can be disabled with "--nfld 0" -! -! zspsc3a(1:nlev,1:nspec2,1:nfld) <-> zgp3a(1:nproma,1:nlev,1:nfld,1:ngpblk) -! -! 3) Optionally a "3d" vorticity/divergence field is transformed to uv (wind) and -! can be enabled with "--vordiv" -! -! zspvor(1:nlev,1:nspec2) / zspdiv(1:nlev,1:nspec2) <-> zgpuv(1:nproma,1:nlev,1:2,1:ngpblk) -! -! 4) Optionally scalar derivatives can be computed for the fields described in 1) and 2) -! This must be enabled with "--scders" -! -! 5) Optionally uv East-West derivate can be computed from vorticity/divergence. -! This must be enabled with "--vordiv --uvders" -! -! ! Authors : George Mozdzynski ! Willem Deconinck ! Ioan Hadade @@ -58,7 +39,7 @@ program ectrans_benchmark integer(kind=jpim), parameter :: min_octa_points = 20 integer(kind=jpim) :: istack, getstackusage -real(kind=jprd), dimension(1) :: zmaxerr(5), zerr(5) +real(kind=jprd) :: zmaxerr(5) real(kind=jprd) :: zmaxerrg ! Output unit numbers @@ -67,17 +48,17 @@ program ectrans_benchmark integer(kind=jpim), parameter :: noutdump = 7 ! Unit number for field output ! Default parameters -integer(kind=jpim) :: nsmax = 79 ! Spectral truncation integer(kind=jpim) :: iters = 10 ! Number of iterations for transform test -integer(kind=jpim) :: nfld = 1 ! Number of scalar fields +integer(kind=jpim) :: nfld = 1 ! Number of 3D scalar fields integer(kind=jpim) :: nlev = 1 ! Number of vertical levels integer(kind=jpim) :: iters_warmup = 3 ! Number of warm up steps (for which timing statistics should be ignored) -integer(kind=jpim) :: nflevg -integer(kind=jpim) :: ndgl ! Number of latitudes -integer(kind=jpim) :: nspec2 -integer(kind=jpim) :: ngptot -integer(kind=jpim) :: ngptotg +integer(kind=jpim) :: nflevg ! Total number of vertical levels + +integer(kind=jpim) :: nspec2 ! Number of spectral coefficients (real and imaginary) +integer(kind=jpim) :: ngptot ! Total number of grid points on this task +integer(kind=jpim) :: ngptotg ! Total number of grid points across all tasks + integer(kind=jpim) :: ifld integer(kind=jpim) :: jroc integer(kind=jpim) :: jb @@ -87,7 +68,7 @@ program ectrans_benchmark integer(kind=jpim) :: ib integer(kind=jpim) :: jprtrv -integer(kind=jpim), allocatable :: nloen(:), nprcids(:) +integer(kind=jpim), allocatable :: nprcids(:) integer(kind=jpim) :: myproc, jj integer :: jstep @@ -96,33 +77,45 @@ program ectrans_benchmark real(kind=jprd) :: ztstepmax2, ztstepmin2, ztstepavg2, ztstepmed2 real(kind=jprd), allocatable :: ztstep(:), ztstep1(:), ztstep2(:) -real(kind=jprb), allocatable :: znormsp(:), znormsp1(:), znormdiv(:), znormdiv1(:) -real(kind=jprb), allocatable :: znormvor(:), znormvor1(:), znormt(:), znormt1(:) -real(kind=jprd) :: zaveave(0:jpmaxstat) +real(kind=jprb), allocatable :: znormvor(:), znormvor1(:), znormdiv(:), znormdiv1(:) +real(kind=jprb), allocatable :: znormscalar(:), znormscalar1(:) +real(kind=jprb), allocatable :: znormsc3a(:), znormsc3a1(:), znormsc2(:), znormsc21(:) -! Grid-point space data structures -real(kind=jprb), allocatable, target PINNED_TAG :: zgmv (:,:,:,:) ! Multilevel fields at t and t-dt -real(kind=jprb), allocatable, target PINNED_TAG :: zgmvs (:,:,:) ! Single level fields at t and t-dt -real(kind=jprb), pointer :: zgp3a (:,:,:,:) ! Multilevel fields at t and t-dt -real(kind=jprb), pointer :: zgpuv (:,:,:,:) ! Multilevel fields at t and t-dt -real(kind=jprb), pointer :: zgp2 (:,:,:) ! Single level fields at t and t-dt +real(kind=jprd) :: zaveave(0:jpmaxstat) ! Spectral space data structures -real(kind=jprb), allocatable, target PINNED_TAG :: sp3d(:,:,:) -real(kind=jprb), pointer :: zspvor(:,:) => null() -real(kind=jprb), pointer :: zspdiv(:,:) => null() -real(kind=jprb), pointer :: zspsc3a(:,:,:) => null() +real(kind=jprb), allocatable PINNED_TAG :: zspvor(:,:) +real(kind=jprb), allocatable PINNED_TAG :: zspdiv(:,:) +real(kind=jprb), allocatable PINNED_TAG :: zspscalar(:,:) +real(kind=jprb), allocatable PINNED_TAG :: zspsc3a(:,:,:) real(kind=jprb), allocatable PINNED_TAG :: zspsc2(:,:) +! Grid-point space data structures +real(kind=jprb), allocatable PINNED_TAG :: zgp(:,:,:) +real(kind=jprb), allocatable PINNED_TAG :: zgpuv(:,:,:,:) +real(kind=jprb), allocatable PINNED_TAG :: zgp3a(:,:,:,:) +real(kind=jprb), allocatable PINNED_TAG :: zgp2(:,:,:) + logical :: lstack = .false. ! Output stack info -logical :: luserpnm = .false. -logical :: lkeeprpnm = .false. + +! setup_trans options +integer(kind=jpim) :: nsmax = 79 ! Spectral truncation +integer(kind=jpim) :: ndgl ! Number of latitudes +integer(kind=jpim), allocatable :: nloen(:) ! Number of points on each latitude +logical :: luserpnm = .false. ! Use Belusov algorithm to compute RPNM array instead of per m logical :: luseflt = .false. ! Use fast legendre transforms + +! Extra inv_trans options +logical :: lvordiv = .false. ! Compute vorticity and divergence in grid point space +logical :: lscders = .false. ! Compute derivatives of scalar (North-South and East-West) in grid + ! point space +logical :: luvder = .false. ! Compute East-West derivatives of U and V wind in grid point space + +! GSTATS options +logical :: lstats = .true. ! gstats statistics logical :: ltrace_stats = .false. logical :: lstats_omp = .false. logical :: lstats_comms = .false. -logical :: lstats_mpl = .false. -logical :: lstats = .true. ! gstats statistics logical :: lbarrier_stats = .false. logical :: lbarrier_stats2 = .false. logical :: ldetailed_stats = .false. @@ -131,17 +124,14 @@ program ectrans_benchmark logical :: lstatscpu = .false. logical :: lstats_mem = .false. logical :: lxml_stats = .false. -logical :: lvordiv = .false. -logical :: lscders = .false. -logical :: luvders = .false. -logical :: lprint_norms = .false. ! Calculate and print spectral norms -logical :: lmeminfo = .false. ! Show information from FIAT routine ec_meminfo at the end - integer(kind=jpim) :: nstats_mem = 0 integer(kind=jpim) :: ntrace_stats = 0 integer(kind=jpim) :: nprnt_stats = 1 integer(kind=jpim) :: nopt_mem_tr = 0 +logical :: lprint_norms = .false. ! Calculate and print spectral norms +logical :: lmeminfo = .false. ! Show information from FIAT routine ec_meminfo at the end + ! The multiplier of the machine epsilon used as a tolerance for correctness checking ! ncheck = 0 (the default) means that correctness checking is disabled integer(kind=jpim) :: ncheck = 0 @@ -151,11 +141,6 @@ program ectrans_benchmark ! Verbosity level (0 or 1) integer :: verbosity = 0 -real(kind=jprd) :: zra = 6371229._jprd - -integer(kind=jpim) :: nmax_resol = 37 ! Max number of resolutions -integer(kind=jpim) :: npromatr = 0 ! nproma for trans lib - integer(kind=jpim) :: nproc ! Number of procs integer(kind=jpim) :: nthread integer(kind=jpim) :: nprgpns ! Grid-point decomp @@ -167,8 +152,8 @@ program ectrans_benchmark integer(kind=jpim) :: mp_type = 2 ! Message passing type integer(kind=jpim) :: mbx_size = 150000000 ! Mailbox size -integer(kind=jpim), allocatable :: numll(:), ivset(:) -integer(kind=jpim) :: ivsetsc(1) +integer(kind=jpim), allocatable :: numll(:), ivset(:), ivsetsc(:) +integer(kind=jpim) :: ivsetsc2(1) integer(kind=jpim) :: nflevl @@ -177,7 +162,6 @@ program ectrans_benchmark logical :: lsync_trans = .true. ! Activate barrier sync logical :: leq_regions = .true. ! Eq regions flag - integer(kind=jpim) :: nproma = 0 integer(kind=jpim) :: ngpblks ! locals @@ -185,22 +169,6 @@ program ectrans_benchmark integer(kind=jpim) :: iprtrw integer(kind=jpim) :: iprused, ilevpp, irest, ilev, jlev -integer(kind=jpim) :: ndimgmv = 0 ! Third dim. of gmv "(nproma,nflevg,ndimgmv,ngpblks)" -integer(kind=jpim) :: ndimgmvs = 0 ! Second dim. gmvs "(nproma,ndimgmvs,ngpblks)" - -integer(kind=jpim) :: jbegin_uv = 0 -integer(kind=jpim) :: jend_uv = 0 -integer(kind=jpim) :: jbegin_sc = 0 -integer(kind=jpim) :: jend_sc = 0 -integer(kind=jpim) :: jbegin_scder_NS = 0 -integer(kind=jpim) :: jend_scder_NS = 0 -integer(kind=jpim) :: jbegin_scder_EW = 0 -integer(kind=jpim) :: jend_scder_EW = 0 -integer(kind=jpim) :: jbegin_uder_EW = 0 -integer(kind=jpim) :: jend_uder_EW = 0 -integer(kind=jpim) :: jbegin_vder_EW = 0 -integer(kind=jpim) :: jend_vder_EW = 0 - logical :: ldump_values = .false. integer, external :: ec_mpirank @@ -209,6 +177,9 @@ program ectrans_benchmark character(len=16) :: cgrid = '' integer(kind=jpim) :: ierr +integer :: icall_mode = 1 +integer :: inum_wind_fields, inum_sc_3d_fields, inum_sc_2d_fields, itotal_fields +integer :: ipgp_start, ipgp_end, ipgpuv_start, ipgpuv_end real(kind=jprb), allocatable :: global_field(:,:) @@ -230,8 +201,9 @@ program ectrans_benchmark luse_mpi = detect_mpirun() ! Setup -call get_command_line_arguments(nsmax, cgrid, iters, iters_warmup, nfld, nlev, lvordiv, lscders, luvders, & - & luseflt, nopt_mem_tr, nproma, verbosity, ldump_values, lprint_norms, lmeminfo, nprtrv, nprtrw, ncheck) +call get_command_line_arguments(nsmax, cgrid, iters, iters_warmup, nfld, nlev, lvordiv, lscders, luvder, & + & luseflt, nopt_mem_tr, nproma, verbosity, ldump_values, lprint_norms, lmeminfo, nprtrv, nprtrw, ncheck, & + & icall_mode) if (cgrid == '') cgrid = cubic_octahedral_gaussian_grid(nsmax) call parse_grid(cgrid, ndgl, nloen) nflevg = nlev @@ -267,11 +239,8 @@ program ectrans_benchmark if (ldetailed_stats) then lstats_omp = .true. lstats_comms = .true. - lstats_mpl = .true. lstatscpu = .true. nprnt_stats = nproc -! lstats_mem = .true. -! lstats_alloc = .true. endif !=================================================================================================== @@ -363,7 +332,7 @@ program ectrans_benchmark nflevl = numll(mysetv) -ivsetsc(1) = iprused +ivsetsc2(1) = iprused ifld = 0 !=================================================================================================== @@ -388,10 +357,9 @@ program ectrans_benchmark if (verbosity >= 1) write(nout,'(a)')'======= Setup ecTrans =======' call gstats(1, 0) -call setup_trans0(kout=nout, kerr=nerr, kprintlev=merge(2, 0, verbosity == 1), & - & kmax_resol=nmax_resol, kpromatr=npromatr, kprgpns=nprgpns, kprgpew=nprgpew, & - & kprtrw=nprtrw, ldsync_trans=lsync_trans, & - & ldeq_regions=leq_regions, prad=zra, ldalloperm=.true., ldmpoff=.not.luse_mpi,& +call setup_trans0(kout=nout, kerr=nerr, kprintlev=merge(2, 0, verbosity == 1), & + & kprgpns=nprgpns, kprgpew=nprgpew, kprtrw=nprtrw, ldsync_trans=lsync_trans, & + & ldeq_regions=leq_regions, ldalloperm=.true., ldmpoff=.not.luse_mpi, & & kopt_memory_tr=nopt_mem_tr) call gstats(1, 1) @@ -400,8 +368,7 @@ program ectrans_benchmark call set_ectrans_gpu_nflev(nflevl) ! We pass nflevl via environment variable in order not to change API ! In long run, ectrans should grow its internal buffers automatically -call setup_trans(ksmax=nsmax, kdgl=ndgl, kloen=nloen, ldsplit=.true., & - & lduserpnm=luserpnm, ldkeeprpnm=lkeeprpnm, & +call setup_trans(ksmax=nsmax, kdgl=ndgl, kloen=nloen, ldsplit=.true., lduserpnm=luserpnm, & & lduseflt=luseflt) call gstats(2, 1) @@ -444,38 +411,37 @@ program ectrans_benchmark write(nout,'("nopt_mem_tr",i0)') nopt_mem_tr write(nout,'("lvordiv ",l1)') lvordiv write(nout,'("lscders ",l1)') lscders - write(nout,'("luvders ",l1)') luvders + write(nout,'("luvder ",l1)') luvder write(nout,'(" ")') write(nout,'(a)') '======= End of runtime parameters =======' write(nout,'(" ")') end if !=================================================================================================== -! Allocate and Initialize spectral arrays +! Allocate and initialize spectral arrays !=================================================================================================== -! Allocate spectral arrays -! Try to mimick IFS layout as much as possible -nullify(zspvor) -nullify(zspdiv) -nullify(zspsc3a) -allocate(sp3d(nflevl,nspec2,2+nfld)) -allocate(zspsc2(1,nspec2)) - -call initialize_spectral_arrays(nsmax, zspsc2, sp3d) +! Initialize vorticity and divergence - same for both call modes +allocate(zspvor(nflevl,nspec2)) +allocate(zspdiv(nflevl,nspec2)) +call initialize_spectral_field(nsmax, zspvor) +call initialize_spectral_field(nsmax, zspdiv) -! Point convenience variables to storage variable sp3d -zspvor => sp3d(:,:,1) -zspdiv => sp3d(:,:,2) -zspsc3a => sp3d(:,:,3:3+(nfld-1)) - -!=================================================================================================== -! Allocate gridpoint arrays -!=================================================================================================== +! Initialize spectral arrays differently depending on call mode +if (icall_mode == 1) then + allocate(zspscalar(nfld*nflevl+1,nspec2)) + call initialize_spectral_field(nsmax, zspscalar) +else + allocate(zspsc3a(nflevl,nspec2,nfld)) + allocate(zspsc2(1,nspec2)) + do i = 1, nfld + call initialize_spectral_field(nsmax, zspsc3a(:,:,i)) + enddo + call initialize_spectral_field(nsmax, zspsc2) +endif +! Compute spectral distribution variables allocate(ivset(nflevg)) - -! Compute spectral distribution ilev = 0 do jb = 1, nprtrv do jlev=1, numll(jb) @@ -484,69 +450,92 @@ program ectrans_benchmark enddo enddo -! Allocate grid-point arrays +allocate(ivsetsc(nfld*nflevg+1)) +do i = 1, nfld + ilev = 0 + do jb = 1, nprtrv + do jlev = 1, numll(jb) + ilev = ilev + 1 + ivsetsc(ilev + (i - 1)*nflevg) = jb + enddo + enddo +enddo +ivsetsc(nfld*nflevg+1) = 1 + +!=================================================================================================== +! Allocate gridpoint arrays +!=================================================================================================== + +! Determine start and end slice points for grid point arrays when they are passed back to dir_trans +ipgp_start = 1 +ipgp_end = (2 + nfld) * nflevg + 1 +ipgpuv_start = 1 +ipgpuv_end = 2 + +! Also enable vorticity divergence? if (lvordiv) then - jbegin_uv = 1 - jend_uv = 2 -endif -if (luvders) then - jbegin_uder_EW = jend_uv + 1 - jend_uder_EW = jbegin_uder_EW + 1 - jbegin_vder_EW = jend_uder_EW + 1 - jend_vder_EW = jbegin_vder_EW + 1 + inum_wind_fields = 4 ! Four fields - U, V, vorticity, divergence + ! If lvordiv, skip the vorticity and divergence elements when passing zgp + ! These two come first when enabled + ipgp_start = ipgp_start + 2 * nflevg + ipgp_end = ipgp_end + 2 * nflevg + ipgpuv_start = ipgpuv_start + 2 + ipgpuv_end = ipgpuv_end + 2 else - jbegin_uder_EW = jend_uv - jend_uder_EW = jend_uv - jbegin_vder_EW = jend_uv - jend_vder_EW = jend_uv + ! Otherwise just U and V + inum_wind_fields = 2 endif -jbegin_sc = jend_vder_EW + 1 -jend_sc = jend_vder_EW + nfld +! Also make room for East-West derivatives of winds? +if (luvder) inum_wind_fields = inum_wind_fields + 2 -if (lscders) then - ndimgmvs = 3 - jbegin_scder_NS = jend_sc + 1 - jend_scder_NS = jend_sc + nfld - jbegin_scder_EW = jend_scder_NS + 1 - jend_scder_EW = jend_scder_NS + nfld -else - ndimgmvs = 1 - jbegin_scder_NS = jend_sc - jend_scder_NS = jend_sc - jbegin_scder_EW = jend_sc - jend_scder_EW = jend_sc -endif +! We always have our nfld 3D scalar fields +inum_sc_3d_fields = nfld -ndimgmv = jend_scder_EW +! We always have one 2D scalar field +inum_sc_2d_fields = 1 -allocate(zgmv(nproma,nflevg,ndimgmv,ngpblks)) -allocate(zgmvs(nproma,ndimgmvs,ngpblks)) +! Also make room for North-South and East-West derivatives of scalar fields +if (lscders) then + inum_sc_3d_fields = inum_sc_3d_fields * 3 + inum_sc_2d_fields = inum_sc_2d_fields * 3 +endif -zgpuv => zgmv(:,:,1:jend_vder_EW,:) -zgp3a => zgmv(:,:,jbegin_sc:jend_scder_EW,:) -zgp2 => zgmvs(:,:,:) +! Finally, allocate grid point arrays +if (icall_mode == 1) then + itotal_fields = nflevg * (inum_wind_fields + inum_sc_3d_fields) + inum_sc_2d_fields + allocate(zgp(nproma,itotal_fields,ngpblks)) +else + allocate(zgpuv(nproma,nflevg,inum_wind_fields,ngpblks)) + allocate(zgp3a(nproma,nflevg,inum_sc_3d_fields,ngpblks)) + allocate(zgp2(nproma,inum_sc_2d_fields,ngpblks)) +endif !=================================================================================================== ! Allocate norm arrays !=================================================================================================== if (lprint_norms .or. ncheck > 0) then - allocate(znormsp(1)) - allocate(znormsp1(1)) allocate(znormvor(nflevg)) allocate(znormvor1(nflevg)) allocate(znormdiv(nflevg)) allocate(znormdiv1(nflevg)) - allocate(znormt(nflevg)) - allocate(znormt1(nflevg)) - call specnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor1, kvset=ivset(1:nflevg)) - call specnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv1, kvset=ivset(1:nflevg)) - if (nfld > 0) then - call specnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormt1, kvset=ivset(1:nflevg)) + call specnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor1, kvset=ivset) + call specnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv1, kvset=ivset) + + if (icall_mode == 1) then + allocate(znormscalar(nfld*nflevg+1)) + allocate(znormscalar1(nfld*nflevg+1)) + call specnorm(pspec=zspscalar(:,:), pnorm=znormscalar1, kvset=ivsetsc) + else + allocate(znormsc3a(nflevg)) + allocate(znormsc3a1(nflevg)) + allocate(znormsc2(1)) + allocate(znormsc21(1)) + if (nfld > 0) call specnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormsc3a1, kvset=ivset) + call specnorm(pspec=zspsc2(1:1,:), pnorm=znormsc21, kvset=ivsetsc2) endif - call specnorm(pspec=zspsc2(1:1,:), pnorm=znormsp1, kvset=ivsetsc) if (verbosity >= 1 .and. myproc == 1) then do ifld = 1, nflevg @@ -557,16 +546,21 @@ program ectrans_benchmark write(nout,'("norm zspdiv( ",i4,",:) = ",f20.15)') ifld, znormdiv1(ifld) write(nout,'("0x",Z16.16)') znormdiv1(ifld) enddo - if (nfld > 0) then - do ifld = 1, nflevg - write(nout,'("norm zspsc3a(",i4,",:,1) = ",f20.15)') ifld, znormt1(ifld) - write(nout,'("0x",Z16.16)') znormt1(ifld) + if (icall_mode == 1) then + do ifld = 1, nfld*nflevg+1 + write(nout,'("norm zspscalar(",i4,",:,1) = ",f20.15)') ifld, znormscalar1(ifld) + write(nout,'("0x",Z16.16)') znormscalar1(ifld) enddo + else + if (nfld > 0) then + do ifld = 1, nflevg + write(nout,'("norm zspsc3a(",i4,",:,1) = ",f20.15)') ifld, znormsc3a1(ifld) + write(nout,'("0x",Z16.16)') znormsc3a1(ifld) + enddo + endif + write(nout,'("norm zspsc2( ",i4,",:) = ",f20.15)') 1, znormsc21(1) + write(nout,'("0x",Z16.16)') znormsc21(1) endif - do ifld = 1, 1 - write(nout,'("norm zspsc2( ",i4,",:) = ",f20.15)') ifld, znormsp1(ifld) - write(nout,'("0x",Z16.16)') znormsp1(ifld) - enddo endif endif @@ -621,31 +615,16 @@ program ectrans_benchmark ztstep1(jstep) = timef() call gstats(4,0) - if (lvordiv) then - call inv_trans(kresol=1, kproma=nproma, & - & pspsc2=zspsc2, & ! spectral surface pressure - & pspvor=zspvor, & ! spectral vorticity - & pspdiv=zspdiv, & ! spectral divergence - & pspsc3a=zspsc3a, & ! spectral scalars - & ldscders=lscders, & - & ldvorgp=.false., & ! no gridpoint vorticity - & lddivgp=.false., & ! no gridpoint divergence - & lduvder=luvders, & - & kvsetuv=ivset, & - & kvsetsc2=ivsetsc, & - & kvsetsc3a=ivset, & - & pgp2=zgp2, & - & pgpuv=zgpuv, & - & pgp3a=zgp3a) + if (icall_mode == 1) then + call inv_trans(pspvor=zspvor, pspdiv=zspdiv, pspscalar=zspscalar, pgp=zgp, & + & kvsetuv=ivset, kvsetsc=ivsetsc, & + & ldscders=lscders, ldvorgp=lvordiv, lddivgp=lvordiv, lduvder=luvder, & + & kproma=nproma) else - call inv_trans(kresol=1, kproma=nproma, & - & pspsc2=zspsc2, & ! spectral surface pressure - & pspsc3a=zspsc3a, & ! spectral scalars - & ldscders=lscders, & ! scalar derivatives - & kvsetsc2=ivsetsc, & - & kvsetsc3a=ivset, & - & pgp2=zgp2, & - & pgp3a=zgp3a) + call inv_trans(pspvor=zspvor, pspdiv=zspdiv, pspsc3a=zspsc3a, pspsc2=zspsc2, pgpuv=zgpuv, & + & pgp3a=zgp3a, pgp2=zgp2, & + & kvsetuv=ivset, kvsetsc2=ivsetsc2, kvsetsc3a=ivset, & + & ldscders=lscders, ldvorgp=lvordiv, lddivgp=lvordiv, lduvder=luvder, kproma=nproma) endif call gstats(4,1) @@ -656,13 +635,14 @@ program ectrans_benchmark !================================================================================================= if (ldump_values .and. mod(jstep,10) == 1) then + ! dump a field to a binary file if (myproc == 1) then allocate(global_field(ngptotg,1)) endif - call dump_gridpoint_field(jstep, myproc, nproma, global_field, zgp2(:,1:1,:), 's', noutdump) - call dump_gridpoint_field(jstep, myproc, nproma, global_field, zgpuv(:,nflevg:nflevg,1,:), 'u', noutdump) - call dump_gridpoint_field(jstep, myproc, nproma, global_field, zgpuv(:,nflevg:nflevg,2,:), 'v', noutdump) - call dump_gridpoint_field(jstep, myproc, nproma, global_field, zgp3a(:,nflevg:nflevg,1,:), 't', noutdump) + call dump_gridpoint_field(jstep, myproc, nproma, global_field, zgpuv(:,nflevg:nflevg,1,:), 'U', noutdump) + call dump_gridpoint_field(jstep, myproc, nproma, global_field, zgpuv(:,nflevg:nflevg,2,:), 'V', noutdump) + call dump_gridpoint_field(jstep, myproc, nproma, global_field, zgp2(:,1:1,:), 'S', noutdump) + call dump_gridpoint_field(jstep, myproc, nproma, global_field, zgp3a(:,nflevg:nflevg,1,:), 'T', noutdump) if (myproc == 1) then deallocate(global_field) endif @@ -675,26 +655,14 @@ program ectrans_benchmark ztstep2(jstep) = timef() call gstats(5,0) - if (lvordiv) then - call dir_trans(kresol=1, kproma=nproma, & - & pgp2=zgmvs(:,1:1,:), & - & pgpuv=zgpuv(:,:,1:2,:), & - & pgp3a=zgp3a(:,:,1:nfld,:), & - & pspvor=zspvor, & - & pspdiv=zspdiv, & - & pspsc2=zspsc2, & - & pspsc3a=zspsc3a, & - & kvsetuv=ivset, & - & kvsetsc2=ivsetsc, & - & kvsetsc3a=ivset) + if (icall_mode == 1) then + call dir_trans(pgp=zgp(:,ipgp_start:ipgp_end,:), pspvor=zspvor, pspdiv=zspdiv, & + & pspscalar=zspscalar, kvsetuv=ivset, kvsetsc=ivsetsc, kproma=nproma) else - call dir_trans(kresol=1, kproma=nproma, & - & pgp2=zgmvs(:,1:1,:), & - & pgp3a=zgp3a(:,:,1:nfld,:), & - & pspsc2=zspsc2, & - & pspsc3a=zspsc3a, & - & kvsetsc2=ivsetsc, & - & kvsetsc3a=ivset) + call dir_trans(pgpuv=zgpuv(:,:,ipgpuv_start:ipgpuv_end,:), & + & pgp3a=zgp3a(:,:,1:nfld,:), pgp2=zgp2(:,1:1,:), & + & pspvor=zspvor, pspdiv=zspdiv, pspsc3a=zspsc3a, pspsc2=zspsc2, & + & kvsetuv=ivset, kvsetsc2=ivsetsc2, kvsetsc3a=ivset, kproma=nproma) endif call gstats(5,1) ztstep2(jstep) = (timef() - ztstep2(jstep))/1000.0_jprd @@ -707,43 +675,36 @@ program ectrans_benchmark if (lprint_norms) then call gstats(6,0) - call specnorm(pspec=zspsc2(1:1,:), pnorm=znormsp, kvset=ivsetsc(1:1)) - call specnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor, kvset=ivset(1:nflevg)) - call specnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv, kvset=ivset(1:nflevg)) - if (nfld > 0) then - call specnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormt, kvset=ivset(1:nflevg)) + call specnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor, kvset=ivset) + call specnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv, kvset=ivset) + + if (icall_mode == 1) then + call specnorm(pspec=zspscalar(:,:), pnorm=znormscalar, kvset=ivsetsc) + else + if (nfld > 0) call specnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormsc3a, kvset=ivset) + call specnorm(pspec=zspsc2(1:1,:), pnorm=znormsc2, kvset=ivsetsc2) endif - ! Surface pressure if (myproc == 1) then - zmaxerr(:) = -999.0 - do ifld = 1, 1 - zerr(1) = abs(znormsp1(ifld)/znormsp(ifld) - 1.0_jprb) - zmaxerr(1) = max(zmaxerr(1), zerr(1)) - enddo - ! Divergence - do ifld = 1, nflevg - zerr(2) = abs(znormdiv1(ifld)/znormdiv(ifld) - 1.0_jprb) - zmaxerr(2) = max(zmaxerr(2), zerr(2)) - enddo - ! Vorticity - do ifld = 1, nflevg - zerr(3) = abs(znormvor1(ifld)/znormvor(ifld) - 1.0_jprb) - zmaxerr(3) = max(zmaxerr(3),zerr(3)) - enddo - ! Temperature - if (nfld > 0) then - do ifld = 1, nflevg - zerr(4) = abs(znormt1(ifld)/znormt(ifld) - 1.0_jprb) - zmaxerr(4) = max(zmaxerr(4), zerr(4)) - enddo + zmaxerr(1) = maxval(abs((znormvor1 / znormvor) - 1.0_jprb)) + zmaxerr(2) = maxval(abs((znormdiv1 / znormdiv) - 1.0_jprb)) + if (icall_mode == 1) then + zmaxerr(3) = maxval(abs((znormscalar1 / znormscalar) - 1.0_jprb)) write(nout,'("time step ",i6," took", f8.4," | zspvor max err="e10.3,& - & " | zspdiv max err="e10.3," | zspsc3a max err="e10.3," | zspsc2 max err="e10.3)') & - & jstep, ztstep(jstep), zmaxerr(3), zmaxerr(2), zmaxerr(4), zmaxerr(1) + & " | zspdiv max err="e10.3," | zspscalar max err="e10.3)') & + & jstep, ztstep(jstep), zmaxerr(1), zmaxerr(2), zmaxerr(3) else - write(nout,'("time step ",i6," took", f8.4," | zspvor max err="e10.3,& - & " | zspdiv max err="e10.3," | zspsc2 max err="e10.3)') & - & jstep, ztstep(jstep), zmaxerr(3), zmaxerr(2), zmaxerr(1) + zmaxerr(4) = maxval(abs((znormsc21 / znormsc2) - 1.0_jprb)) + if (nfld > 0) then + zmaxerr(3) = maxval(abs((znormsc3a1 / znormsc3a) - 1.0_jprb)) + write(nout,'("time step ",i6," took", f8.4," | zspvor max err="e10.3,& + & " | zspdiv max err="e10.3," | zspsc3a max err="e10.3," | zspsc2 max err="e10.3)') & + & jstep, ztstep(jstep), zmaxerr(1), zmaxerr(2), zmaxerr(3), zmaxerr(4) + else + write(nout,'("time step ",i6," took", f8.4," | zspvor max err="e10.3,& + & " | zspdiv max err="e10.3," | zspsc2 max err="e10.3)') & + & jstep, ztstep(jstep), zmaxerr(1), zmaxerr(2), zmaxerr(4) + endif endif endif call gstats(6,1) @@ -762,62 +723,69 @@ program ectrans_benchmark write(nout,'(" ")') if (lprint_norms .or. ncheck > 0) then - call specnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor, kvset=ivset) - call specnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv, kvset=ivset) - if (nfld > 0) then - call specnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormt, kvset=ivset) + call specnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor, kvset=ivset) + call specnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv, kvset=ivset) + + if (icall_mode == 1) then + call specnorm(pspec=zspscalar(:,:), pnorm=znormscalar, kvset=ivsetsc) + else + if (nfld > 0) call specnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormsc3a, kvset=ivset) + call specnorm(pspec=zspsc2(1:1,:), pnorm=znormsc2, kvset=ivsetsc2) endif - call specnorm(pspec=zspsc2(1:1,:), pnorm=znormsp, kvset=ivsetsc) if (myproc == 1) then - zmaxerr(:) = -999.0 - do ifld = 1, nflevg - zerr(3) = abs(real(znormvor1(ifld),kind=jprd)/real(znormvor(ifld),kind=jprd) - 1.0_jprd) - zmaxerr(3) = max(zmaxerr(3), zerr(3)) - if (verbosity >= 1) then - write(nout,'("norm zspvor( ",i4,") = ",f20.15," error = ",e10.3)') ifld, znormvor(ifld), zerr(3) + zmaxerr = -99.0_jprd + zmaxerr(1) = maxval(abs((real(znormvor1,jprd) / (real(znormvor,jprd)) - 1.0_jprd))) + if (verbosity >= 1) then + do ifld = 1, nflevg + write(nout,'("norm zspvor( ",i4,") = ",f20.15)') ifld, znormvor(ifld) write(nout,'("0x",Z16.16)') znormvor(ifld) + enddo + endif + zmaxerr(2) = maxval(abs((real(znormdiv1,jprd) / (real(znormdiv,jprd)) - 1.0_jprd))) + if (verbosity >= 1) then + do ifld = 1, nflevg + write(nout,'("norm zspdiv( ",i4,",:) = ",f20.15)') ifld, znormdiv(ifld) + write(nout,'("0x",Z16.16)') znormdiv(ifld) + enddo + endif + if (icall_mode == 1) then + zmaxerr(3) = maxval(abs((znormscalar1 / znormscalar) - 1.0_jprb)) + if (verbosity >= 1) then + do ifld = 1, nfld*nflevg+1 + write(nout,'("norm znormscalar( ",i4,",:) = ",f20.15)') ifld, znormscalar(ifld) + write(nout,'("0x",Z16.16)') znormscalar(ifld) + enddo endif - enddo - do ifld = 1, nflevg - zerr(2) = abs(real(znormdiv1(ifld),kind=jprd)/real(znormdiv(ifld),kind=jprd) - 1.0d0) - zmaxerr(2) = max(zmaxerr(2),zerr(2)) + else + zmaxerr(4) = maxval(abs((znormsc21 / znormsc2) - 1.0_jprb)) if (verbosity >= 1) then - write(nout,'("norm zspdiv( ",i4,",:) = ",f20.15," error = ",e10.3)') ifld, znormdiv(ifld), zerr(2) - write(nout,'("0x",Z16.16)') znormdiv(ifld) + write(nout,'("norm znormsc2( ",i4,",:) = ",f20.15)') 1, znormsc2(1) + write(nout,'("0x",Z16.16)') znormsc2(1) endif - enddo - if (nfld > 0) then - do ifld = 1, nflevg - zerr(4) = abs(real(znormt1(ifld),kind=jprd)/real(znormt(ifld),kind=jprd) - 1.0d0) - zmaxerr(4) = max(zmaxerr(4), zerr(4)) + if (nfld > 0) then + zmaxerr(3) = maxval(abs((znormsc3a1 / znormsc3a) - 1.0_jprb)) if (verbosity >= 1) then - write(nout,'("norm zspsc3a(",i4,",:,1) = ",f20.15," error = ",e10.3)') ifld, znormt(ifld), zerr(4) - write(nout,'("0x",Z16.16)') znormt(ifld) + do ifld = 1, nflevg + write(nout,'("norm zspsc3a(",i4,",:,1) = ",f20.15)') ifld, znormsc3a(ifld) + write(nout,'("0x",Z16.16)') znormsc3a(ifld) + enddo endif - enddo - endif - do ifld = 1, 1 - zerr(1) = abs(real(znormsp1(ifld),kind=jprd)/real(znormsp(ifld),kind=jprd) - 1.0d0) - zmaxerr(1) = max(zmaxerr(1), zerr(1)) - if (verbosity >= 1) then - write(nout,'("norm zspsc2( ",i4,",:) = ",f20.15," error = ",e10.3)') ifld, znormsp(ifld), zerr(1) - write(nout,'("0x",Z16.16)') znormsp(ifld) endif - enddo + endif ! maximum error across all fields - if (nfld > 0) then - zmaxerrg = max(zmaxerr(1), zmaxerr(2), zmaxerr(3), zmaxerr(4)) - else - zmaxerrg = max(zmaxerr(1), zmaxerr(2), zmaxerr(3)) - endif + zmaxerrg = maxval(zmaxerr) if (verbosity >= 1) write(nout,*) - write(nout,'("max error zspvor(1:nlev,:) = ",e10.3)') zmaxerr(3) + write(nout,'("max error zspvor(1:nlev,:) = ",e10.3)') zmaxerr(1) write(nout,'("max error zspdiv(1:nlev,:) = ",e10.3)') zmaxerr(2) - if (nfld > 0) write(nout,'("max error zspsc3a(1:nlev,:,1) = ",e10.3)') zmaxerr(4) - write(nout,'("max error zspsc2(1:1,:) = ",e10.3)') zmaxerr(1) + if (icall_mode == 1) then + write(nout,'("max error zspscalar(1:nlev,:,1) = ",e10.3)') zmaxerr(3) + else + if (nfld > 0) write(nout,'("max error zspsc3a(1:nlev,:,1) = ",e10.3)') zmaxerr(3) + write(nout,'("max error zspsc2(1:1,:) = ",e10.3)') zmaxerr(4) + endif write(nout,*) write(nout,'("max error combined = = ",e10.3)') zmaxerrg write(nout,*) @@ -946,8 +914,7 @@ program ectrans_benchmark ! Cleanup !=================================================================================================== -deallocate(zgmv) -deallocate(zgmvs) + !=================================================================================================== @@ -1127,6 +1094,7 @@ & subroutine on memory usage, thread-binding etc." write(nout, "(a)") " --nprtrw Size of W set in spectral decomposition" write(nout, "(a)") " -c, --check VALUE The multiplier of the machine epsilon used as a& & tolerance for correctness checking" + write(nout, "(a)") " --callmode The call mode for INV_TRANS and DIR_TRANS (1 or 2)" write(nout, "(a)") "" write(nout, "(a)") "DEBUGGING" write(nout, "(a)") " --dump-values Output gridpoint fields in unformatted binary file" @@ -1151,9 +1119,9 @@ subroutine parsing_failed(message) !=================================================================================================== -subroutine get_command_line_arguments(nsmax, cgrid, iters, iters_warmup, nfld, nlev, lvordiv, lscders, luvders, & +subroutine get_command_line_arguments(nsmax, cgrid, iters, iters_warmup, nfld, nlev, lvordiv, lscders, luvder, & & luseflt, nopt_mem_tr, nproma, verbosity, ldump_values, lprint_norms, & - & lmeminfo, nprtrv, nprtrw, ncheck) + & lmeminfo, nprtrv, nprtrw, ncheck, icall_mode) #ifdef _OPENACC use openacc, only: acc_init, acc_get_device_type @@ -1167,7 +1135,7 @@ subroutine get_command_line_arguments(nsmax, cgrid, iters, iters_warmup, nfld, n integer, intent(inout) :: nlev ! Number of vertical levels logical, intent(inout) :: lvordiv ! Also transform vorticity/divergence logical, intent(inout) :: lscders ! Compute scalar derivatives - logical, intent(inout) :: luvders ! Compute uv East-West derivatives + logical, intent(inout) :: luvder ! Compute uv East-West derivatives logical, intent(inout) :: luseflt ! Use fast Legendre transforms integer, intent(inout) :: nopt_mem_tr ! Use of heap or stack memory for ZCOMBUF arrays in transposition arrays (0 for heap, 1 for stack) integer, intent(inout) :: nproma ! NPROMA @@ -1180,6 +1148,9 @@ subroutine get_command_line_arguments(nsmax, cgrid, iters, iters_warmup, nfld, n integer, intent(inout) :: nprtrw ! Size of W set (spectral decomposition) integer, intent(inout) :: ncheck ! The multiplier of the machine epsilon used as a ! tolerance for correctness checking + integer, intent(inout) :: icall_mode ! The call mode for inv_trans and dir_trans + ! 1: pspvor, pspdiv, pspscalar, pgp + ! 2: pspvor, pspdiv, pspsc3a, pspsc2, pgpuv, pgp3a, pgp2 character(len=128) :: carg ! Storage variable for command line arguments integer :: iarg = 1 ! Argument index @@ -1223,7 +1194,7 @@ subroutine get_command_line_arguments(nsmax, cgrid, iters, iters_warmup, nfld, n case('-l', '--nlev'); nlev = get_int_value('-l', iarg) case('--vordiv'); lvordiv = .True. case('--scders'); lscders = .True. - case('--uvders'); luvders = .True. + case('--uvders'); luvder = .True. case('--flt'); luseflt = .True. case('--mem-tr'); nopt_mem_tr = get_int_value('--mem-tr', iarg) case('--nproma'); nproma = get_int_value('--nproma', iarg) @@ -1233,6 +1204,11 @@ subroutine get_command_line_arguments(nsmax, cgrid, iters, iters_warmup, nfld, n case('--nprtrv'); nprtrv = get_int_value('--nprtrv', iarg) case('--nprtrw'); nprtrw = get_int_value('--nprtrw', iarg) case('-c', '--check'); ncheck = get_int_value('-c', iarg) + case('--callmode') + icall_mode = get_int_value('--callmode', iarg) + if (icall_mode /= 1 .and. icall_mode /= 2) then + call parsing_failed("Invalid argument for --callmode: must be 1 or 2") + end if case default call parsing_failed("Unrecognised argument: " // trim(carg)) @@ -1241,11 +1217,10 @@ subroutine get_command_line_arguments(nsmax, cgrid, iters, iters_warmup, nfld, n end do if (.not. lvordiv) then - luvders = .false. + luvder = .false. endif end subroutine get_command_line_arguments - !=================================================================================================== function cubic_octahedral_gaussian_grid(nsmax) result(cgrid) @@ -1295,31 +1270,18 @@ end function get_median !=================================================================================================== -subroutine initialize_spectral_arrays(nsmax, zsp, sp3d) - - integer, intent(in) :: nsmax ! Spectral truncation - real(kind=jprb), intent(inout) :: zsp(:,:) ! Surface pressure - real(kind=jprb), intent(inout) :: sp3d(:,:,:) ! 3D fields - - integer(kind=jpim) :: nflevl - integer(kind=jpim) :: nfield - - integer :: i, j +subroutine initialize_spectral_field(nsmax, field) - nflevl = size(sp3d, 1) - nfield = size(sp3d, 3) + integer, intent(in) :: nsmax ! Spectral truncation + real(kind=jprb), intent(inout) :: field(:,:) ! Field to initialize - ! First initialize surface pressure - call initialize_2d_spectral_field(nsmax, zsp(1,:)) + integer :: i - ! Then initialize all of the 3D fields - do i = 1, nflevl - do j = 1, nfield - call initialize_2d_spectral_field(nsmax, sp3d(i,:,j)) - end do - end do + do i = 1, size(field,1) + call initialize_2d_spectral_field(nsmax, field(i,:)) + enddo -end subroutine initialize_spectral_arrays +end subroutine initialize_spectral_field !=================================================================================================== diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9ba9cfe51..c639762db 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -84,48 +84,52 @@ if( HAVE_TESTS ) endif() foreach( mpi ${ntasks} ) foreach( omp ${nthreads} ) - set( t 47 ) - set( grid O48 ) - ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld0 - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 0 --meminfo --check 100 --norms -v - MPI ${mpi} - OMP ${omp} - ) - ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10 - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --meminfo --check 100 --norms -v - MPI ${mpi} - OMP ${omp} - ) - ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20 - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --check 100 --norms -v - MPI ${mpi} - OMP ${omp} - ) - ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_scders - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --scders --check 100 --norms -v - MPI ${mpi} - OMP ${omp} - ) - ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_vordiv - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --vordiv --check 100 --norms -v - MPI ${mpi} - OMP ${omp} - ) - ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_vordiv_uvders - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --vordiv --uvders --check 100 --norms -v - MPI ${mpi} - OMP ${omp} - ) - ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_flt - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --flt --check 2000 --norms -v - MPI ${mpi} - OMP ${omp} - ) - ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_nproma16 - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --nproma 16 --check 100 --norms -v - MPI ${mpi} - OMP ${omp} - ) + foreach( callmode 1 2 ) + set( t 47 ) + set( grid O48 ) + ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld0_callmode${callmode} + COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 0 --meminfo --check 100 --norms -v --callmode ${callmode} + MPI ${mpi} + OMP ${omp} + ) + ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_callmode${callmode} + COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --meminfo --check 100 --norms -v --callmode ${callmode} + MPI ${mpi} + OMP ${omp} + ) + ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_callmode${callmode} + COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --check 100 --norms -v --callmode ${callmode} + MPI ${mpi} + OMP ${omp} + ) + ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_scders_callmode${callmode} + COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --scders --check 100 --norms -v --callmode ${callmode} + MPI ${mpi} + OMP ${omp} + ) + ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_vordiv_callmode${callmode} + COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --vordiv --check 100 --norms -v --callmode ${callmode} + MPI ${mpi} + OMP ${omp} + ) + ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_vordiv_uvders_callmode${callmode} + COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --vordiv --uvders --check 100 --norms -v --callmode ${callmode} + MPI ${mpi} + OMP ${omp} + ) + # The FLT gives higher errors (this is the consequence of the compression) so we must + # choose a higher error tolerance threshold + ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_flt_callmode${callmode} + COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --flt --check 50000 --norms -v --callmode ${callmode} + MPI ${mpi} + OMP ${omp} + ) + ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_nproma16_callmode${callmode} + COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --nproma 16 --check 100 --norms -v --callmode ${callmode} + MPI ${mpi} + OMP ${omp} + ) + endforeach() endforeach() endforeach() endif()