From 05f07fd52d0f497cc45afa1c2c0b6710a33d5d20 Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Fri, 19 Jul 2024 09:48:35 +0000 Subject: [PATCH 01/10] Add ability to call ecTrans with two "call modes" INV_TRANS and DIR_TRANS support two ways of passing arrays: 1) PSPVOR, PSPDIV, PSPSCALAR <=> PGP 2) PSPVOR, PSPDIV <=> PGPUV; PSPSC3A <=> PGP3A; PSPSC2 <=> PGP2 Previously we only supported the second one. With this commit you can now also call the first style. In the IFS we use both cases in different places so it's important to have both cases covered. We can now extend the test suite so more of the code has coverage. In this commit I also tidied up a few things: - All spectral and grid point work arrays are separately allocated, rather than relying on big work arrays to store everything and pointers to access slices. - I deleted a couple unused variables. --- src/programs/ectrans-benchmark.F90 | 405 +++++++++++++---------------- 1 file changed, 187 insertions(+), 218 deletions(-) diff --git a/src/programs/ectrans-benchmark.F90 b/src/programs/ectrans-benchmark.F90 index a4dc1bc28..70b105d82 100644 --- a/src/programs/ectrans-benchmark.F90 +++ b/src/programs/ectrans-benchmark.F90 @@ -69,15 +69,16 @@ program ectrans_benchmark ! Default parameters integer(kind=jpim) :: nsmax = 79 ! Spectral truncation integer(kind=jpim) :: iters = 10 ! Number of iterations for transform test -integer(kind=jpim) :: nfld = 1 ! Number of scalar fields +integer(kind=jpim) :: nfld = 1 ! Number of 3D scalar fields integer(kind=jpim) :: nlev = 1 ! Number of vertical levels integer(kind=jpim) :: iters_warmup = 3 ! Number of warm up steps (for which timing statistics should be ignored) -integer(kind=jpim) :: nflevg +integer(kind=jpim) :: nflevg ! Total number of vertical levels integer(kind=jpim) :: ndgl ! Number of latitudes -integer(kind=jpim) :: nspec2 -integer(kind=jpim) :: ngptot -integer(kind=jpim) :: ngptotg +integer(kind=jpim) :: nspec2 ! 
Number of spectral coefficients (real and imaginary) +integer(kind=jpim) :: ngptot ! Total number of grid points on this task +integer(kind=jpim) :: ngptotg ! Total number of grid points across all tasks + integer(kind=jpim) :: ifld integer(kind=jpim) :: jroc integer(kind=jpim) :: jb @@ -100,20 +101,19 @@ program ectrans_benchmark real(kind=jprb), allocatable :: znormvor(:), znormvor1(:), znormt(:), znormt1(:) real(kind=jprd) :: zaveave(0:jpmaxstat) -! Grid-point space data structures -real(kind=jprb), allocatable, target PINNED_TAG :: zgmv (:,:,:,:) ! Multilevel fields at t and t-dt -real(kind=jprb), allocatable, target PINNED_TAG :: zgmvs (:,:,:) ! Single level fields at t and t-dt -real(kind=jprb), pointer :: zgp3a (:,:,:,:) ! Multilevel fields at t and t-dt -real(kind=jprb), pointer :: zgpuv (:,:,:,:) ! Multilevel fields at t and t-dt -real(kind=jprb), pointer :: zgp2 (:,:,:) ! Single level fields at t and t-dt - ! Spectral space data structures -real(kind=jprb), allocatable, target PINNED_TAG :: sp3d(:,:,:) -real(kind=jprb), pointer :: zspvor(:,:) => null() -real(kind=jprb), pointer :: zspdiv(:,:) => null() -real(kind=jprb), pointer :: zspsc3a(:,:,:) => null() +real(kind=jprb), allocatable PINNED_TAG :: zspvor(:,:) +real(kind=jprb), allocatable PINNED_TAG :: zspdiv(:,:) +real(kind=jprb), allocatable PINNED_TAG :: zspscalar(:,:) +real(kind=jprb), allocatable PINNED_TAG :: zspsc3a(:,:,:) real(kind=jprb), allocatable PINNED_TAG :: zspsc2(:,:) +! Grid-point space data structures +real(kind=jprb), allocatable PINNED_TAG :: zgp(:,:,:) +real(kind=jprb), allocatable PINNED_TAG :: zgpuv(:,:,:,:) +real(kind=jprb), allocatable PINNED_TAG :: zgp3a(:,:,:,:) +real(kind=jprb), allocatable PINNED_TAG :: zgp2(:,:,:) + logical :: lstack = .false. ! Output stack info logical :: luserpnm = .false. logical :: lkeeprpnm = .false. @@ -121,7 +121,6 @@ program ectrans_benchmark logical :: ltrace_stats = .false. logical :: lstats_omp = .false. logical :: lstats_comms = .false. 
-logical :: lstats_mpl = .false. logical :: lstats = .true. ! gstats statistics logical :: lbarrier_stats = .false. logical :: lbarrier_stats2 = .false. @@ -133,7 +132,7 @@ program ectrans_benchmark logical :: lxml_stats = .false. logical :: lvordiv = .false. logical :: lscders = .false. -logical :: luvders = .false. +logical :: luvder = .false. logical :: lprint_norms = .false. ! Calculate and print spectral norms logical :: lmeminfo = .false. ! Show information from FIAT routine ec_meminfo at the end @@ -167,8 +166,8 @@ program ectrans_benchmark integer(kind=jpim) :: mp_type = 2 ! Message passing type integer(kind=jpim) :: mbx_size = 150000000 ! Mailbox size -integer(kind=jpim), allocatable :: numll(:), ivset(:) -integer(kind=jpim) :: ivsetsc(1) +integer(kind=jpim), allocatable :: numll(:), ivset(:), ivsetsc(:) +integer(kind=jpim) :: ivsetsc2(1) integer(kind=jpim) :: nflevl @@ -185,22 +184,6 @@ program ectrans_benchmark integer(kind=jpim) :: iprtrw integer(kind=jpim) :: iprused, ilevpp, irest, ilev, jlev -integer(kind=jpim) :: ndimgmv = 0 ! Third dim. of gmv "(nproma,nflevg,ndimgmv,ngpblks)" -integer(kind=jpim) :: ndimgmvs = 0 ! Second dim. gmvs "(nproma,ndimgmvs,ngpblks)" - -integer(kind=jpim) :: jbegin_uv = 0 -integer(kind=jpim) :: jend_uv = 0 -integer(kind=jpim) :: jbegin_sc = 0 -integer(kind=jpim) :: jend_sc = 0 -integer(kind=jpim) :: jbegin_scder_NS = 0 -integer(kind=jpim) :: jend_scder_NS = 0 -integer(kind=jpim) :: jbegin_scder_EW = 0 -integer(kind=jpim) :: jend_scder_EW = 0 -integer(kind=jpim) :: jbegin_uder_EW = 0 -integer(kind=jpim) :: jend_uder_EW = 0 -integer(kind=jpim) :: jbegin_vder_EW = 0 -integer(kind=jpim) :: jend_vder_EW = 0 - logical :: ldump_values = .false. 
integer, external :: ec_mpirank @@ -209,6 +192,9 @@ program ectrans_benchmark character(len=16) :: cgrid = '' integer(kind=jpim) :: ierr +integer :: icall_mode = 1 +integer :: inum_wind_fields, inum_sc_3d_fields, inum_sc_2d_fields, itotal_fields +integer :: igp_start real(kind=jprb), allocatable :: global_field(:,:) @@ -230,8 +216,9 @@ program ectrans_benchmark luse_mpi = detect_mpirun() ! Setup -call get_command_line_arguments(nsmax, cgrid, iters, iters_warmup, nfld, nlev, lvordiv, lscders, luvders, & - & luseflt, nopt_mem_tr, nproma, verbosity, ldump_values, lprint_norms, lmeminfo, nprtrv, nprtrw, ncheck) +call get_command_line_arguments(nsmax, cgrid, iters, iters_warmup, nfld, nlev, lvordiv, lscders, luvder, & + & luseflt, nopt_mem_tr, nproma, verbosity, ldump_values, lprint_norms, lmeminfo, nprtrv, nprtrw, ncheck, & + & icall_mode) if (cgrid == '') cgrid = cubic_octahedral_gaussian_grid(nsmax) call parse_grid(cgrid, ndgl, nloen) nflevg = nlev @@ -267,11 +254,8 @@ program ectrans_benchmark if (ldetailed_stats) then lstats_omp = .true. lstats_comms = .true. - lstats_mpl = .true. lstatscpu = .true. nprnt_stats = nproc -! lstats_mem = .true. -! lstats_alloc = .true. endif !=================================================================================================== @@ -363,7 +347,7 @@ program ectrans_benchmark nflevl = numll(mysetv) -ivsetsc(1) = iprused +ivsetsc2(1) = iprused ifld = 0 !=================================================================================================== @@ -444,34 +428,32 @@ program ectrans_benchmark write(nout,'("nopt_mem_tr",i0)') nopt_mem_tr write(nout,'("lvordiv ",l1)') lvordiv write(nout,'("lscders ",l1)') lscders - write(nout,'("luvders ",l1)') luvders + write(nout,'("luvder ",l1)') luvder write(nout,'(" ")') write(nout,'(a)') '======= End of runtime parameters =======' write(nout,'(" ")') end if !=================================================================================================== -! 
Allocate and Initialize spectral arrays +! Allocate and initialize spectral arrays !=================================================================================================== -! Allocate spectral arrays -! Try to mimick IFS layout as much as possible -nullify(zspvor) -nullify(zspdiv) -nullify(zspsc3a) -allocate(sp3d(nflevl,nspec2,2+nfld)) -allocate(zspsc2(1,nspec2)) +allocate(zspvor(nflevl,nspec2)) +allocate(zspdiv(nflevl,nspec2)) +call initialize_spectral_field(nsmax, zspvor) +call initialize_spectral_field(nsmax, zspdiv) -call initialize_spectral_arrays(nsmax, zspsc2, sp3d) - -! Point convenience variables to storage variable sp3d -zspvor => sp3d(:,:,1) -zspdiv => sp3d(:,:,2) -zspsc3a => sp3d(:,:,3:3+(nfld-1)) - -!=================================================================================================== -! Allocate gridpoint arrays -!=================================================================================================== +if (icall_mode == 1) then + allocate(zspscalar(nfld*nflevl+1,nspec2)) + call initialize_spectral_field(nsmax, zspscalar) +else + allocate(zspsc3a(nflevl,nspec2,nfld)) + allocate(zspsc2(1,nspec2)) + do i = 1, nfld + call initialize_spectral_field(nsmax, zspsc3a(:,:,i)) + enddo + call initialize_spectral_field(nsmax, zspsc2) +endif allocate(ivset(nflevg)) @@ -484,48 +466,55 @@ program ectrans_benchmark enddo enddo -! Allocate grid-point arrays +allocate(ivsetsc(nfld*nflevg+1)) +do i = 1, nfld + ilev = 0 + do jb = 1, nprtrv + do jlev = 1, numll(jb) + ilev = ilev + 1 + ivsetsc(ilev + (i - 1)*nflevg) = jb + enddo + enddo +enddo +ivsetsc(nfld*nflevg+1) = 1 + +!=================================================================================================== +! Allocate gridpoint arrays +!=================================================================================================== + +! Also enable vorticity divergence? 
if (lvordiv) then - jbegin_uv = 1 - jend_uv = 2 -endif -if (luvders) then - jbegin_uder_EW = jend_uv + 1 - jend_uder_EW = jbegin_uder_EW + 1 - jbegin_vder_EW = jend_uder_EW + 1 - jend_vder_EW = jbegin_vder_EW + 1 + inum_wind_fields = 4 + igp_start = 2 * nflevg + 1 ! If lvordiv, skip the vor and div elements when passing zgp else - jbegin_uder_EW = jend_uv - jend_uder_EW = jend_uv - jbegin_vder_EW = jend_uv - jend_vder_EW = jend_uv + ! Otherwise just U and V + inum_wind_fields = 2 + igp_start = 1 endif -jbegin_sc = jend_vder_EW + 1 -jend_sc = jend_vder_EW + nfld +! Also calculate East-West derivatives of winds? +if (luvder) inum_wind_fields = inum_wind_fields + 2 -if (lscders) then - ndimgmvs = 3 - jbegin_scder_NS = jend_sc + 1 - jend_scder_NS = jend_sc + nfld - jbegin_scder_EW = jend_scder_NS + 1 - jend_scder_EW = jend_scder_NS + nfld -else - ndimgmvs = 1 - jbegin_scder_NS = jend_sc - jend_scder_NS = jend_sc - jbegin_scder_EW = jend_sc - jend_scder_EW = jend_sc -endif +! We always have our nfld 3D scalar fields +inum_sc_3d_fields = nfld -ndimgmv = jend_scder_EW +! We always have one 2D scalar field +inum_sc_2d_fields = 1 -allocate(zgmv(nproma,nflevg,ndimgmv,ngpblks)) -allocate(zgmvs(nproma,ndimgmvs,ngpblks)) +! Also calculate North-South and East-West derivatives of scalar fields (3x: field, N-S and E-W derivatives) +if (lscders) then + inum_sc_3d_fields = inum_sc_3d_fields * 3 + inum_sc_2d_fields = inum_sc_2d_fields * 3 +endif -zgpuv => zgmv(:,:,1:jend_vder_EW,:) -zgp3a => zgmv(:,:,jbegin_sc:jend_scder_EW,:) -zgp2 => zgmvs(:,:,:) +if (icall_mode == 1) then + itotal_fields = nflevg * (inum_wind_fields + inum_sc_3d_fields) + inum_sc_2d_fields + allocate(zgp(nproma,itotal_fields,ngpblks)) +else + allocate(zgpuv(nproma,nflevg,inum_wind_fields,ngpblks)) + allocate(zgp3a(nproma,nflevg,inum_sc_3d_fields,ngpblks)) + allocate(zgp2(nproma,inum_sc_2d_fields,ngpblks)) +endif !=================================================================================================== ! 
Allocate norm arrays @@ -543,10 +532,13 @@ program ectrans_benchmark call specnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor1, kvset=ivset(1:nflevg)) call specnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv1, kvset=ivset(1:nflevg)) - if (nfld > 0) then - call specnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormt1, kvset=ivset(1:nflevg)) + + if (icall_mode == 2) then + if (nfld > 0) then + call specnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormt1, kvset=ivset(1:nflevg)) + endif + call specnorm(pspec=zspsc2(1:1,:), pnorm=znormsp1, kvset=ivsetsc2) endif - call specnorm(pspec=zspsc2(1:1,:), pnorm=znormsp1, kvset=ivsetsc) if (verbosity >= 1 .and. myproc == 1) then do ifld = 1, nflevg @@ -557,16 +549,18 @@ program ectrans_benchmark write(nout,'("norm zspdiv( ",i4,",:) = ",f20.15)') ifld, znormdiv1(ifld) write(nout,'("0x",Z16.16)') znormdiv1(ifld) enddo - if (nfld > 0) then - do ifld = 1, nflevg - write(nout,'("norm zspsc3a(",i4,",:,1) = ",f20.15)') ifld, znormt1(ifld) - write(nout,'("0x",Z16.16)') znormt1(ifld) + if (icall_mode == 2) then + if (nfld > 0) then + do ifld = 1, nflevg + write(nout,'("norm zspsc3a(",i4,",:,1) = ",f20.15)') ifld, znormt1(ifld) + write(nout,'("0x",Z16.16)') znormt1(ifld) + enddo + endif + do ifld = 1, 1 + write(nout,'("norm zspsc2( ",i4,",:) = ",f20.15)') ifld, znormsp1(ifld) + write(nout,'("0x",Z16.16)') znormsp1(ifld) enddo endif - do ifld = 1, 1 - write(nout,'("norm zspsc2( ",i4,",:) = ",f20.15)') ifld, znormsp1(ifld) - write(nout,'("0x",Z16.16)') znormsp1(ifld) - enddo endif endif @@ -621,31 +615,16 @@ program ectrans_benchmark ztstep1(jstep) = timef() call gstats(4,0) - if (lvordiv) then - call inv_trans(kresol=1, kproma=nproma, & - & pspsc2=zspsc2, & ! spectral surface pressure - & pspvor=zspvor, & ! spectral vorticity - & pspdiv=zspdiv, & ! spectral divergence - & pspsc3a=zspsc3a, & ! spectral scalars - & ldscders=lscders, & - & ldvorgp=.false., & ! no gridpoint vorticity - & lddivgp=.false., & ! 
no gridpoint divergence - & lduvder=luvders, & - & kvsetuv=ivset, & - & kvsetsc2=ivsetsc, & - & kvsetsc3a=ivset, & - & pgp2=zgp2, & - & pgpuv=zgpuv, & - & pgp3a=zgp3a) + if (icall_mode == 1) then + call inv_trans(pspvor=zspvor, pspdiv=zspdiv, pspscalar=zspscalar, pgp=zgp, & + & kvsetuv=ivset, kvsetsc=ivsetsc, & + & ldscders=lscders, ldvorgp=lvordiv, lddivgp=lvordiv, lduvder=luvder, & + & kproma=nproma) else - call inv_trans(kresol=1, kproma=nproma, & - & pspsc2=zspsc2, & ! spectral surface pressure - & pspsc3a=zspsc3a, & ! spectral scalars - & ldscders=lscders, & ! scalar derivatives - & kvsetsc2=ivsetsc, & - & kvsetsc3a=ivset, & - & pgp2=zgp2, & - & pgp3a=zgp3a) + call inv_trans(pspvor=zspvor, pspdiv=zspdiv, pspsc3a=zspsc3a, pspsc2=zspsc2, pgpuv=zgpuv, & + & pgp3a=zgp3a, pgp2=zgp2, & + & kvsetuv=ivset, kvsetsc2=ivsetsc2, kvsetsc3a=ivset, & + & ldscders=lscders, ldvorgp=lvordiv, lddivgp=lvordiv, lduvder=luvder, kproma=nproma) endif call gstats(4,1) @@ -656,13 +635,14 @@ program ectrans_benchmark !================================================================================================= if (ldump_values .and. mod(jstep,10) == 1) then + ! 
dump a field to a binary file if (myproc == 1) then allocate(global_field(ngptotg,1)) endif - call dump_gridpoint_field(jstep, myproc, nproma, global_field, zgp2(:,1:1,:), 's', noutdump) - call dump_gridpoint_field(jstep, myproc, nproma, global_field, zgpuv(:,nflevg:nflevg,1,:), 'u', noutdump) - call dump_gridpoint_field(jstep, myproc, nproma, global_field, zgpuv(:,nflevg:nflevg,2,:), 'v', noutdump) - call dump_gridpoint_field(jstep, myproc, nproma, global_field, zgp3a(:,nflevg:nflevg,1,:), 't', noutdump) + call dump_gridpoint_field(jstep, myproc, nproma, global_field, zgpuv(:,nflevg:nflevg,1,:), 'U', noutdump) + call dump_gridpoint_field(jstep, myproc, nproma, global_field, zgpuv(:,nflevg:nflevg,2,:), 'V', noutdump) + call dump_gridpoint_field(jstep, myproc, nproma, global_field, zgp2(:,1:1,:), 'S', noutdump) + call dump_gridpoint_field(jstep, myproc, nproma, global_field, zgp3a(:,nflevg:nflevg,1,:), 'T', noutdump) if (myproc == 1) then deallocate(global_field) endif @@ -675,26 +655,14 @@ program ectrans_benchmark ztstep2(jstep) = timef() call gstats(5,0) - if (lvordiv) then - call dir_trans(kresol=1, kproma=nproma, & - & pgp2=zgmvs(:,1:1,:), & - & pgpuv=zgpuv(:,:,1:2,:), & - & pgp3a=zgp3a(:,:,1:nfld,:), & - & pspvor=zspvor, & - & pspdiv=zspdiv, & - & pspsc2=zspsc2, & - & pspsc3a=zspsc3a, & - & kvsetuv=ivset, & - & kvsetsc2=ivsetsc, & - & kvsetsc3a=ivset) + if (icall_mode == 1) then + call dir_trans(pgp=zgp(:,igp_start:,:), pspvor=zspvor, pspdiv=zspdiv, pspscalar=zspscalar, & + & kvsetuv=ivset, kvsetsc=ivsetsc, & + & kproma=nproma) else - call dir_trans(kresol=1, kproma=nproma, & - & pgp2=zgmvs(:,1:1,:), & - & pgp3a=zgp3a(:,:,1:nfld,:), & - & pspsc2=zspsc2, & - & pspsc3a=zspsc3a, & - & kvsetsc2=ivsetsc, & - & kvsetsc3a=ivset) + call dir_trans(pgpuv=zgpuv, pgp3a=zgp3a, pgp2=zgp2, & + & pspvor=zspvor, pspdiv=zspdiv, pspsc3a=zspsc3a, pspsc2=zspsc2, & + & kvsetuv=ivset, kvsetsc2=ivsetsc2, kvsetsc3a=ivset, kproma=nproma) endif call gstats(5,1) ztstep2(jstep) = (timef() 
- ztstep2(jstep))/1000.0_jprd @@ -707,36 +675,41 @@ program ectrans_benchmark if (lprint_norms) then call gstats(6,0) - call specnorm(pspec=zspsc2(1:1,:), pnorm=znormsp, kvset=ivsetsc(1:1)) call specnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor, kvset=ivset(1:nflevg)) call specnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv, kvset=ivset(1:nflevg)) - if (nfld > 0) then - call specnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormt, kvset=ivset(1:nflevg)) + + if (icall_mode == 2) then + call specnorm(pspec=zspsc2(1:1,:), pnorm=znormsp, kvset=ivsetsc2(1:1)) + if (nfld > 0) then + call specnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormt, kvset=ivset(1:nflevg)) + endif endif - ! Surface pressure if (myproc == 1) then zmaxerr(:) = -999.0 - do ifld = 1, 1 - zerr(1) = abs(znormsp1(ifld)/znormsp(ifld) - 1.0_jprb) - zmaxerr(1) = max(zmaxerr(1), zerr(1)) + ! Vorticity + do ifld = 1, nflevg + zerr(3) = abs(znormvor1(ifld)/znormvor(ifld) - 1.0_jprb) + zmaxerr(3) = max(zmaxerr(3),zerr(3)) enddo ! Divergence do ifld = 1, nflevg zerr(2) = abs(znormdiv1(ifld)/znormdiv(ifld) - 1.0_jprb) zmaxerr(2) = max(zmaxerr(2), zerr(2)) enddo - ! Vorticity - do ifld = 1, nflevg - zerr(3) = abs(znormvor1(ifld)/znormvor(ifld) - 1.0_jprb) - zmaxerr(3) = max(zmaxerr(3),zerr(3)) - enddo - ! Temperature if (nfld > 0) then - do ifld = 1, nflevg - zerr(4) = abs(znormt1(ifld)/znormt(ifld) - 1.0_jprb) - zmaxerr(4) = max(zmaxerr(4), zerr(4)) - enddo + if (icall_mode == 2) then + ! Temperature + do ifld = 1, nflevg + zerr(4) = abs(znormt1(ifld)/znormt(ifld) - 1.0_jprb) + zmaxerr(4) = max(zmaxerr(4), zerr(4)) + enddo + ! 
Surface pressure + do ifld = 1, 1 + zerr(1) = abs(znormsp1(ifld)/znormsp(ifld) - 1.0_jprb) + zmaxerr(1) = max(zmaxerr(1), zerr(1)) + enddo + endif write(nout,'("time step ",i6," took", f8.4," | zspvor max err="e10.3,& & " | zspdiv max err="e10.3," | zspsc3a max err="e10.3," | zspsc2 max err="e10.3)') & & jstep, ztstep(jstep), zmaxerr(3), zmaxerr(2), zmaxerr(4), zmaxerr(1) @@ -764,10 +737,12 @@ program ectrans_benchmark if (lprint_norms .or. ncheck > 0) then call specnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor, kvset=ivset) call specnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv, kvset=ivset) - if (nfld > 0) then - call specnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormt, kvset=ivset) + if (icall_mode == 2) then + if (nfld > 0) then + call specnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormt, kvset=ivset) + endif + call specnorm(pspec=zspsc2(1:1,:), pnorm=znormsp, kvset=ivsetsc2) endif - call specnorm(pspec=zspsc2(1:1,:), pnorm=znormsp, kvset=ivsetsc) if (myproc == 1) then zmaxerr(:) = -999.0 @@ -787,24 +762,26 @@ program ectrans_benchmark write(nout,'("0x",Z16.16)') znormdiv(ifld) endif enddo - if (nfld > 0) then - do ifld = 1, nflevg - zerr(4) = abs(real(znormt1(ifld),kind=jprd)/real(znormt(ifld),kind=jprd) - 1.0d0) - zmaxerr(4) = max(zmaxerr(4), zerr(4)) + if (icall_mode == 2) then + if (nfld > 0) then + do ifld = 1, nflevg + zerr(4) = abs(real(znormt1(ifld),kind=jprd)/real(znormt(ifld),kind=jprd) - 1.0d0) + zmaxerr(4) = max(zmaxerr(4), zerr(4)) + if (verbosity >= 1) then + write(nout,'("norm zspsc3a(",i4,",:,1) = ",f20.15," error = ",e10.3)') ifld, znormt(ifld), zerr(4) + write(nout,'("0x",Z16.16)') znormt(ifld) + endif + enddo + endif + do ifld = 1, 1 + zerr(1) = abs(real(znormsp1(ifld),kind=jprd)/real(znormsp(ifld),kind=jprd) - 1.0d0) + zmaxerr(1) = max(zmaxerr(1), zerr(1)) if (verbosity >= 1) then - write(nout,'("norm zspsc3a(",i4,",:,1) = ",f20.15," error = ",e10.3)') ifld, znormt(ifld), zerr(4) - write(nout,'("0x",Z16.16)') znormt(ifld) + write(nout,'("norm 
zspsc2( ",i4,",:) = ",f20.15," error = ",e10.3)') ifld, znormsp(ifld), zerr(1) + write(nout,'("0x",Z16.16)') znormsp(ifld) endif enddo endif - do ifld = 1, 1 - zerr(1) = abs(real(znormsp1(ifld),kind=jprd)/real(znormsp(ifld),kind=jprd) - 1.0d0) - zmaxerr(1) = max(zmaxerr(1), zerr(1)) - if (verbosity >= 1) then - write(nout,'("norm zspsc2( ",i4,",:) = ",f20.15," error = ",e10.3)') ifld, znormsp(ifld), zerr(1) - write(nout,'("0x",Z16.16)') znormsp(ifld) - endif - enddo ! maximum error across all fields if (nfld > 0) then @@ -946,8 +923,7 @@ program ectrans_benchmark ! Cleanup !=================================================================================================== -deallocate(zgmv) -deallocate(zgmvs) + !=================================================================================================== @@ -1127,6 +1103,7 @@ & subroutine on memory usage, thread-binding etc." write(nout, "(a)") " --nprtrw Size of W set in spectral decomposition" write(nout, "(a)") " -c, --check VALUE The multiplier of the machine epsilon used as a& & tolerance for correctness checking" + write(nout, "(a)") " --callmode The call mode for INV_TRANS and DIR_TRANS (1 or 2)" write(nout, "(a)") "" write(nout, "(a)") "DEBUGGING" write(nout, "(a)") " --dump-values Output gridpoint fields in unformatted binary file" @@ -1151,9 +1128,9 @@ subroutine parsing_failed(message) !=================================================================================================== -subroutine get_command_line_arguments(nsmax, cgrid, iters, iters_warmup, nfld, nlev, lvordiv, lscders, luvders, & +subroutine get_command_line_arguments(nsmax, cgrid, iters, iters_warmup, nfld, nlev, lvordiv, lscders, luvder, & & luseflt, nopt_mem_tr, nproma, verbosity, ldump_values, lprint_norms, & - & lmeminfo, nprtrv, nprtrw, ncheck) + & lmeminfo, nprtrv, nprtrw, ncheck, icall_mode) #ifdef _OPENACC use openacc, only: acc_init, acc_get_device_type @@ -1167,7 +1144,7 @@ subroutine get_command_line_arguments(nsmax, 
cgrid, iters, iters_warmup, nfld, n integer, intent(inout) :: nlev ! Number of vertical levels logical, intent(inout) :: lvordiv ! Also transform vorticity/divergence logical, intent(inout) :: lscders ! Compute scalar derivatives - logical, intent(inout) :: luvders ! Compute uv East-West derivatives + logical, intent(inout) :: luvder ! Compute uv East-West derivatives logical, intent(inout) :: luseflt ! Use fast Legendre transforms integer, intent(inout) :: nopt_mem_tr ! Use of heap or stack memory for ZCOMBUF arrays in transposition arrays (0 for heap, 1 for stack) integer, intent(inout) :: nproma ! NPROMA @@ -1180,6 +1157,9 @@ subroutine get_command_line_arguments(nsmax, cgrid, iters, iters_warmup, nfld, n integer, intent(inout) :: nprtrw ! Size of W set (spectral decomposition) integer, intent(inout) :: ncheck ! The multiplier of the machine epsilon used as a ! tolerance for correctness checking + integer, intent(inout) :: icall_mode ! The call mode for inv_trans and dir_trans + ! 1: pspvor, pspdiv, pspscalar, pgp + ! 2: pspvor, pspdiv, pspsc3a, pspsc2, pgpuv, pgp3a, pgp2 character(len=128) :: carg ! Storage variable for command line arguments integer :: iarg = 1 ! Argument index @@ -1223,7 +1203,7 @@ subroutine get_command_line_arguments(nsmax, cgrid, iters, iters_warmup, nfld, n case('-l', '--nlev'); nlev = get_int_value('-l', iarg) case('--vordiv'); lvordiv = .True. case('--scders'); lscders = .True. - case('--uvders'); luvders = .True. + case('--uvders'); luvder = .True. case('--flt'); luseflt = .True. 
case('--mem-tr'); nopt_mem_tr = get_int_value('--mem-tr', iarg) @@ -1233,6 +1213,11 @@ subroutine get_command_line_arguments(nsmax, cgrid, iters, iters_warmup, nfld, n case('--nprtrv'); nprtrv = get_int_value('--nprtrv', iarg) case('--nprtrw'); nprtrw = get_int_value('--nprtrw', iarg) case('-c', '--check'); ncheck = get_int_value('-c', iarg) + case('--callmode') + icall_mode = get_int_value('--callmode', iarg) + if (icall_mode /= 1 .and. icall_mode /= 2) then + call parsing_failed("Invalid argument for --callmode: must be 1 or 2") + end if case default call parsing_failed("Unrecognised argument: " // trim(carg)) @@ -1241,11 +1226,10 @@ subroutine get_command_line_arguments(nsmax, cgrid, iters, iters_warmup, nfld, n end do if (.not. lvordiv) then - luvders = .false. + luvder = .false. endif end subroutine get_command_line_arguments - !=================================================================================================== function cubic_octahedral_gaussian_grid(nsmax) result(cgrid) @@ -1295,33 +1279,18 @@ end function get_median !=================================================================================================== -subroutine initialize_spectral_arrays(nsmax, zsp, sp3d) - - integer, intent(in) :: nsmax ! Spectral truncation - real(kind=jprb), intent(inout) :: zsp(:,:) ! Surface pressure - real(kind=jprb), intent(inout) :: sp3d(:,:,:) ! 3D fields - - integer(kind=jpim) :: nflevl - integer(kind=jpim) :: nfield - - integer :: i, j +subroutine initialize_spectral_field(nsmax, field) - nflevl = size(sp3d, 1) - nfield = size(sp3d, 3) + integer, intent(in) :: nsmax ! Spectral truncation + real(kind=jprb), intent(inout) :: field(:,:) ! Field to initialize - ! First initialize surface pressure - call initialize_2d_spectral_field(nsmax, zsp(1,:)) + integer :: i - ! 
Then initialize all of the 3D fields - do i = 1, nflevl - do j = 1, nfield - call initialize_2d_spectral_field(nsmax, sp3d(i,:,j)) - end do - end do - -end subroutine initialize_spectral_arrays + do i = 1, size(field,1) + call initialize_2d_spectral_field(nsmax, field(i,:)) + enddo -!=================================================================================================== +end subroutine initialize_spectral_field subroutine initialize_2d_spectral_field(nsmax, field) From 850b2e1b79bec161d3cb3b183a64f52d806bf3f4 Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Fri, 19 Jul 2024 13:20:34 +0000 Subject: [PATCH 02/10] Update error norm calculation in benchmark program --- src/programs/ectrans-benchmark.F90 | 198 ++++++++++++++--------------- 1 file changed, 98 insertions(+), 100 deletions(-) diff --git a/src/programs/ectrans-benchmark.F90 b/src/programs/ectrans-benchmark.F90 index 70b105d82..2451ba928 100644 --- a/src/programs/ectrans-benchmark.F90 +++ b/src/programs/ectrans-benchmark.F90 @@ -97,8 +97,10 @@ program ectrans_benchmark real(kind=jprd) :: ztstepmax2, ztstepmin2, ztstepavg2, ztstepmed2 real(kind=jprd), allocatable :: ztstep(:), ztstep1(:), ztstep2(:) -real(kind=jprb), allocatable :: znormsp(:), znormsp1(:), znormdiv(:), znormdiv1(:) -real(kind=jprb), allocatable :: znormvor(:), znormvor1(:), znormt(:), znormt1(:) +real(kind=jprb), allocatable :: znormvor(:), znormvor1(:), znormdiv(:), znormdiv1(:) +real(kind=jprb), allocatable :: znormscalar(:), znormscalar1(:) +real(kind=jprb), allocatable :: znormsc3a(:), znormsc3a1(:), znormsc2(:), znormsc21(:) + real(kind=jprd) :: zaveave(0:jpmaxstat) ! Spectral space data structures @@ -521,23 +523,25 @@ program ectrans_benchmark !=================================================================================================== if (lprint_norms .or. 
ncheck > 0) then - allocate(znormsp(1)) - allocate(znormsp1(1)) allocate(znormvor(nflevg)) allocate(znormvor1(nflevg)) allocate(znormdiv(nflevg)) allocate(znormdiv1(nflevg)) - allocate(znormt(nflevg)) - allocate(znormt1(nflevg)) - call specnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor1, kvset=ivset(1:nflevg)) - call specnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv1, kvset=ivset(1:nflevg)) + call specnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor1, kvset=ivset) + call specnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv1, kvset=ivset) - if (icall_mode == 2) then - if (nfld > 0) then - call specnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormt1, kvset=ivset(1:nflevg)) - endif - call specnorm(pspec=zspsc2(1:1,:), pnorm=znormsp1, kvset=ivsetsc2) + if (icall_mode == 1) then + allocate(znormscalar(nfld*nflevg+1)) + allocate(znormscalar1(nfld*nflevg+1)) + call specnorm(pspec=zspscalar(:,:), pnorm=znormscalar1, kvset=ivsetsc) + else + allocate(znormsc3a(nflevg)) + allocate(znormsc3a1(nflevg)) + allocate(znormsc2(1)) + allocate(znormsc21(1)) + if (nfld > 0) call specnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormsc3a1, kvset=ivset) + call specnorm(pspec=zspsc2(1:1,:), pnorm=znormsc21, kvset=ivsetsc2) endif if (verbosity >= 1 .and. 
myproc == 1) then @@ -549,17 +553,20 @@ program ectrans_benchmark write(nout,'("norm zspdiv( ",i4,",:) = ",f20.15)') ifld, znormdiv1(ifld) write(nout,'("0x",Z16.16)') znormdiv1(ifld) enddo - if (icall_mode == 2) then + if (icall_mode == 1) then + do ifld = 1, nfld*nflevg+1 + write(nout,'("norm zspscalar(",i4,",:,1) = ",f20.15)') ifld, znormscalar1(ifld) + write(nout,'("0x",Z16.16)') znormscalar1(ifld) + enddo + else if (nfld > 0) then do ifld = 1, nflevg - write(nout,'("norm zspsc3a(",i4,",:,1) = ",f20.15)') ifld, znormt1(ifld) - write(nout,'("0x",Z16.16)') znormt1(ifld) + write(nout,'("norm zspsc3a(",i4,",:,1) = ",f20.15)') ifld, znormsc3a1(ifld) + write(nout,'("0x",Z16.16)') znormsc3a1(ifld) enddo endif - do ifld = 1, 1 - write(nout,'("norm zspsc2( ",i4,",:) = ",f20.15)') ifld, znormsp1(ifld) - write(nout,'("0x",Z16.16)') znormsp1(ifld) - enddo + write(nout,'("norm zspsc2( ",i4,",:) = ",f20.15)') 1, znormsc21(1) + write(nout,'("0x",Z16.16)') znormsc21(1) endif endif endif @@ -675,48 +682,36 @@ program ectrans_benchmark if (lprint_norms) then call gstats(6,0) - call specnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor, kvset=ivset(1:nflevg)) - call specnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv, kvset=ivset(1:nflevg)) + call specnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor, kvset=ivset) + call specnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv, kvset=ivset) - if (icall_mode == 2) then - call specnorm(pspec=zspsc2(1:1,:), pnorm=znormsp, kvset=ivsetsc2(1:1)) - if (nfld > 0) then - call specnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormt, kvset=ivset(1:nflevg)) - endif + if (icall_mode == 1) then + call specnorm(pspec=zspscalar(:,:), pnorm=znormscalar, kvset=ivsetsc) + else + if (nfld > 0) call specnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormsc3a, kvset=ivset) + call specnorm(pspec=zspsc2(1:1,:), pnorm=znormsc2, kvset=ivsetsc2) endif if (myproc == 1) then - zmaxerr(:) = -999.0 - ! 
Vorticity - do ifld = 1, nflevg - zerr(3) = abs(znormvor1(ifld)/znormvor(ifld) - 1.0_jprb) - zmaxerr(3) = max(zmaxerr(3),zerr(3)) - enddo - ! Divergence - do ifld = 1, nflevg - zerr(2) = abs(znormdiv1(ifld)/znormdiv(ifld) - 1.0_jprb) - zmaxerr(2) = max(zmaxerr(2), zerr(2)) - enddo - if (nfld > 0) then - if (icall_mode == 2) then - ! Temperature - do ifld = 1, nflevg - zerr(4) = abs(znormt1(ifld)/znormt(ifld) - 1.0_jprb) - zmaxerr(4) = max(zmaxerr(4), zerr(4)) - enddo - ! Surface pressure - do ifld = 1, 1 - zerr(1) = abs(znormsp1(ifld)/znormsp(ifld) - 1.0_jprb) - zmaxerr(1) = max(zmaxerr(1), zerr(1)) - enddo - endif + zmaxerr(1) = maxval(abs((znormvor1 / znormvor) - 1.0_jprb)) + zmaxerr(2) = maxval(abs((znormdiv1 / znormdiv) - 1.0_jprb)) + if (icall_mode == 1) then + zmaxerr(3) = maxval(abs((znormscalar1 / znormscalar) - 1.0_jprb)) write(nout,'("time step ",i6," took", f8.4," | zspvor max err="e10.3,& - & " | zspdiv max err="e10.3," | zspsc3a max err="e10.3," | zspsc2 max err="e10.3)') & - & jstep, ztstep(jstep), zmaxerr(3), zmaxerr(2), zmaxerr(4), zmaxerr(1) + & " | zspdiv max err="e10.3," | zspscalar max err="e10.3)') & + & jstep, ztstep(jstep), zmaxerr(1), zmaxerr(2), zmaxerr(3) else - write(nout,'("time step ",i6," took", f8.4," | zspvor max err="e10.3,& - & " | zspdiv max err="e10.3," | zspsc2 max err="e10.3)') & - & jstep, ztstep(jstep), zmaxerr(3), zmaxerr(2), zmaxerr(1) + zmaxerr(4) = maxval(abs((znormsc21 / znormsc2) - 1.0_jprb)) + if (nfld > 0) then + zmaxerr(3) = maxval(abs((znormsc3a1 / znormsc3a) - 1.0_jprb)) + write(nout,'("time step ",i6," took", f8.4," | zspvor max err="e10.3,& + & " | zspdiv max err="e10.3," | zspsc3a max err="e10.3," | zspsc2 max err="e10.3)') & + & jstep, ztstep(jstep), zmaxerr(1), zmaxerr(2), zmaxerr(3), zmaxerr(4) + else + write(nout,'("time step ",i6," took", f8.4," | zspvor max err="e10.3,& + & " | zspdiv max err="e10.3," | zspsc2 max err="e10.3)') & + & jstep, ztstep(jstep), zmaxerr(1), zmaxerr(2), zmaxerr(4) + endif endif 
endif call gstats(6,1) @@ -735,66 +730,69 @@ program ectrans_benchmark write(nout,'(" ")') if (lprint_norms .or. ncheck > 0) then - call specnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor, kvset=ivset) - call specnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv, kvset=ivset) - if (icall_mode == 2) then - if (nfld > 0) then - call specnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormt, kvset=ivset) - endif - call specnorm(pspec=zspsc2(1:1,:), pnorm=znormsp, kvset=ivsetsc2) + call specnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor, kvset=ivset) + call specnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv, kvset=ivset) + + if (icall_mode == 1) then + call specnorm(pspec=zspscalar(:,:), pnorm=znormscalar, kvset=ivsetsc) + else + if (nfld > 0) call specnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormsc3a, kvset=ivset) + call specnorm(pspec=zspsc2(1:1,:), pnorm=znormsc2, kvset=ivsetsc2) endif if (myproc == 1) then - zmaxerr(:) = -999.0 - do ifld = 1, nflevg - zerr(3) = abs(real(znormvor1(ifld),kind=jprd)/real(znormvor(ifld),kind=jprd) - 1.0_jprd) - zmaxerr(3) = max(zmaxerr(3), zerr(3)) - if (verbosity >= 1) then - write(nout,'("norm zspvor( ",i4,") = ",f20.15," error = ",e10.3)') ifld, znormvor(ifld), zerr(3) + zmaxerr = -99.0_jprd + zmaxerr(1) = maxval(abs((real(znormvor1,jprd) / (real(znormvor,jprd)) - 1.0_jprd))) + if (verbosity >= 1) then + do ifld = 1, nflevg + write(nout,'("norm zspvor( ",i4,") = ",f20.15)') ifld, znormvor(ifld) write(nout,'("0x",Z16.16)') znormvor(ifld) + enddo + endif + zmaxerr(2) = maxval(abs((real(znormdiv1,jprd) / (real(znormdiv,jprd)) - 1.0_jprd))) + if (verbosity >= 1) then + do ifld = 1, nflevg + write(nout,'("norm zspdiv( ",i4,",:) = ",f20.15)') ifld, znormdiv(ifld) + write(nout,'("0x",Z16.16)') znormdiv(ifld) + enddo + endif + if (icall_mode == 1) then + zmaxerr(3) = maxval(abs((znormscalar1 / znormscalar) - 1.0_jprb)) + if (verbosity >= 1) then + do ifld = 1, nfld*nflevg+1 + write(nout,'("norm znormscalar( ",i4,",:) = ",f20.15)') ifld, znormscalar(ifld) 
+ write(nout,'("0x",Z16.16)') znormscalar(ifld) + enddo endif - enddo - do ifld = 1, nflevg - zerr(2) = abs(real(znormdiv1(ifld),kind=jprd)/real(znormdiv(ifld),kind=jprd) - 1.0d0) - zmaxerr(2) = max(zmaxerr(2),zerr(2)) + else + zmaxerr(4) = maxval(abs((znormsc21 / znormsc2) - 1.0_jprb)) if (verbosity >= 1) then - write(nout,'("norm zspdiv( ",i4,",:) = ",f20.15," error = ",e10.3)') ifld, znormdiv(ifld), zerr(2) - write(nout,'("0x",Z16.16)') znormdiv(ifld) + write(nout,'("norm znormsc2( ",i4,",:) = ",f20.15)') 1, znormsc2(1) + write(nout,'("0x",Z16.16)') znormsc2(1) endif - enddo - if (icall_mode == 2) then if (nfld > 0) then - do ifld = 1, nflevg - zerr(4) = abs(real(znormt1(ifld),kind=jprd)/real(znormt(ifld),kind=jprd) - 1.0d0) - zmaxerr(4) = max(zmaxerr(4), zerr(4)) - if (verbosity >= 1) then - write(nout,'("norm zspsc3a(",i4,",:,1) = ",f20.15," error = ",e10.3)') ifld, znormt(ifld), zerr(4) - write(nout,'("0x",Z16.16)') znormt(ifld) - endif - enddo - endif - do ifld = 1, 1 - zerr(1) = abs(real(znormsp1(ifld),kind=jprd)/real(znormsp(ifld),kind=jprd) - 1.0d0) - zmaxerr(1) = max(zmaxerr(1), zerr(1)) + zmaxerr(3) = maxval(abs((znormsc3a1 / znormsc3a) - 1.0_jprb)) if (verbosity >= 1) then - write(nout,'("norm zspsc2( ",i4,",:) = ",f20.15," error = ",e10.3)') ifld, znormsp(ifld), zerr(1) - write(nout,'("0x",Z16.16)') znormsp(ifld) + do ifld = 1, nflevg + write(nout,'("norm zspsc3a(",i4,",:,1) = ",f20.15)') ifld, znormsc3a(ifld) + write(nout,'("0x",Z16.16)') znormsc3a(ifld) + enddo endif - enddo + endif endif ! 
maximum error across all fields - if (nfld > 0) then - zmaxerrg = max(zmaxerr(1), zmaxerr(2), zmaxerr(3), zmaxerr(4)) - else - zmaxerrg = max(zmaxerr(1), zmaxerr(2), zmaxerr(3)) - endif + zmaxerrg = maxval(zmaxerr) if (verbosity >= 1) write(nout,*) - write(nout,'("max error zspvor(1:nlev,:) = ",e10.3)') zmaxerr(3) + write(nout,'("max error zspvor(1:nlev,:) = ",e10.3)') zmaxerr(1) write(nout,'("max error zspdiv(1:nlev,:) = ",e10.3)') zmaxerr(2) - if (nfld > 0) write(nout,'("max error zspsc3a(1:nlev,:,1) = ",e10.3)') zmaxerr(4) - write(nout,'("max error zspsc2(1:1,:) = ",e10.3)') zmaxerr(1) + if (icall_mode == 1) then + write(nout,'("max error zspscalar(1:nlev,:,1) = ",e10.3)') zmaxerr(3) + else + if (nfld > 0) write(nout,'("max error zspsc3a(1:nlev,:,1) = ",e10.3)') zmaxerr(3) + write(nout,'("max error zspsc2(1:1,:) = ",e10.3)') zmaxerr(4) + endif write(nout,*) write(nout,'("max error combined = = ",e10.3)') zmaxerrg write(nout,*) From 37e40d4962b1b2a04ed7305af0a1b1bbcfc8974d Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Fri, 19 Jul 2024 13:20:50 +0000 Subject: [PATCH 03/10] Add callmode to list of parameters for test suite --- tests/CMakeLists.txt | 86 ++++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 42 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9ba9cfe51..ba0707c64 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -84,48 +84,50 @@ if( HAVE_TESTS ) endif() foreach( mpi ${ntasks} ) foreach( omp ${nthreads} ) - set( t 47 ) - set( grid O48 ) - ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld0 - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 0 --meminfo --check 100 --norms -v - MPI ${mpi} - OMP ${omp} - ) - ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10 - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 
--meminfo --check 100 --norms -v - MPI ${mpi} - OMP ${omp} - ) - ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20 - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --check 100 --norms -v - MPI ${mpi} - OMP ${omp} - ) - ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_scders - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --scders --check 100 --norms -v - MPI ${mpi} - OMP ${omp} - ) - ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_vordiv - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --vordiv --check 100 --norms -v - MPI ${mpi} - OMP ${omp} - ) - ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_vordiv_uvders - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --vordiv --uvders --check 100 --norms -v - MPI ${mpi} - OMP ${omp} - ) - ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_flt - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --flt --check 2000 --norms -v - MPI ${mpi} - OMP ${omp} - ) - ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_nproma16 - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --nproma 16 --check 100 --norms -v - MPI ${mpi} - OMP ${omp} - ) + foreach( callmode 1 2 ) + set( t 47 ) + set( grid O48 ) + ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld0_callmode${callmode} + COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} 
--grid ${grid} --niter 2 --nfld 0 --meminfo --check 100 --norms -v --callmode ${callmode} + MPI ${mpi} + OMP ${omp} + ) + ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_callmode${callmode} + COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --meminfo --check 100 --norms -v --callmode ${callmode} + MPI ${mpi} + OMP ${omp} + ) + ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_callmode${callmode} + COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --check 100 --norms -v --callmode ${callmode} + MPI ${mpi} + OMP ${omp} + ) + ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_scders_callmode${callmode} + COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --scders --check 100 --norms -v --callmode ${callmode} + MPI ${mpi} + OMP ${omp} + ) + ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_vordiv_callmode${callmode} + COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --vordiv --check 100 --norms -v --callmode ${callmode} + MPI ${mpi} + OMP ${omp} + ) + ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_vordiv_uvders_callmode${callmode} + COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --vordiv --uvders --check 100 --norms -v --callmode ${callmode} + MPI ${mpi} + OMP ${omp} + ) + ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_flt_callmode${callmode} + COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --flt --check 2000 --norms -v 
--callmode ${callmode} + MPI ${mpi} + OMP ${omp} + ) + ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_nproma16_callmode${callmode} + COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --nproma 16 --check 100 --norms -v --callmode ${callmode} + MPI ${mpi} + OMP ${omp} + ) + endforeach() endforeach() endforeach() endif() From 262850a56b855a1b6575788ecba8abbb17b5b56a Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Fri, 19 Jul 2024 14:40:51 +0000 Subject: [PATCH 04/10] Properly slice all arrays --- src/programs/ectrans-benchmark.F90 | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/programs/ectrans-benchmark.F90 b/src/programs/ectrans-benchmark.F90 index 2451ba928..bb544121f 100644 --- a/src/programs/ectrans-benchmark.F90 +++ b/src/programs/ectrans-benchmark.F90 @@ -196,7 +196,7 @@ program ectrans_benchmark integer(kind=jpim) :: ierr integer :: icall_mode = 1 integer :: inum_wind_fields, inum_sc_3d_fields, inum_sc_2d_fields, itotal_fields -integer :: igp_start +integer :: ipgp_start, ipgp_end, ipgpuv_start, ipgpuv_end real(kind=jprb), allocatable :: global_field(:,:) @@ -484,17 +484,25 @@ program ectrans_benchmark ! Allocate gridpoint arrays !=================================================================================================== +ipgp_start = 1 +ipgp_end = (2 + nfld) * nflevg + 1 +ipgpuv_start = 1 +ipgpuv_end = 2 + ! Also enable vorticity divergence? if (lvordiv) then inum_wind_fields = 4 - igp_start = 2 * nflevg + 1 ! If lvordiv, skip the vor and div elements when passing zgp + ! If lvordiv, skip the vor and div elements when passing zgp + ipgp_start = ipgp_start + 2 * nflevg + ipgp_end = ipgp_end + 2 * nflevg + ipgpuv_start = ipgpuv_start + 2 + ipgpuv_end = ipgpuv_end + 2 else ! Otherwise just U and V inum_wind_fields = 2 - igp_start = 1 endif -! 
Also calculate East-West derivatives of winds? +! Also make room for East-West derivatives of winds? if (luvder) inum_wind_fields = inum_wind_fields + 2 ! We always have our nfld 3D scalar fields @@ -503,10 +511,10 @@ program ectrans_benchmark ! We always have one 2D scalar field inum_sc_2d_fields = 1 -! Also calculate North-South and East-West derivatives of scalar fields +! Also make room for North-South and East-West derivatives of scalar fields if (lscders) then - inum_sc_3d_fields = inum_sc_3d_fields * 2 - inum_sc_2d_fields = inum_sc_2d_fields * 2 + inum_sc_3d_fields = inum_sc_3d_fields * 3 + inum_sc_2d_fields = inum_sc_2d_fields * 3 endif if (icall_mode == 1) then @@ -663,11 +671,11 @@ program ectrans_benchmark call gstats(5,0) if (icall_mode == 1) then - call dir_trans(pgp=zgp(:,igp_start:,:), pspvor=zspvor, pspdiv=zspdiv, pspscalar=zspscalar, & - & kvsetuv=ivset, kvsetsc=ivsetsc, & - & kproma=nproma) + call dir_trans(pgp=zgp(:,ipgp_start:ipgp_end,:), pspvor=zspvor, pspdiv=zspdiv, & + & pspscalar=zspscalar, kvsetuv=ivset, kvsetsc=ivsetsc, kproma=nproma) else - call dir_trans(pgpuv=zgpuv, pgp3a=zgp3a, pgp2=zgp2, & + call dir_trans(pgpuv=zgpuv(:,:,ipgpuv_start:ipgpuv_end,:), & + & pgp3a=zgp3a(:,:,1:nfld,:), pgp2=zgp2(:,1:1,:), & & pspvor=zspvor, pspdiv=zspdiv, pspsc3a=zspsc3a, pspsc2=zspsc2, & & kvsetuv=ivset, kvsetsc2=ivsetsc2, kvsetsc3a=ivset, kproma=nproma) endif From 30e645edfd39790e03228700b886d51beb46e2f7 Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Fri, 19 Jul 2024 14:41:20 +0000 Subject: [PATCH 05/10] Increase tolerance for FLT test This needs to be investigated. 
--- tests/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ba0707c64..3ee0c961b 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -117,8 +117,9 @@ if( HAVE_TESTS ) MPI ${mpi} OMP ${omp} ) + # TODO: Find out why the FLT gives so much higher errors ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_flt_callmode${callmode} - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --flt --check 2000 --norms -v --callmode ${callmode} + COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --flt --check 1000000 --norms -v --callmode ${callmode} MPI ${mpi} OMP ${omp} ) From 63ab9362ec4bd0717ada59b842f1356854626f47 Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Fri, 19 Jul 2024 15:34:37 +0000 Subject: [PATCH 06/10] Reorganise ectrans_benchmark --- src/programs/ectrans-benchmark.F90 | 87 +++++++++++++----------------- 1 file changed, 36 insertions(+), 51 deletions(-) diff --git a/src/programs/ectrans-benchmark.F90 b/src/programs/ectrans-benchmark.F90 index bb544121f..bf13cd6e8 100644 --- a/src/programs/ectrans-benchmark.F90 +++ b/src/programs/ectrans-benchmark.F90 @@ -21,25 +21,6 @@ program ectrans_benchmark ! This test performs spectral to real and real to spectral transforms repeated in ! timed loop. ! -! 1) One "surface" field is always transformed: -! zspsc2(1,1:nspec2) <-> zgmvs(1:nproma,1:1,1:ngbplk) -! -! 2) A Multiple "3d" fields are transformed and can be disabled with "--nfld 0" -! -! zspsc3a(1:nlev,1:nspec2,1:nfld) <-> zgp3a(1:nproma,1:nlev,1:nfld,1:ngpblk) -! -! 3) Optionally a "3d" vorticity/divergence field is transformed to uv (wind) and -! can be enabled with "--vordiv" -! -! zspvor(1:nlev,1:nspec2) / zspdiv(1:nlev,1:nspec2) <-> zgpuv(1:nproma,1:nlev,1:2,1:ngpblk) -! -! 
4) Optionally scalar derivatives can be computed for the fields described in 1) and 2) -! This must be enabled with "--scders" -! -! 5) Optionally uv East-West derivate can be computed from vorticity/divergence. -! This must be enabled with "--vordiv --uvders" -! -! ! Authors : George Mozdzynski ! Willem Deconinck ! Ioan Hadade @@ -58,7 +39,7 @@ program ectrans_benchmark integer(kind=jpim), parameter :: min_octa_points = 20 integer(kind=jpim) :: istack, getstackusage -real(kind=jprd), dimension(1) :: zmaxerr(5), zerr(5) +real(kind=jprd) :: zmaxerr(5) real(kind=jprd) :: zmaxerrg ! Output unit numbers @@ -67,16 +48,15 @@ program ectrans_benchmark integer(kind=jpim), parameter :: noutdump = 7 ! Unit number for field output ! Default parameters -integer(kind=jpim) :: nsmax = 79 ! Spectral truncation integer(kind=jpim) :: iters = 10 ! Number of iterations for transform test integer(kind=jpim) :: nfld = 1 ! Number of 3D scalar fields integer(kind=jpim) :: nlev = 1 ! Number of vertical levels integer(kind=jpim) :: iters_warmup = 3 ! Number of warm up steps (for which timing statistics should be ignored) -integer(kind=jpim) :: nflevg ! Total number of vertical levels -integer(kind=jpim) :: ndgl ! Number of latitudes -integer(kind=jpim) :: nspec2 ! Number of spectral coefficients (real and imaginary) -integer(kind=jpim) :: ngptot ! Total number of grid points on this task +integer(kind=jpim) :: nflevg ! Total number of vertical levels + +integer(kind=jpim) :: nspec2 ! Number of spectral coefficients (real and imaginary) +integer(kind=jpim) :: ngptot ! Total number of grid points on this task integer(kind=jpim) :: ngptotg ! 
Total number of grid points across all tasks integer(kind=jpim) :: ifld @@ -88,7 +68,7 @@ program ectrans_benchmark integer(kind=jpim) :: ib integer(kind=jpim) :: jprtrv -integer(kind=jpim), allocatable :: nloen(:), nprcids(:) +integer(kind=jpim), allocatable :: nprcids(:) integer(kind=jpim) :: myproc, jj integer :: jstep @@ -117,13 +97,25 @@ program ectrans_benchmark real(kind=jprb), allocatable PINNED_TAG :: zgp2(:,:,:) logical :: lstack = .false. ! Output stack info -logical :: luserpnm = .false. -logical :: lkeeprpnm = .false. + +! setup_trans options +integer(kind=jpim) :: nsmax = 79 ! Spectral truncation +integer(kind=jpim) :: ndgl ! Number of latitudes +integer(kind=jpim), allocatable :: nloen(:) ! Number of points on each latitude +logical :: luserpnm = .false. ! Use Belusov algorithm to compute RPNM array instead of per m logical :: luseflt = .false. ! Use fast legendre transforms + +! Extra inv_trans options +logical :: lvordiv = .false. ! Compute vorticity and divergence in grid point space +logical :: lscders = .false. ! Compute derivatives of scalar (North-South and East-West) in grid + ! point space +logical :: luvder = .false. ! Compute East-West derivatives of U and V wind in grid point space + +! GSTATS options +logical :: lstats = .true. ! gstats statistics logical :: ltrace_stats = .false. logical :: lstats_omp = .false. logical :: lstats_comms = .false. -logical :: lstats = .true. ! gstats statistics logical :: lbarrier_stats = .false. logical :: lbarrier_stats2 = .false. logical :: ldetailed_stats = .false. @@ -132,17 +124,14 @@ program ectrans_benchmark logical :: lstatscpu = .false. logical :: lstats_mem = .false. logical :: lxml_stats = .false. -logical :: lvordiv = .false. -logical :: lscders = .false. -logical :: luvder = .false. -logical :: lprint_norms = .false. ! Calculate and print spectral norms -logical :: lmeminfo = .false. ! 
Show information from FIAT routine ec_meminfo at the end - integer(kind=jpim) :: nstats_mem = 0 integer(kind=jpim) :: ntrace_stats = 0 integer(kind=jpim) :: nprnt_stats = 1 integer(kind=jpim) :: nopt_mem_tr = 0 +logical :: lprint_norms = .false. ! Calculate and print spectral norms +logical :: lmeminfo = .false. ! Show information from FIAT routine ec_meminfo at the end + ! The multiplier of the machine epsilon used as a tolerance for correctness checking ! ncheck = 0 (the default) means that correctness checking is disabled integer(kind=jpim) :: ncheck = 0 @@ -152,11 +141,6 @@ program ectrans_benchmark ! Verbosity level (0 or 1) integer :: verbosity = 0 -real(kind=jprd) :: zra = 6371229._jprd - -integer(kind=jpim) :: nmax_resol = 37 ! Max number of resolutions -integer(kind=jpim) :: npromatr = 0 ! nproma for trans lib - integer(kind=jpim) :: nproc ! Number of procs integer(kind=jpim) :: nthread integer(kind=jpim) :: nprgpns ! Grid-point decomp @@ -178,7 +162,6 @@ program ectrans_benchmark logical :: lsync_trans = .true. ! Activate barrier sync logical :: leq_regions = .true. ! Eq regions flag - integer(kind=jpim) :: nproma = 0 integer(kind=jpim) :: ngpblks ! locals @@ -374,10 +357,9 @@ program ectrans_benchmark if (verbosity >= 1) write(nout,'(a)')'======= Setup ecTrans =======' call gstats(1, 0) -call setup_trans0(kout=nout, kerr=nerr, kprintlev=merge(2, 0, verbosity == 1), & - & kmax_resol=nmax_resol, kpromatr=npromatr, kprgpns=nprgpns, kprgpew=nprgpew, & - & kprtrw=nprtrw, ldsync_trans=lsync_trans, & - & ldeq_regions=leq_regions, prad=zra, ldalloperm=.true., ldmpoff=.not.luse_mpi,& +call setup_trans0(kout=nout, kerr=nerr, kprintlev=merge(2, 0, verbosity == 1), & + & kprgpns=nprgpns, kprgpew=nprgpew, kprtrw=nprtrw, ldsync_trans=lsync_trans, & + & ldeq_regions=leq_regions, ldalloperm=.true., ldmpoff=.not.luse_mpi, & & kopt_memory_tr=nopt_mem_tr) call gstats(1, 1) @@ -386,8 +368,7 @@ program ectrans_benchmark call set_ectrans_gpu_nflev(nflevl) ! 
We pass nflevl via environment variable in order not to change API ! In long run, ectrans should grow its internal buffers automatically -call setup_trans(ksmax=nsmax, kdgl=ndgl, kloen=nloen, ldsplit=.true., & - & lduserpnm=luserpnm, ldkeeprpnm=lkeeprpnm, & +call setup_trans(ksmax=nsmax, kdgl=ndgl, kloen=nloen, ldsplit=.true., lduserpnm=luserpnm, & & lduseflt=luseflt) call gstats(2, 1) @@ -440,11 +421,13 @@ program ectrans_benchmark ! Allocate and initialize spectral arrays !=================================================================================================== +! Initialize vorticity and divergence - same for both call modes allocate(zspvor(nflevl,nspec2)) allocate(zspdiv(nflevl,nspec2)) call initialize_spectral_field(nsmax, zspvor) call initialize_spectral_field(nsmax, zspdiv) +! Initialize spectral arrays differently depending on call mode if (icall_mode == 1) then allocate(zspscalar(nfld*nflevl+1,nspec2)) call initialize_spectral_field(nsmax, zspscalar) @@ -457,9 +440,8 @@ program ectrans_benchmark call initialize_spectral_field(nsmax, zspsc2) endif +! Compute spectral distribution variables allocate(ivset(nflevg)) - -! Compute spectral distribution ilev = 0 do jb = 1, nprtrv do jlev=1, numll(jb) @@ -484,6 +466,7 @@ program ectrans_benchmark ! Allocate gridpoint arrays !=================================================================================================== +! Determine start and end slice points for grid point arrays when they are passed back to dir_trans ipgp_start = 1 ipgp_end = (2 + nfld) * nflevg + 1 ipgpuv_start = 1 @@ -491,8 +474,9 @@ program ectrans_benchmark ! Also enable vorticity divergence? if (lvordiv) then - inum_wind_fields = 4 - ! If lvordiv, skip the vor and div elements when passing zgp + inum_wind_fields = 4 ! Four fields - U, V, vorticity, divergence + ! If lvordiv, skip the vorticity and divergence elements when passing zgp + ! 
These two come first when enabled ipgp_start = ipgp_start + 2 * nflevg ipgp_end = ipgp_end + 2 * nflevg ipgpuv_start = ipgpuv_start + 2 @@ -517,6 +501,7 @@ program ectrans_benchmark inum_sc_2d_fields = inum_sc_2d_fields * 3 endif +! Finally, allocate grid point arrays if (icall_mode == 1) then itotal_fields = nflevg * (inum_wind_fields + inum_sc_3d_fields) + inum_sc_2d_fields allocate(zgp(nproma,itotal_fields,ngpblks)) From 9e75a16e87bfaea6ad83049a6b78309ce5fa2455 Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Wed, 11 Sep 2024 08:28:07 +0000 Subject: [PATCH 07/10] Add back comment separator --- src/programs/ectrans-benchmark.F90 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/programs/ectrans-benchmark.F90 b/src/programs/ectrans-benchmark.F90 index bf13cd6e8..01fe8e23a 100644 --- a/src/programs/ectrans-benchmark.F90 +++ b/src/programs/ectrans-benchmark.F90 @@ -1283,6 +1283,8 @@ subroutine initialize_spectral_field(nsmax, field) end subroutine initialize_spectral_field +!=================================================================================================== + subroutine initialize_2d_spectral_field(nsmax, field) integer, intent(in) :: nsmax ! 
Spectral truncation From 5d188093b5d6ab7d75a1193291fa6e30ec1ab93c Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Wed, 11 Sep 2024 14:51:49 +0000 Subject: [PATCH 08/10] Reduce FLT error tolerance --- tests/CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 3ee0c961b..1228e663d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -117,9 +117,10 @@ if( HAVE_TESTS ) MPI ${mpi} OMP ${omp} ) - # TODO: Find out why the FLT gives so much higher errors + # The FLT gives higher errors (this is the consequence of the compression) so we must + # choose a higher error tolerance threshold ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_flt_callmode${callmode} - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --flt --check 1000000 --norms -v --callmode ${callmode} + COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --flt --check 25000 --norms -v --callmode ${callmode} MPI ${mpi} OMP ${omp} ) From a309b688569b4ed8ff993c121c994ad3bcd095fd Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Mon, 18 Nov 2024 15:35:55 +0000 Subject: [PATCH 09/10] Fix typo --- src/programs/ectrans-benchmark.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/programs/ectrans-benchmark.F90 b/src/programs/ectrans-benchmark.F90 index 01fe8e23a..d8c5da838 100644 --- a/src/programs/ectrans-benchmark.F90 +++ b/src/programs/ectrans-benchmark.F90 @@ -1207,7 +1207,7 @@ subroutine get_command_line_arguments(nsmax, cgrid, iters, iters_warmup, nfld, n case('--callmode') icall_mode = get_int_value('--callmode', iarg) if (icall_mode /= 1 .and. 
icall_mode /= 2) then - call parsing_failed("Invalid argument for --calmode: must be 1 or 2") + call parsing_failed("Invalid argument for --callmode: must be 1 or 2") end if case default call parsing_failed("Unrecognised argument: " // trim(carg)) From 064136ddbc345dc2b8164db1a018d866476f1298 Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Mon, 18 Nov 2024 16:45:18 +0000 Subject: [PATCH 10/10] Increase FLT error tolerance again --- tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 1228e663d..c639762db 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -120,7 +120,7 @@ if( HAVE_TESTS ) # The FLT gives higher errors (this is the consequence of the compression) so we must # choose a higher error tolerance threshold ecbuild_add_test( TARGET ectrans_test_benchmark_${prec}_T${t}_${grid}_mpi${mpi}_omp${omp}_nfld10_nlev20_flt_callmode${callmode} - COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --flt --check 25000 --norms -v --callmode ${callmode} + COMMAND ectrans-benchmark-cpu-${prec} ARGS --truncation ${t} --grid ${grid} --niter 2 --nfld 10 --nlev 20 --flt --check 50000 --norms -v --callmode ${callmode} MPI ${mpi} OMP ${omp} )