
Merge pull request #400 from FESOM/refactoring_albedo_env
Refactoring albedo env
koldunovn authored Dec 22, 2022
2 parents 29ad2a1 + 025a738 commit e08f711
Showing 10 changed files with 914 additions and 700 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -14,6 +14,7 @@ set(OIFS_COUPLED OFF CACHE BOOL "compile fesom coupled to OpenIFS. (Also needs F
set(CRAY OFF CACHE BOOL "compile with cray ftn")
set(USE_ICEPACK OFF CACHE BOOL "compile fesom with the Icepack modules for sea ice column physics.")
set(OPENMP_REPRODUCIBLE OFF CACHE BOOL "serialize OpenMP loops that are critical for reproducible results")

#set(VERBOSE OFF CACHE BOOL "toggle debug output")
#add_subdirectory(oasis3-mct/lib/psmile)
add_subdirectory(src)
22 changes: 18 additions & 4 deletions env/albedo/shell
@@ -1,9 +1,23 @@
# make the contents as shell-agnostic as possible so we can include them with bash, zsh and others

module load intel-oneapi-compilers
module load intel-oneapi-mkl/2022.1.0
export FC="mpiifort -qmkl" CC=mpiicc CXX=mpiicpc
module load intel-oneapi-mpi/2021.6.0
export FC=mpiifort CC=mpiicc CXX=mpiicpc

module load intel-oneapi-mkl/2022.1.0
module load netcdf-fortran/4.5.4-intel-oneapi-mpi2021.6.0-oneapi2022.1.0
module load netcdf-c/4.8.1-intel-oneapi-mpi2021.6.0-oneapi2022.1.0

# from DKRZ recommended environment variables on levante
# (https://docs.dkrz.de/doc/levante/running-jobs/runtime-settings.html)
export HCOLL_ENABLE_MCAST_ALL="0"
export HCOLL_MAIN_IB=mlx5_0:1
export UCX_IB_ADDR_TYPE=ib_global
export UCX_NET_DEVICES=mlx5_0:1
export UCX_TLS=mm,knem,cma,dc_mlx5,dc_x,self # this line brings the largest speedup, roughly a factor of 1.5
export UCX_UNIFIED_MODE=y
export UCX_HANDLE_ERRORS=bt
export HDF5_USE_FILE_LOCKING=FALSE
export I_MPI_PMI=pmi2
export I_MPI_PMI_LIBRARY=/usr/lib64/libpmi2.so

export ENABLE_ALBEDO_INTELMPI_WORKAROUNDS=''
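Note: the empty export above is deliberate — src/CMakeLists.txt below only checks whether the variable is defined, not its value. A minimal usage sketch, assuming the file is sourced from the repository root before configuring:

    # sketch only: paths and build layout are assumptions, not part of this commit
    source env/albedo/shell                  # module loads plus the UCX/Intel MPI tuning above
    env | grep -E '^(UCX_|HCOLL_|I_MPI_)'    # verify the runtime settings are exported
    cmake -S . -B build                      # picks up ENABLE_ALBEDO_INTELMPI_WORKAROUNDS from the env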
19 changes: 16 additions & 3 deletions src/CMakeLists.txt
@@ -13,6 +13,12 @@ if(DEFINED ENV{ENABLE_ALEPH_CRAYMPICH_WORKAROUNDS}) # be able to set the initial
else()
option(ALEPH_CRAYMPICH_WORKAROUNDS "workaround for performance issues on aleph" OFF)
endif()
if(DEFINED ENV{ENABLE_ALBEDO_INTELMPI_WORKAROUNDS}) # be able to set the initial cache value from our env settings for albedo, not only via the cmake command
option(ALBEDO_INTELMPI_WORKAROUNDS "workaround for performance issues on albedo" ON)
else()
option(ALBEDO_INTELMPI_WORKAROUNDS "workaround for performance issues on albedo" OFF)
endif()
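Note: this mirrors the existing aleph block — the option defaults to ON whenever the environment variable exists (env/albedo/shell exports it as an empty string). Two equivalent ways to enable it, as a sketch assuming a fresh build directory:

    ENABLE_ALBEDO_INTELMPI_WORKAROUNDS='' cmake -S . -B build   # env presence flips the default to ON
    cmake -S . -B build -DALBEDO_INTELMPI_WORKAROUNDS=ON        # or set the option explicitly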


if(ALEPH_CRAYMPICH_WORKAROUNDS)
# todo: enable these options only for our targets
@@ -24,8 +30,11 @@ if(ALEPH_CRAYMPICH_WORKAROUNDS)
#add_compile_options(-DDISABLE_PARALLEL_RESTART_READ) # reading restarts is slow when doing it on parallel on aleph, switch it off for now
add_compile_options(-DENABLE_ALEPH_CRAYMPICH_WORKAROUNDS)
endif()
if(ALBEDO_INTELMPI_WORKAROUNDS)
add_compile_options(-DENABLE_ALBEDO_INTELMPI_WORKAROUNDS)
endif()
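Note: the compile definition added here is what the Fortran #ifdef guards below (info_module.F90, io_fesom_file.F90) react to. A quick check that it reaches the compile lines, as a sketch assuming the default Makefile generator:

    cmake --build build -- VERBOSE=1 2>&1 | grep -m1 ENABLE_ALBEDO_INTELMPI_WORKAROUNDS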

option(DISABLE_MULTITHREADING "disable asynchronous operations" OFF)
option(DISABLE_MULTITHREADING "disable asynchronous operations" ON)
option(ENABLE_OPENACC "compile with OpenACC support" OFF)
set(NV_GPU_ARCH "cc80" CACHE STRING "GPU arch for nvfortran compiler (cc35,cc50,cc60,cc70,cc80,...)")

@@ -129,13 +138,17 @@ if(${CMAKE_Fortran_COMPILER_ID} STREQUAL Intel )
else()
target_compile_options(${PROJECT_NAME} PRIVATE -r8 -i4 -fp-model precise -no-prec-div -no-prec-sqrt -fimf-use-svml -ip -init=zero -no-wrap-margin)
endif()
# target_compile_options(${PROJECT_NAME} PRIVATE -qopenmp -r8 -i4 -fp-model precise -no-prec-div -no-prec-sqrt -fimf-use-svml -xHost -ip -g -traceback -check all,noarg_temp_created,bounds,uninit ) #-ftrapuv ) #-init=zero)
# target_compile_options(${PROJECT_NAME} PRIVATE -r8 -i4 -fp-model precise -no-prec-div -no-prec-sqrt -fimf-use-svml -xHost -ip -g -traceback -check all,noarg_temp_created,bounds,uninit ) #-ftrapuv ) #-init=zero)
if(${FESOM_PLATFORM_STRATEGY} STREQUAL levante.dkrz.de )
target_compile_options(${PROJECT_NAME} PRIVATE -march=core-avx2 -mtune=core-avx2)
elseif(${FESOM_PLATFORM_STRATEGY} STREQUAL albedo)
target_compile_options(${PROJECT_NAME} PRIVATE -march=core-avx2 -O3 -ip -fPIC -qopt-malloc-options=2 -qopt-prefetch=5 -unroll-aggressive) #NEC mpi option
else()
target_compile_options(${PROJECT_NAME} PRIVATE -xHost)
endif()
# target_compile_options(${PROJECT_NAME} PRIVATE -g -traceback ) #-check all,noarg_temp_created,bounds,uninit ) #-ftrapuv ) #-init=zero)
# target_compile_options(${PROJECT_NAME} PRIVATE -qopenmp -r8 -i4 -fp-model precise -no-prec-div -no-prec-sqrt -fimf-use-svml -xHost -ip -g -traceback -check all,noarg_temp_created,bounds,uninit ) #-ftrapuv ) #-init=zero)
# target_compile_options(${PROJECT_NAME} PRIVATE -r8 -i4 -fp-model precise -no-prec-div -no-prec-sqrt -fimf-use-svml -ip -g -traceback -check all,noarg_temp_created,bounds,uninit ) #-ftrapuv ) #-init=zero)

elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL GNU )
# target_compile_options(${PROJECT_NAME} PRIVATE -O3 -finit-local-zero -finline-functions -fimplicit-none -fdefault-real-8 -ffree-line-length-none)
target_compile_options(${PROJECT_NAME} PRIVATE -O2 -g -ffloat-store -finit-local-zero -finline-functions -fimplicit-none -fdefault-real-8 -ffree-line-length-none)
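Note: the new albedo branch of the flag selection keys on FESOM_PLATFORM_STRATEGY. A configure sketch, assuming the strategy can be seeded as a cache variable on the command line:

    # sketch; how FESOM_PLATFORM_STRATEGY is normally set is not shown in this commit
    cmake -S . -B build -DFESOM_PLATFORM_STRATEGY=albedo    # selects the core-avx2/-O3 flag set above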
3 changes: 3 additions & 0 deletions src/MOD_PARTIT.F90
@@ -64,6 +64,7 @@ module MOD_PARTIT
integer :: npes
integer :: mype
integer :: maxPEnum=100
!PS logical :: flag_debug=.false.
integer, allocatable, dimension(:) :: part

! Mesh partition
@@ -151,6 +152,7 @@ subroutine WRITE_T_PARTIT(partit, unit, iostat, iomsg)
write(unit, iostat=iostat, iomsg=iomsg) partit%npes
write(unit, iostat=iostat, iomsg=iomsg) partit%mype
write(unit, iostat=iostat, iomsg=iomsg) partit%maxPEnum
!PS write(unit, iostat=iostat, iomsg=iomsg) partit%flag_debug
call write_bin_array(partit%part, unit, iostat, iomsg)

write(unit, iostat=iostat, iomsg=iomsg) partit%myDim_nod2D
@@ -182,6 +184,7 @@ subroutine READ_T_PARTIT(partit, unit, iostat, iomsg)
read(unit, iostat=iostat, iomsg=iomsg) partit%npes
read(unit, iostat=iostat, iomsg=iomsg) partit%mype
read(unit, iostat=iostat, iomsg=iomsg) partit%maxPEnum
!PS read(unit, iostat=iostat, iomsg=iomsg) partit%flag_debug
call read_bin_array(partit%part, unit, iostat, iomsg)

read(unit, iostat=iostat, iomsg=iomsg) partit%myDim_nod2D
3 changes: 3 additions & 0 deletions src/gen_model_setup.F90
@@ -30,6 +30,9 @@ subroutine setup_model(partit)
read (fileunit, NML=run_config)
!!$ read (fileunit, NML=machine)
close (fileunit)

!PS partit%flag_debug=flag_debug

! ==========
! compute dt
! ==========
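Note: the !PS flag_debug lines added here and in MOD_PARTIT.F90 are commented-out scaffolding; the type component, both derived-type I/O routines, and this namelist assignment would have to be uncommented together, otherwise restart files will not round-trip. A quick consistency check, as a sketch:

    # all flag_debug references are currently commented out (!PS) and must stay in lockstep
    grep -n flag_debug src/MOD_PARTIT.F90 src/gen_model_setup.F90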
5 changes: 5 additions & 0 deletions src/info_module.F90
@@ -97,6 +97,11 @@ subroutine print_definitions()
#else
print '(g0)', 'ENABLE_ALEPH_CRAYMPICH_WORKAROUNDS is OFF'
#endif
#ifdef ENABLE_ALBEDO_INTELMPI_WORKAROUNDS
print '(g0)', 'ENABLE_ALBEDO_INTELMPI_WORKAROUNDS is ON'
#else
print '(g0)', 'ENABLE_ALBEDO_INTELMPI_WORKAROUNDS is OFF'
#endif
#ifdef ENABLE_NVHPC_WORKAROUNDS
print '(g0)', 'ENABLE_NVHPC_WORKAROUNDS is ON'
#else
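Note: print_definitions reports at startup which workaround macros a binary was built with, so the new branch can be verified without inspecting the build. A sketch, assuming the usual fesom.x executable name and that the printout goes to standard output:

    ./fesom.x 2>&1 | grep ENABLE_ALBEDO_INTELMPI_WORKAROUNDS    # prints "... is ON" or "... is OFF"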
4 changes: 4 additions & 0 deletions src/io_fesom_file.F90
@@ -197,6 +197,8 @@ subroutine read_and_scatter_variables(this)
#ifdef ENABLE_ALEPH_CRAYMPICH_WORKAROUNDS
! aleph cray-mpich workaround
call MPI_Barrier(this%comm, mpierr)
#elif ENABLE_ALBEDO_INTELMPI_WORKAROUNDS
call MPI_Barrier(this%comm, mpierr)
#endif
if(this%is_iorank()) then
if(is_2d) then
@@ -263,6 +265,8 @@ subroutine gather_and_write_variables(this)
#ifdef ENABLE_ALEPH_CRAYMPICH_WORKAROUNDS
! aleph cray-mpich workaround
call MPI_Barrier(this%comm, mpierr)
#elif ENABLE_ALBEDO_INTELMPI_WORKAROUNDS
call MPI_Barrier(this%comm, mpierr)
#endif
! the data from our pointer is not contiguous (if it is 3D data), so we cannot pass the pointer directly to MPI
laux = var%local_data_copy(lvl,:) ! todo: remove this buffer and pass the data directly to MPI (change order of data layout to be levelwise or do not gather levelwise but by columns)
