
Merge pull request #400 from FESOM/refactoring_albedo_env
Refactoring albedo env
koldunovn authored Dec 22, 2022
2 parents 29ad2a1 + 025a738 commit e08f711
Showing 10 changed files with 914 additions and 700 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -14,6 +14,7 @@ set(OIFS_COUPLED OFF CACHE BOOL "compile fesom coupled to OpenIFS. (Also needs F
set(CRAY OFF CACHE BOOL "compile with cray ftn")
set(USE_ICEPACK OFF CACHE BOOL "compile fesom with the Icepack modules for sea ice column physics.")
set(OPENMP_REPRODUCIBLE OFF CACHE BOOL "serialize OpenMP loops that are critical for reproducible results")

#set(VERBOSE OFF CACHE BOOL "toggle debug output")
#add_subdirectory(oasis3-mct/lib/psmile)
add_subdirectory(src)
22 changes: 18 additions & 4 deletions env/albedo/shell
@@ -1,9 +1,23 @@
# make the contents as shell-agnostic as possible so we can include them with bash, zsh and others

module load intel-oneapi-compilers
module load intel-oneapi-mkl/2022.1.0
export FC="mpiifort -qmkl" CC=mpiicc CXX=mpiicpc
module load intel-oneapi-mpi/2021.6.0
export FC=mpiifort CC=mpiicc CXX=mpiicpc

module load intel-oneapi-mkl/2022.1.0
module load netcdf-fortran/4.5.4-intel-oneapi-mpi2021.6.0-oneapi2022.1.0
module load netcdf-c/4.8.1-intel-oneapi-mpi2021.6.0-oneapi2022.1.0

# from DKRZ recommended environment variables on levante
# (https://docs.dkrz.de/doc/levante/running-jobs/runtime-settings.html)
export HCOLL_ENABLE_MCAST_ALL="0"
export HCOLL_MAIN_IB=mlx5_0:1
export UCX_IB_ADDR_TYPE=ib_global
export UCX_NET_DEVICES=mlx5_0:1
export UCX_TLS=mm,knem,cma,dc_mlx5,dc_x,self # this line brings the largest speedup, roughly a factor of 1.5
export UCX_UNIFIED_MODE=y
export UCX_HANDLE_ERRORS=bt
export HDF5_USE_FILE_LOCKING=FALSE
export I_MPI_PMI=pmi2
export I_MPI_PMI_LIBRARY=/usr/lib64/libpmi2.so

export ENABLE_ALBEDO_INTELMPI_WORKAROUNDS=''
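Note: the empty export above is deliberate — src/CMakeLists.txt below only checks whether the variable is defined, not its value. A minimal usage sketch, assuming the file is sourced from the repository root before configuring:

    # sketch only: paths and build layout are assumptions, not part of this commit
    source env/albedo/shell                  # module loads plus the UCX/Intel MPI tuning above
    env | grep -E '^(UCX_|HCOLL_|I_MPI_)'    # verify the runtime settings are exported
    cmake -S . -B build                      # picks up ENABLE_ALBEDO_INTELMPI_WORKAROUNDS from the env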
19 changes: 16 additions & 3 deletions src/CMakeLists.txt
@@ -13,6 +13,12 @@ if(DEFINED ENV{ENABLE_ALEPH_CRAYMPICH_WORKAROUNDS}) # be able to set the initial
else()
option(ALEPH_CRAYMPICH_WORKAROUNDS "workaround for performance issues on aleph" OFF)
endif()
if(DEFINED ENV{ENABLE_ALBEDO_INTELMPI_WORKAROUNDS}) # be able to set the initial cache value from our env settings for albedo, not only via the cmake command
option(ALBEDO_INTELMPI_WORKAROUNDS "workaround for performance issues on albedo" ON)
else()
option(ALBEDO_INTELMPI_WORKAROUNDS "workaround for performance issues on albedo" OFF)
endif()
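Note: this mirrors the existing aleph block — the option defaults to ON whenever the environment variable exists (env/albedo/shell exports it as an empty string). Two equivalent ways to enable it, as a sketch assuming a fresh build directory:

    ENABLE_ALBEDO_INTELMPI_WORKAROUNDS='' cmake -S . -B build   # env presence flips the default to ON
    cmake -S . -B build -DALBEDO_INTELMPI_WORKAROUNDS=ON        # or set the option explicitly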


if(ALEPH_CRAYMPICH_WORKAROUNDS)
# todo: enable these options only for our targets
@@ -24,8 +30,11 @@ if(ALEPH_CRAYMPICH_WORKAROUNDS)
#add_compile_options(-DDISABLE_PARALLEL_RESTART_READ) # reading restarts is slow when doing it on parallel on aleph, switch it off for now
add_compile_options(-DENABLE_ALEPH_CRAYMPICH_WORKAROUNDS)
endif()
if(ALBEDO_INTELMPI_WORKAROUNDS)
add_compile_options(-DENABLE_ALBEDO_INTELMPI_WORKAROUNDS)
endif()
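Note: the compile definition added here is what the Fortran #ifdef guards below (info_module.F90, io_fesom_file.F90) react to. A quick check that it reaches the compile lines, as a sketch assuming the default Makefile generator:

    cmake --build build -- VERBOSE=1 2>&1 | grep -m1 ENABLE_ALBEDO_INTELMPI_WORKAROUNDS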

option(DISABLE_MULTITHREADING "disable asynchronous operations" OFF)
option(DISABLE_MULTITHREADING "disable asynchronous operations" ON)
option(ENABLE_OPENACC "compile with OpenACC support" OFF)
set(NV_GPU_ARCH "cc80" CACHE STRING "GPU arch for nvfortran compiler (cc35,cc50,cc60,cc70,cc80,...)")

@@ -129,13 +138,17 @@ if(${CMAKE_Fortran_COMPILER_ID} STREQUAL Intel )
else()
target_compile_options(${PROJECT_NAME} PRIVATE -r8 -i4 -fp-model precise -no-prec-div -no-prec-sqrt -fimf-use-svml -ip -init=zero -no-wrap-margin)
endif()
# target_compile_options(${PROJECT_NAME} PRIVATE -qopenmp -r8 -i4 -fp-model precise -no-prec-div -no-prec-sqrt -fimf-use-svml -xHost -ip -g -traceback -check all,noarg_temp_created,bounds,uninit ) #-ftrapuv ) #-init=zero)
# target_compile_options(${PROJECT_NAME} PRIVATE -r8 -i4 -fp-model precise -no-prec-div -no-prec-sqrt -fimf-use-svml -xHost -ip -g -traceback -check all,noarg_temp_created,bounds,uninit ) #-ftrapuv ) #-init=zero)
if(${FESOM_PLATFORM_STRATEGY} STREQUAL levante.dkrz.de )
target_compile_options(${PROJECT_NAME} PRIVATE -march=core-avx2 -mtune=core-avx2)
elseif(${FESOM_PLATFORM_STRATEGY} STREQUAL albedo)
target_compile_options(${PROJECT_NAME} PRIVATE -march=core-avx2 -O3 -ip -fPIC -qopt-malloc-options=2 -qopt-prefetch=5 -unroll-aggressive) #NEC mpi option
else()
target_compile_options(${PROJECT_NAME} PRIVATE -xHost)
endif()
# target_compile_options(${PROJECT_NAME} PRIVATE -g -traceback ) #-check all,noarg_temp_created,bounds,uninit ) #-ftrapuv ) #-init=zero)
# target_compile_options(${PROJECT_NAME} PRIVATE -qopenmp -r8 -i4 -fp-model precise -no-prec-div -no-prec-sqrt -fimf-use-svml -xHost -ip -g -traceback -check all,noarg_temp_created,bounds,uninit ) #-ftrapuv ) #-init=zero)
# target_compile_options(${PROJECT_NAME} PRIVATE -r8 -i4 -fp-model precise -no-prec-div -no-prec-sqrt -fimf-use-svml -ip -g -traceback -check all,noarg_temp_created,bounds,uninit ) #-ftrapuv ) #-init=zero)

elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL GNU )
# target_compile_options(${PROJECT_NAME} PRIVATE -O3 -finit-local-zero -finline-functions -fimplicit-none -fdefault-real-8 -ffree-line-length-none)
target_compile_options(${PROJECT_NAME} PRIVATE -O2 -g -ffloat-store -finit-local-zero -finline-functions -fimplicit-none -fdefault-real-8 -ffree-line-length-none)
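Note: the new albedo branch of the flag selection keys on FESOM_PLATFORM_STRATEGY. A configure sketch, assuming the strategy can be seeded as a cache variable on the command line:

    # sketch; how FESOM_PLATFORM_STRATEGY is normally set is not shown in this commit
    cmake -S . -B build -DFESOM_PLATFORM_STRATEGY=albedo    # selects the core-avx2/-O3 flag set above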
3 changes: 3 additions & 0 deletions src/MOD_PARTIT.F90
@@ -64,6 +64,7 @@ module MOD_PARTIT
integer :: npes
integer :: mype
integer :: maxPEnum=100
!PS logical :: flag_debug=.false.
integer, allocatable, dimension(:) :: part

! Mesh partition
@@ -151,6 +152,7 @@ subroutine WRITE_T_PARTIT(partit, unit, iostat, iomsg)
write(unit, iostat=iostat, iomsg=iomsg) partit%npes
write(unit, iostat=iostat, iomsg=iomsg) partit%mype
write(unit, iostat=iostat, iomsg=iomsg) partit%maxPEnum
!PS write(unit, iostat=iostat, iomsg=iomsg) partit%flag_debug
call write_bin_array(partit%part, unit, iostat, iomsg)

write(unit, iostat=iostat, iomsg=iomsg) partit%myDim_nod2D
@@ -182,6 +184,7 @@ subroutine READ_T_PARTIT(partit, unit, iostat, iomsg)
read(unit, iostat=iostat, iomsg=iomsg) partit%npes
read(unit, iostat=iostat, iomsg=iomsg) partit%mype
read(unit, iostat=iostat, iomsg=iomsg) partit%maxPEnum
!PS read(unit, iostat=iostat, iomsg=iomsg) partit%flag_debug
call read_bin_array(partit%part, unit, iostat, iomsg)

read(unit, iostat=iostat, iomsg=iomsg) partit%myDim_nod2D
3 changes: 3 additions & 0 deletions src/gen_model_setup.F90
@@ -30,6 +30,9 @@ subroutine setup_model(partit)
read (fileunit, NML=run_config)
!!$ read (fileunit, NML=machine)
close (fileunit)

!PS partit%flag_debug=flag_debug

! ==========
! compute dt
! ==========
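Note: the !PS flag_debug lines added here and in MOD_PARTIT.F90 are commented-out scaffolding; the type component, both derived-type I/O routines, and this namelist assignment would have to be uncommented together, otherwise restart files will not round-trip. A quick consistency check, as a sketch:

    # all flag_debug references are currently commented out (!PS) and must stay in lockstep
    grep -n flag_debug src/MOD_PARTIT.F90 src/gen_model_setup.F90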
5 changes: 5 additions & 0 deletions src/info_module.F90
@@ -97,6 +97,11 @@ subroutine print_definitions()
#else
print '(g0)', 'ENABLE_ALEPH_CRAYMPICH_WORKAROUNDS is OFF'
#endif
#ifdef ENABLE_ALBEDO_INTELMPI_WORKAROUNDS
print '(g0)', 'ENABLE_ALBEDO_INTELMPI_WORKAROUNDS is ON'
#else
print '(g0)', 'ENABLE_ALBEDO_INTELMPI_WORKAROUNDS is OFF'
#endif
#ifdef ENABLE_NVHPC_WORKAROUNDS
print '(g0)', 'ENABLE_NVHPC_WORKAROUNDS is ON'
#else
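Note: print_definitions reports at startup which workaround macros a binary was built with, so the new branch can be verified without inspecting the build. A sketch, assuming the usual fesom.x executable name and that the printout goes to standard output:

    ./fesom.x 2>&1 | grep ENABLE_ALBEDO_INTELMPI_WORKAROUNDS    # prints "... is ON" or "... is OFF"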
4 changes: 4 additions & 0 deletions src/io_fesom_file.F90
@@ -197,6 +197,8 @@ subroutine read_and_scatter_variables(this)
#ifdef ENABLE_ALEPH_CRAYMPICH_WORKAROUNDS
! aleph cray-mpich workaround
call MPI_Barrier(this%comm, mpierr)
#elif ENABLE_ALBEDO_INTELMPI_WORKAROUNDS
call MPI_Barrier(this%comm, mpierr)
#endif
if(this%is_iorank()) then
if(is_2d) then
@@ -263,6 +265,8 @@ subroutine gather_and_write_variables(this)
#ifdef ENABLE_ALEPH_CRAYMPICH_WORKAROUNDS
! aleph cray-mpich workaround
call MPI_Barrier(this%comm, mpierr)
#elif ENABLE_ALBEDO_INTELMPI_WORKAROUNDS
call MPI_Barrier(this%comm, mpierr)
#endif
! the data from our pointer is not contiguous (if it is 3D data), so we cannot pass the pointer directly to MPI
laux = var%local_data_copy(lvl,:) ! todo: remove this buffer and pass the data directly to MPI (change order of data layout to be levelwise or do not gather levelwise but by columns)
