-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Document how to install and run ImpactX on LUMI (CSC).
- Loading branch information
Showing
5 changed files
with
435 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,7 @@ HPC Systems | |
:maxdepth: 1 | ||
|
||
hpc/perlmutter | ||
hpc/lumi | ||
|
||
.. tip:: | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
#!/bin/bash
#
# Copyright 2023 The ImpactX Community
#
# This file is part of ImpactX.
#
# Author: Axel Huebl, Luca Fedeli
# License: BSD-3-Clause-LBNL

# Exit on first error encountered #############################################
#
# -e: abort on a failing command, -u: abort on an unset variable,
# pipefail: a pipeline fails if any of its stages fails
set -eu -o pipefail


# Check: ######################################################################
#
# Was lumi_impactx.profile sourced and configured correctly?
# "${proj-}" expands to an empty string when proj is unset, so the test itself
# does not trip over `set -u`.
if [[ -z "${proj-}" ]]; then
  echo "WARNING: The 'proj' variable is not yet set in your lumi_impactx.profile file! Please edit its line 2 to continue!" >&2
  exit 1
fi
|
||
|
||
# Remove old dependencies #####################################################
#
SW_DIR="${HOME}/sw/lumi/gpu"
# start from an empty install prefix; ${SW_DIR:?} aborts instead of running
# `rm -rf` on an empty path should the variable ever be empty
rm -rf -- "${SW_DIR:?}"
mkdir -p -- "${SW_DIR}"

# remove common user mistakes in python, located in .local instead of a venv
python3 -m pip uninstall -qq -y pyimpactx
python3 -m pip uninstall -qq -y impactx
# best effort: errors from this uninstall are silenced and ignored on purpose
python3 -m pip uninstall -qqq -y mpi4py 2>/dev/null || true
|
||
|
||
# General extra dependencies ##################################################
#

# temporary build directory (created by mktemp, typically on tmpfs):
# avoids issues often seen with $HOME and is faster
build_dir=$(mktemp -d)
# remove the temporary build directory on every exit path (success or error)
trap 'rm -rf -- "${build_dir}"' EXIT
|
||
# c-blosc (I/O compression, for openPMD)
if [[ -d "${HOME}/src/c-blosc" ]]; then
  # reuse an existing checkout: update it and switch to the pinned release tag
  git -C "${HOME}/src/c-blosc" fetch --prune
  git -C "${HOME}/src/c-blosc" checkout v1.21.1
else
  git clone -b v1.21.1 https://github.com/Blosc/c-blosc.git "${HOME}/src/c-blosc"
fi
# make sure we configure into a clean build tree; the build tree lives in
# ${build_dir}, not next to the sources in $HOME/src
rm -rf -- "${build_dir}/c-blosc-lu-build"
cmake -S "${HOME}/src/c-blosc" -B "${build_dir}/c-blosc-lu-build" \
  -DBUILD_TESTS=OFF \
  -DBUILD_BENCHMARKS=OFF \
  -DDEACTIVATE_AVX2=OFF \
  -DCMAKE_INSTALL_PREFIX="${SW_DIR}/c-blosc-1.21.1"
cmake --build "${build_dir}/c-blosc-lu-build" --target install --parallel 16
# the installed files are all we need: drop the build tree again
rm -rf -- "${build_dir}/c-blosc-lu-build"
|
||
# ADIOS2 (for openPMD)
if [[ -d "${HOME}/src/adios2" ]]; then
  # reuse an existing checkout: update it and switch to the pinned release tag
  git -C "${HOME}/src/adios2" fetch --prune
  git -C "${HOME}/src/adios2" checkout v2.8.3
else
  git clone -b v2.8.3 https://github.com/ornladios/ADIOS2.git "${HOME}/src/adios2"
fi
# make sure we configure into a clean build tree; the build tree lives in
# ${build_dir}, not next to the sources in $HOME/src
rm -rf -- "${build_dir}/adios2-lu-build"
cmake -S "${HOME}/src/adios2" -B "${build_dir}/adios2-lu-build" \
  -DADIOS2_USE_Blosc=ON \
  -DADIOS2_USE_Fortran=OFF \
  -DADIOS2_USE_Python=OFF \
  -DADIOS2_USE_ZeroMQ=OFF \
  -DCMAKE_INSTALL_PREFIX="${SW_DIR}/adios2-2.8.3"
cmake --build "${build_dir}/adios2-lu-build" --target install --parallel 16
# the installed files are all we need: drop the build tree again
rm -rf -- "${build_dir}/adios2-lu-build"
|
||
|
||
# Python ######################################################################
#
# bootstrap tooling in the Python that is currently active
python3 -m pip install --upgrade pip
python3 -m pip install --upgrade virtualenv
python3 -m pip cache purge

# re-create the virtual environment from scratch and activate it
rm -rf -- "${SW_DIR:?}/venvs/impactx-lumi"
python3 -m venv "${SW_DIR}/venvs/impactx-lumi"
# shellcheck source=/dev/null
source "${SW_DIR}/venvs/impactx-lumi/bin/activate"

# everything below is installed into the freshly activated venv,
# one package per pip call, in this order
python3 -m pip install --upgrade pip
for pkg in build packaging wheel setuptools cython numpy pandas scipy; do
  python3 -m pip install --upgrade "${pkg}"
done
# mpi4py: built from source (no binary wheel, no cache, no build isolation)
# with the compiler command passed in via MPICC
MPICC="cc -shared" python3 -m pip install --upgrade mpi4py --no-cache-dir --no-build-isolation --no-binary mpi4py
python3 -m pip install --upgrade openpmd-api
python3 -m pip install --upgrade matplotlib
python3 -m pip install --upgrade yt
# install or update ImpactX dependencies
python3 -m pip install --upgrade -r "${HOME}/src/impactx/requirements.txt"
# cupy: no ROCm 5.2 Python wheels
#python3 -m pip install --upgrade torch --index-url https://download.pytorch.org/whl/rocm5.4.2
#python3 -m pip install --upgrade optimas[all]
50 changes: 50 additions & 0 deletions
50
docs/source/install/hpc/lumi-csc/lumi_impactx.profile.example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# please set your project account
#export proj="project_..."

# required dependencies
module load LUMI/23.09 partition/G
module load rocm/5.2.3
module load buildtools/23.09

# optional: just an additional text editor
module load nano

# optional: for openPMD support
SW_DIR="${HOME}/sw/lumi/gpu"
module load cray-hdf5-parallel/1.12.2.7
# prepend the self-built c-blosc and ADIOS2 installs to CMake's search path;
# ${VAR:+:${VAR}} appends the previous value only if there is one, so an unset
# CMAKE_PREFIX_PATH does not leave a dangling ":" (empty entry) behind
export CMAKE_PREFIX_PATH="${SW_DIR}/c-blosc-1.21.1${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH}}"
export CMAKE_PREFIX_PATH="${SW_DIR}/adios2-2.8.3:${CMAKE_PREFIX_PATH}"
export PATH="${SW_DIR}/adios2-2.8.3/bin:${PATH}"

# optional: for Python bindings or libEnsemble
module load cray-python/3.10.10

# activate the ImpactX virtual environment if it has been created already
if [ -d "${SW_DIR}/venvs/impactx-lumi" ]; then
  source "${SW_DIR}/venvs/impactx-lumi/bin/activate"
fi
|
||
# an alias to request an interactive batch node for one hour
#   for parallel execution, start on the batch node: srun <command>
# note: $proj is expanded once, at the time this profile is sourced
alias getNode="salloc -A $proj -J impactx -t 01:00:00 -p dev-g -N 1 --ntasks-per-node=8 --gpus-per-task=1 --gpu-bind=closest"
# an alias to run a command on a batch node for up to 30min
#   usage: runNode <command>
alias runNode="srun -A $proj -J impactx -t 00:30:00 -p dev-g -N 1 --ntasks-per-node=8 --gpus-per-task=1 --gpu-bind=closest"

# GPU-aware MPI
export MPICH_GPU_SUPPORT_ENABLED=1

# optimize ROCm/HIP compilation for MI250X
export AMREX_AMD_ARCH=gfx90a

# compiler environment hints
# Warning: using the compiler wrappers cc and CC
#          instead of amdclang and amdclang++
#          currently results in a significant
#          loss of performance
# `command -v` is the shell builtin lookup; it prints nothing (instead of an
# error text) when a compiler is not found in PATH
CC=$(command -v amdclang)
CXX=$(command -v amdclang++)
FC=$(command -v amdflang)
export CC CXX FC
# NOTE(review): ROCM_PATH is presumably exported by the rocm module - confirm
export CFLAGS="-I${ROCM_PATH}/include"
export CXXFLAGS="-I${ROCM_PATH}/include -Wno-pass-failed"
export LDFLAGS="-L${ROCM_PATH}/lib -lamdhip64"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
#!/bin/bash -l

#SBATCH -A <project id>
#SBATCH --nodes=1
#SBATCH --time=00:10:00
#SBATCH --job-name=impactx
#SBATCH --output=ImpactX.o%j
#SBATCH --error=ImpactX.e%j
#SBATCH --partition=standard-g
#SBATCH --ntasks-per-node=8
#SBATCH --gpus-per-node=8

# log the start time of the job
date

# note (12-12-22)
# this environment setting is currently needed on LUMI to work-around a
# known issue with Libfabric
#export FI_MR_CACHE_MAX_COUNT=0  # libfabric disable caching
# or, less invasive:
export FI_MR_CACHE_MONITOR=memhooks  # alternative cache monitor

# note (9-2-22, OLCFDEV-1079)
# this environment setting is needed to avoid that rocFFT writes a cache in
# the home directory, which does not scale.
export ROCFFT_RTC_CACHE_PATH=/dev/null

# Seen since August 2023 on OLCF systems
# OLCFDEV-1597: OFI Poll Failed UNDELIVERABLE Errors
# https://docs.olcf.ornl.gov/systems/frontier_user_guide.html#olcfdev-1597-ofi-poll-failed-undeliverable-errors
# NOTE(review): an earlier copy of this note had both settings commented out
#               with the remark "not yet seen on LUMI?"; they are enabled here
export MPICH_SMP_SINGLE_COPY_MODE=NONE
export FI_CXI_RX_MATCH_MODE=software
|
||
# LUMI documentation suggests using the following wrapper script
# to set the ROCR_VISIBLE_DEVICES to the value of SLURM_LOCALID
# see https://docs.lumi-supercomputer.eu/runjobs/scheduled-jobs/lumig-job/
#
# The heredoc delimiter is quoted, so the wrapper is written out literally:
# $SLURM_LOCALID and "$@" are evaluated when the wrapper runs, not here.
# exec "$@" hands over the wrapped command with every argument intact
# (unquoted $* would re-split arguments that contain whitespace).
cat << 'EOF' > select_gpu
#!/bin/bash
export ROCR_VISIBLE_DEVICES=$SLURM_LOCALID
exec "$@"
EOF

chmod +x ./select_gpu
|
||
# short pause before the launch
# NOTE(review): presumably gives the freshly written select_gpu wrapper time
#               to become visible on the file system - confirm
sleep 1

# LUMI documentation suggests using the following CPU bind
# in order to have 6 threads per GPU (blosc compression in adios2 uses threads)
# see https://docs.lumi-supercomputer.eu/runjobs/scheduled-jobs/lumig-job/
#
# WARNING: the following CPU_BIND options don't work on the dev-g partition.
#          If you want to run your simulation on dev-g, please comment them
#          out and replace them with CPU_BIND="map_cpu:49,57,17,25,1,9,33,41"
#
# one hexadecimal CPU mask per task (8 in total); every mask is a shifted
# 0x7e, i.e. six set bits
CPU_BIND="mask_cpu:7e000000000000,7e00000000000000"
CPU_BIND="${CPU_BIND},7e0000,7e000000"
CPU_BIND="${CPU_BIND},7e,7e00"
CPU_BIND="${CPU_BIND},7e00000000,7e0000000000"

export OMP_NUM_THREADS=6

export MPICH_GPU_SUPPORT_ENABLED=1

# run ImpactX through the select_gpu wrapper; output is shown and also
# written to outputs.txt
srun --cpu-bind="${CPU_BIND}" ./select_gpu \
  ./impactx inputs \
  | tee outputs.txt

# the wrapper script is only needed for the duration of the run
rm -rf ./select_gpu
Oops, something went wrong.