From 186049e393917ce7434e1971537252ff5d60861d Mon Sep 17 00:00:00 2001
From: Axel Huebl
Date: Thu, 28 Mar 2024 15:32:41 -0700
Subject: [PATCH] Doc: LUMI

Document how to install and run ImpactX on LUMI (CSC).
---
 docs/source/install/hpc.rst                   |   1 +
 .../hpc/lumi-csc/install_dependencies.sh      | 127 ++++++++++
 .../hpc/lumi-csc/lumi_impactx.profile.example |  51 +++++
 docs/source/install/hpc/lumi-csc/submit.sh    |  81 +++++++
 docs/source/install/hpc/lumi.rst              | 216 ++++++++++++++++++
 5 files changed, 476 insertions(+)
 create mode 100755 docs/source/install/hpc/lumi-csc/install_dependencies.sh
 create mode 100644 docs/source/install/hpc/lumi-csc/lumi_impactx.profile.example
 create mode 100644 docs/source/install/hpc/lumi-csc/submit.sh
 create mode 100644 docs/source/install/hpc/lumi.rst

diff --git a/docs/source/install/hpc.rst b/docs/source/install/hpc.rst
index 5a90fa202..e9f836647 100644
--- a/docs/source/install/hpc.rst
+++ b/docs/source/install/hpc.rst
@@ -25,6 +25,7 @@ HPC Systems
    :maxdepth: 1

    hpc/perlmutter
+   hpc/lumi

 .. tip::

diff --git a/docs/source/install/hpc/lumi-csc/install_dependencies.sh b/docs/source/install/hpc/lumi-csc/install_dependencies.sh
new file mode 100755
index 000000000..2ba792154
--- /dev/null
+++ b/docs/source/install/hpc/lumi-csc/install_dependencies.sh
@@ -0,0 +1,127 @@
#!/bin/bash
#
# Copyright 2023 The ImpactX Community
#
# This file is part of ImpactX.
#
# Author: Axel Huebl, Luca Fedeli
# License: BSD-3-Clause-LBNL

# Exit on first error encountered #############################################
#
set -eu -o pipefail


# Check: ######################################################################
#
# Was lumi_impactx.profile sourced and configured correctly?
if [ -z ${proj-} ]; then echo "WARNING: The 'proj' variable is not yet set in your lumi_impactx.profile file! Please edit its line 2 to continue!"; exit 1; fi

# Remove old dependencies #####################################################
#
SRC_DIR="${HOME}/src"
SW_DIR="${HOME}/sw/lumi/gpu"
rm -rf ${SW_DIR}
mkdir -p ${SW_DIR}
mkdir -p ${SRC_DIR}

# remove common user mistakes in python, located in .local instead of a venv
python3 -m pip uninstall -qq -y pyimpactx
python3 -m pip uninstall -qq -y impactx
python3 -m pip uninstall -qqq -y mpi4py 2>/dev/null || true


# General extra dependencies ##################################################
#

# tmpfs build directory: avoids issues often seen with $HOME and is faster
build_dir=$(mktemp -d)

# c-blosc (I/O compression, for openPMD)
if [ -d ${SRC_DIR}/c-blosc ]
then
    cd ${SRC_DIR}/c-blosc
    git fetch --prune
    git checkout v1.21.1
    cd -
else
    git clone -b v1.21.1 https://github.com/Blosc/c-blosc.git ${SRC_DIR}/c-blosc
fi
rm -rf ${SRC_DIR}/c-blosc-lu-build
cmake -S ${SRC_DIR}/c-blosc \
    -B ${build_dir}/c-blosc-lu-build \
    -DBUILD_TESTS=OFF \
    -DBUILD_BENCHMARKS=OFF \
    -DDEACTIVATE_AVX2=OFF \
    -DCMAKE_INSTALL_PREFIX=${SW_DIR}/c-blosc-1.21.1
cmake --build ${build_dir}/c-blosc-lu-build --target install --parallel 16
rm -rf ${build_dir}/c-blosc-lu-build

# HDF5 (for openPMD)
if [ -d ${SRC_DIR}/hdf5 ]
then
    cd ${SRC_DIR}/hdf5
    git fetch --prune
    git checkout hdf5-1_14_1-2
    cd -
else
    git clone -b hdf5-1_14_1-2 https://github.com/HDFGroup/hdf5.git ${SRC_DIR}/hdf5
fi
cmake -S ${SRC_DIR}/hdf5 \
    -B ${build_dir}/hdf5-build \
    -DBUILD_TESTING=OFF \
    -DHDF5_ENABLE_PARALLEL=ON \
    -DCMAKE_INSTALL_PREFIX=${SW_DIR}/hdf5-1.14.1.2
cmake --build ${build_dir}/hdf5-build --target install --parallel 10
rm -rf ${build_dir}/hdf5-build

# ADIOS2 (for openPMD)
if [ -d ${SRC_DIR}/adios2 ]
then
    cd ${SRC_DIR}/adios2
    git fetch --prune
    git checkout v2.8.3
    cd -
else
    git clone -b v2.8.3 https://github.com/ornladios/ADIOS2.git ${SRC_DIR}/adios2
fi
rm -rf ${SRC_DIR}/adios2-lu-build
cmake -S ${SRC_DIR}/adios2 \
    -B ${build_dir}/adios2-lu-build \
    -DADIOS2_USE_Blosc=ON \
    -DADIOS2_USE_Fortran=OFF \
    -DADIOS2_USE_HDF5=OFF \
    -DADIOS2_USE_Python=OFF \
    -DADIOS2_USE_ZeroMQ=OFF \
    -DCMAKE_INSTALL_PREFIX=${SW_DIR}/adios2-2.8.3
cmake --build ${build_dir}/adios2-lu-build --target install -j 16
rm -rf ${build_dir}/adios2-lu-build


# Python ######################################################################
#
python3 -m pip install --upgrade pip
python3 -m pip install --upgrade virtualenv
python3 -m pip cache purge
rm -rf ${SW_DIR}/venvs/impactx-lumi
python3 -m venv ${SW_DIR}/venvs/impactx-lumi
source ${SW_DIR}/venvs/impactx-lumi/bin/activate
python3 -m pip install --upgrade pip
python3 -m pip install --upgrade build
python3 -m pip install --upgrade packaging
python3 -m pip install --upgrade wheel
python3 -m pip install --upgrade setuptools
python3 -m pip install --upgrade cython
python3 -m pip install --upgrade numpy
python3 -m pip install --upgrade pandas
python3 -m pip install --upgrade scipy
MPICC="cc -shared" python3 -m pip install --upgrade mpi4py --no-cache-dir --no-build-isolation --no-binary mpi4py
python3 -m pip install --upgrade openpmd-api
python3 -m pip install --upgrade matplotlib
python3 -m pip install --upgrade yt
# install or update ImpactX dependencies
python3 -m pip install --upgrade -r ${SRC_DIR}/impactx/requirements.txt

# cupy: no ROCm 5.2 Python wheels
#python3 -m pip install --upgrade torch --index-url https://download.pytorch.org/whl/rocm5.4.2
#python3 -m pip install --upgrade optimas[all]

diff --git a/docs/source/install/hpc/lumi-csc/lumi_impactx.profile.example b/docs/source/install/hpc/lumi-csc/lumi_impactx.profile.example
new file mode 100644
index 000000000..1bd371a17
--- /dev/null
+++ b/docs/source/install/hpc/lumi-csc/lumi_impactx.profile.example
@@ -0,0 +1,51 @@
# please set your project account
#export proj="project_..."

# required dependencies
module load LUMI/23.09 partition/G
module load rocm/5.2.3
module load buildtools/23.09

# optional: just an additional text editor
module load nano

# optional: for openPMD support
SW_DIR="${HOME}/sw/lumi/gpu"
export CMAKE_PREFIX_PATH=${SW_DIR}/c-blosc-1.21.1:$CMAKE_PREFIX_PATH
export CMAKE_PREFIX_PATH=${SW_DIR}/hdf5-1.14.1.2:$CMAKE_PREFIX_PATH
export CMAKE_PREFIX_PATH=${SW_DIR}/adios2-2.8.3:$CMAKE_PREFIX_PATH
export PATH=${SW_DIR}/adios2-2.8.3/bin:${PATH}
export PATH=${SW_DIR}/hdf5-1.14.1.2/bin:${PATH}

# optional: for Python bindings or libEnsemble
module load cray-python/3.10.10

if [ -d "${SW_DIR}/venvs/impactx-lumi" ]
then
    source ${SW_DIR}/venvs/impactx-lumi/bin/activate
fi

# an alias to request an interactive batch node for one hour
# for parallel execution, start on the batch node: srun <command>
alias getNode="salloc -A $proj -J impactx -t 01:00:00 -p dev-g -N 1 --ntasks-per-node=8 --gpus-per-task=1 --gpu-bind=closest"
# an alias to run a command on a batch node for up to 30min
# usage: runNode <command>
alias runNode="srun -A $proj -J impactx -t 00:30:00 -p dev-g -N 1 --ntasks-per-node=8 --gpus-per-task=1 --gpu-bind=closest"

# GPU-aware MPI
export MPICH_GPU_SUPPORT_ENABLED=1

# optimize ROCm/HIP compilation for MI250X
export AMREX_AMD_ARCH=gfx90a

# compiler environment hints
# Warning: using the compiler wrappers cc and CC
#          instead of amdclang and amdclang++
#          currently results in a significant
#          loss of performance
export CC=$(which amdclang)
export CXX=$(which amdclang++)
export FC=$(which amdflang)
export CFLAGS="-I${ROCM_PATH}/include"
export CXXFLAGS="-I${ROCM_PATH}/include -Wno-pass-failed"
export LDFLAGS="-L${ROCM_PATH}/lib -lamdhip64 ${PE_MPICH_GTL_DIR_amd_gfx90a} -lmpi_gtl_hsa"

diff --git a/docs/source/install/hpc/lumi-csc/submit.sh b/docs/source/install/hpc/lumi-csc/submit.sh
new file mode 100644
index 000000000..afbc9905d
--- /dev/null
+++ b/docs/source/install/hpc/lumi-csc/submit.sh
@@ -0,0 +1,81 @@
#!/bin/bash -l

#SBATCH -A <proj>
#SBATCH --nodes=1
#SBATCH --time=00:10:00
#SBATCH --job-name=impactx
#SBATCH --output=ImpactX.o%j
#SBATCH --error=ImpactX.e%j
#SBATCH --partition=standard-g
#SBATCH --ntasks-per-node=8
#SBATCH --gpus-per-node=8

# executable & inputs file or ...
EXE=./impactx
INPUTS=inputs
# ... python interpreter & PICMI script here
#EXE=python3
#INPUTS=run_fodo.py

date

# note (12-12-22)
# this environment setting is currently needed on LUMI to work-around a
# known issue with Libfabric
#export FI_MR_CACHE_MAX_COUNT=0  # libfabric disable caching
# or, less invasive:
export FI_MR_CACHE_MONITOR=memhooks  # alternative cache monitor

# Seen since August 2023 on OLCF (not yet seen on LUMI?)
# OLCFDEV-1597: OFI Poll Failed UNDELIVERABLE Errors
# https://docs.olcf.ornl.gov/systems/frontier_user_guide.html#olcfdev-1597-ofi-poll-failed-undeliverable-errors
#export MPICH_SMP_SINGLE_COPY_MODE=NONE
#export FI_CXI_RX_MATCH_MODE=software

# note (9-2-22, OLCFDEV-1079)
# this environment setting is needed to avoid that rocFFT writes a cache in
# the home directory, which does not scale.
export ROCFFT_RTC_CACHE_PATH=/dev/null

# Seen since August 2023
# OLCFDEV-1597: OFI Poll Failed UNDELIVERABLE Errors
# https://docs.olcf.ornl.gov/systems/frontier_user_guide.html#olcfdev-1597-ofi-poll-failed-undeliverable-errors
export MPICH_SMP_SINGLE_COPY_MODE=NONE
export FI_CXI_RX_MATCH_MODE=software

# LUMI documentation suggests using the following wrapper script
# to set ROCR_VISIBLE_DEVICES to the value of SLURM_LOCALID
# see https://docs.lumi-supercomputer.eu/runjobs/scheduled-jobs/lumig-job/
cat << EOF > select_gpu
#!/bin/bash

export ROCR_VISIBLE_DEVICES=\$SLURM_LOCALID
exec \$*
EOF

chmod +x ./select_gpu

sleep 1

# LUMI documentation suggests using the following CPU bind
# in order to have 6 threads per GPU (blosc compression in adios2 uses threads)
# see https://docs.lumi-supercomputer.eu/runjobs/scheduled-jobs/lumig-job/
#
# WARNING: the following CPU_BIND options don't work on the dev-g partition.
#          If you want to run your simulation on dev-g, please comment them
#          out and replace them with CPU_BIND="map_cpu:49,57,17,25,1,9,33,41"
#
CPU_BIND="mask_cpu:7e000000000000,7e00000000000000"
CPU_BIND="${CPU_BIND},7e0000,7e000000"
CPU_BIND="${CPU_BIND},7e,7e00"
CPU_BIND="${CPU_BIND},7e00000000,7e0000000000"

export OMP_NUM_THREADS=6

export MPICH_GPU_SUPPORT_ENABLED=1

srun --cpu-bind=${CPU_BIND} ./select_gpu \
    ${EXE} ${INPUTS} \
    | tee outputs.txt

rm -rf ./select_gpu

diff --git a/docs/source/install/hpc/lumi.rst b/docs/source/install/hpc/lumi.rst
new file mode 100644
index 000000000..56ee01ab4
--- /dev/null
+++ b/docs/source/install/hpc/lumi.rst
@@ -0,0 +1,216 @@
.. _building-lumi:

LUMI (CSC)
==========

The `LUMI cluster `__ is located at CSC (Finland).
Each node contains 4 AMD MI250X GPUs, each with 2 Graphics Compute Dies (GCDs), for a total of 8 GCDs per node.
You can think of the 8 GCDs as 8 separate GPUs, each having 64 GB of high-bandwidth memory (HBM2E).

Introduction
------------

If you are new to this system, **please see the following resources**:

* `LUMI user guide `__

  * `Project Maintenance `__ and `SSH Key management `__
  * `Quotas and projects `__
* Batch system: `Slurm `__
* `Data analytics and visualization `__
* `Production directories `__:

  * ``$HOME``: single user, intended to store user configuration files and personal data (20 GB default quota)
  * ``/project/$proj``: shared with all members of a project, purged at the end of a project (50 GB default quota)
  * ``/scratch/$proj``: temporary storage, main storage to be used for disk I/O needs when running simulations on LUMI, purged every 90 days (50 TB default quota)


.. _building-lumi-preparation:

Preparation
-----------

Use the following commands to download the ImpactX source code:

.. code-block:: bash

   git clone https://github.com/ECP-WarpX/impactx.git $HOME/src/impactx

We use system software modules and add environment hints and further dependencies via the file ``$HOME/lumi_impactx.profile``.
Create it now:
.. code-block:: bash

   cp $HOME/src/impactx/docs/source/install/hpc/lumi-csc/lumi_impactx.profile.example $HOME/lumi_impactx.profile

.. dropdown:: Script Details
   :color: light
   :icon: info
   :animate: fade-in-slide-down

   .. literalinclude:: lumi-csc/lumi_impactx.profile.example
      :language: bash

Edit the 2nd line of this script, which sets the ``export proj="project_..."`` variable, using a text editor
such as ``nano``, ``emacs``, or ``vim`` (all available by default on LUMI login nodes).
You can find out your project name by running ``lumi-ldap-userinfo`` on LUMI.
For example, if you are a member of the project ``project_465000962``, then run ``nano $HOME/lumi_impactx.profile`` and edit line 2 to read:

.. code-block:: bash

   export proj="project_465000962"

Exit the ``nano`` editor with ``Ctrl`` + ``O`` (save) and then ``Ctrl`` + ``X`` (exit).

.. important::

   Now, and as the first step on future logins to LUMI, activate these environment settings:

   .. code-block:: bash

      source $HOME/lumi_impactx.profile

Finally, since LUMI does not yet provide software modules for some of our dependencies, install them once:

.. code-block:: bash

   bash $HOME/src/impactx/docs/source/install/hpc/lumi-csc/install_dependencies.sh
   source $HOME/sw/lumi/gpu/venvs/impactx-lumi/bin/activate

.. dropdown:: Script Details
   :color: light
   :icon: info
   :animate: fade-in-slide-down

   .. literalinclude:: lumi-csc/install_dependencies.sh
      :language: bash


.. _building-lumi-compilation:

Compilation
-----------

Use the following :ref:`cmake commands ` to compile the application executable:

.. code-block:: bash

   cd $HOME/src/impactx
   rm -rf build_lumi

   cmake -S . -B build_lumi -DImpactX_COMPUTE=HIP
   cmake --build build_lumi -j 16

The ImpactX application executables are now in ``$HOME/src/impactx/build_lumi/bin/``.
Additionally, the following commands will install ImpactX as a Python module:

.. code-block:: bash

   rm -rf build_lumi_py

   cmake -S . -B build_lumi_py -DImpactX_COMPUTE=HIP -DImpactX_PYTHON=ON
   cmake --build build_lumi_py -j 16 --target pip_install


.. _building-lumi-update:

Update ImpactX & Dependencies
-----------------------------

If you already installed ImpactX in the past and want to update it, start by getting the latest source code:

.. code-block:: bash

   cd $HOME/src/impactx

   # read the output of this command - does it look ok?
   git status

   # get the latest ImpactX source code
   git fetch
   git pull

   # read the output of these commands - do they look ok?
   git status
   git log  # press q to exit

And, if needed,

- :ref:`update the lumi_impactx.profile file <building-lumi-preparation>`,
- log out and back into the system, activate the now updated environment profile as usual,
- :ref:`execute the dependency install scripts <building-lumi-preparation>`.

As a last step, clean the build directory (``rm -rf $HOME/src/impactx/build_lumi``) and rebuild ImpactX.


.. _running-cpp-lumi:

Running
-------

.. _running-cpp-lumi-MI250X-GPUs:

MI250X GPUs (2x64 GB)
^^^^^^^^^^^^^^^^^^^^^

The GPU partition on the supercomputer LUMI at CSC has up to `2978 nodes `__, each with 8 Graphics Compute Dies (GCDs).
ImpactX runs one MPI rank per Graphics Compute Die.

For interactive runs, simply use the aliases ``getNode`` or ``runNode ...``.
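
For example, a quick interactive test could look like the following sketch.
It assumes that, as in the batch script further below, you copied the compiled application executable into the current directory as ``impactx`` and placed an ImpactX ``inputs`` file next to it; adjust names and paths to your build and example.

.. code-block:: bash

   # single-node test run (8 GCDs, up to 30 min) via the runNode alias
   # defined in lumi_impactx.profile
   runNode ./impactx inputs

   # alternative: request an interactive node for one hour (getNode alias),
   # then launch individual runs on it with srun
   getNode
   srun ./impactx inputs
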
The batch script below can be used to run an ImpactX simulation on multiple nodes (change ``-N`` accordingly).
Replace descriptions between chevrons ``<>`` with relevant values, for instance ``<proj>`` or the concrete inputs file.
Copy the executable or point to it via ``EXE`` and adjust the path for the ``INPUTS`` variable accordingly.

.. literalinclude:: lumi-csc/submit.sh
   :language: bash
   :caption: You can copy this file from ``lumi-csc/submit.sh``.


.. _post-processing-lumi:

Post-Processing
---------------

.. note::

   TODO: Document any Jupyter or data services.


.. _known-lumi-issues:

Known System Issues
-------------------

.. warning::

   December 12th, 2022:
   There is a caching bug in libFabric that causes ImpactX simulations to occasionally hang on LUMI on more than 1 node.

   As a work-around, please export the following environment variable in your job scripts until the issue is fixed:

   .. code-block:: bash

      #export FI_MR_CACHE_MAX_COUNT=0  # libfabric disable caching
      # or, less invasive:
      export FI_MR_CACHE_MONITOR=memhooks  # alternative cache monitor

.. warning::

   January, 2023:
   We discovered a regression in AMD ROCm, leading to 2x slower current deposition (and other slowdowns) in ROCm 5.3 and 5.4.

   June, 2023:
   Although a fix was planned for ROCm 5.5, we still see the same issue in this release and continue to exchange with AMD and HPE on the issue.

   Stay with the ROCm 5.2 module to avoid a 2x slowdown.

.. warning::

   May 2023:
   rocFFT in ROCm 5.1-5.3 tries to `write to a cache `__ in the home area by default.
   This does not scale; disable it via:

   .. code-block:: bash

      export ROCFFT_RTC_CACHE_PATH=/dev/null
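
As a quick sanity check for the ROCm recommendation above, you can list the currently loaded ROCm module.
This is a minimal sketch, assuming the module environment from ``lumi_impactx.profile`` (which loads ``rocm/5.2.3``) is active:

.. code-block:: bash

   # list the loaded ROCm module; with the profile above this should
   # report rocm/5.2.3 (module list prints to stderr)
   module list 2>&1 | grep -i rocm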