Skip to content

Commit

Permalink
Merge pull request #295 from ammarhakim/main_perlmutterMachineFiles
Browse files Browse the repository at this point in the history
  • Loading branch information
ammarhakim authored Mar 7, 2024
2 parents d7d663b + c9e2b50 commit f8a2b1a
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 6 deletions.
17 changes: 14 additions & 3 deletions machines/configure.perlmutter.gpu.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,19 @@
#.MF 2024/03/07: At the time we got this to work I had the following modules loaded
#. 1) craype-x86-milan 7) cpe/23.12 13) craype-accel-nvidia80
#. 2) libfabric/1.15.2.0 8) gpu/1.0 14) cray-mpich/8.1.28 (mpi)
#. 3) craype-network-ofi 9) craype/2.7.30 (c) 15) cudatoolkit/12.0 (g)
#. 4) xpmem/2.6.2-2.5_2.38__gd067c3f.shasta 10) cray-dsmml/0.2.2 16) nccl/2.18.3-cu12
#. 5) gcc-native/12.3 11) cray-libsci/23.12.5 (math)
#. 6) perftools-base/23.12.0 12) PrgEnv-gnu/8.5.0 (cpe)
#.Most of these are loaded by default, so we just load some extra/key ones here.
module load PrgEnv-gnu/8.5.0
module load craype-accel-nvidia80
module load cray-mpich/8.1.28
module load cudatoolkit/12.0
module load nccl/2.18.3-cu12
module unload darshan

#.Install prefix: keep the caller's PREFIX if set, otherwise use $HOME/gkylsoft.
: "${PREFIX:=$HOME/gkylsoft}"

#.Configure a GPU build (nvcc, CUDA_ARCH=80) against the Cray MPICH and NCCL
#.modules loaded above. Only one MPI stack is used: the OpenMPI module load and
#.the $OPENMPI_ROOT-based configure call were removed because they conflicted
#.with cray-mpich and were immediately superseded by this call anyway.
#.NOTE(review): CRAY_MPICH_DIR and NCCL_DIR are presumably exported by the
#.corresponding modules -- confirm they are set in your environment.
#.NOTE(review): the cuda math library path is hard-coded to a specific
#.hpc_sdk/CUDA version and may need updating when the modules change.
./configure CC=nvcc ARCH_FLAGS="-march=native" CUDA_ARCH=80 \
  --prefix="$PREFIX" \
  --lapack-inc="$PREFIX/OpenBLAS/include" \
  --lapack-lib="$PREFIX/OpenBLAS/lib/libopenblas.a" \
  --superlu-inc="$PREFIX/superlu/include" \
  --superlu-lib="$PREFIX/superlu/lib/libsuperlu.a" \
  --cudamath-libdir=/opt/nvidia/hpc_sdk/Linux_x86_64/23.1/math_libs/12.0/lib64 \
  --use-mpi=yes \
  --mpi-inc="$CRAY_MPICH_DIR/include" \
  --mpi-lib="$CRAY_MPICH_DIR/lib" \
  --use-nccl=yes \
  --nccl-inc="$NCCL_DIR/include" \
  --nccl-lib="$NCCL_DIR/lib"


51 changes: 51 additions & 0 deletions machines/jobScript.perlmutter-gpu
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/bin/bash -l

#.Declare a name for this job, preferably with 16 or fewer characters.
#SBATCH -J <Job Name>
#.Account to charge for this job (replace the placeholder with your own).
#SBATCH -A <Account Number>

#.Request the queue (QOS) with -q. If this line is omitted the system
#.default is used; this job asks for the 'regular' queue.
#SBATCH -q regular

#.Number of nodes to request (Perlmutter has 64 cores and 4 GPUs per node)
#SBATCH -N 2
#.Total number of tasks; keep in sync with the -n passed to srun below.
#SBATCH --ntasks 8

#.Specify GPU needs: run on the GPU nodes and request 8 GPUs in total
#.(2 nodes x 4 GPUs per node, per the note above).
#SBATCH --constraint gpu
#SBATCH --gpus 8

#.Request wall time (HH:MM:SS).
#SBATCH -t 00:30:00

#.Mail is sent to you when the job ends, fails or is requeued
#.(the events listed in --mail-type).
#SBATCH --mail-user=<your email>
#SBATCH --mail-type=END,FAIL,REQUEUE

#.Load the runtime modules one by one. The list must stay identical to the
#.one used in the machines/configure script so the job runs against the
#.same toolchain the code was built with.
for gkyl_mod in \
  PrgEnv-gnu/8.5.0 \
  craype-accel-nvidia80 \
  cray-mpich/8.1.28 \
  cudatoolkit/12.0 \
  nccl/2.18.3-cu12
do
  module load "$gkyl_mod"
done

#.On Perlmutter some jobs get warnings about DVS_MAXNODES (used in file striping).
#.Both variables below are derived from a single value, 24 for now; the right
#.number really depends on the amount/size of I/O being performed.
#.See online NERSC docs and the intro_mpi man page.
#.NOTE(review): the trailing underscore in DVS_MAXNODES looks deliberate --
#.confirm against the NERSC docs before removing it.
dvs_maxnodes=24
export DVS_MAXNODES="${dvs_maxnodes}_"
export MPICH_MPIIO_DVS_MAXNODES="${dvs_maxnodes}"

#.Launch the rt_gk_sheath_2x2v_p1 executable on 8 tasks / 8 GPUs, decomposed
#.as 1 GPU along x (-c 1) and 8 GPUs along the field line (-d 8).
#.Run './rt_gk_sheath_2x2v_p1 -h' for more details/options on decomposition.
#.The executable is assumed to live in the present directory; if it does
#.not, change `./` to point to the directory containing it.
#.The command is built once so the echoed line and the executed line
#.cannot drift apart.
run_cmd=(srun -u -n 8 --gpus 8 ./rt_gk_sheath_2x2v_p1 -g -M -c 1 -d 8)

echo "${run_cmd[*]}"
"${run_cmd[@]}"





17 changes: 14 additions & 3 deletions machines/mkdeps.perlmutter.gpu.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
#.MF 2024/03/07: At the time we got this to work I had the following modules loaded
#. 1) craype-x86-milan 7) cpe/23.12 13) craype-accel-nvidia80
#. 2) libfabric/1.15.2.0 8) gpu/1.0 14) cray-mpich/8.1.28 (mpi)
#. 3) craype-network-ofi 9) craype/2.7.30 (c) 15) cudatoolkit/12.0 (g)
#. 4) xpmem/2.6.2-2.5_2.38__gd067c3f.shasta 10) cray-dsmml/0.2.2 16) nccl/2.18.3-cu12
#. 5) gcc-native/12.3 11) cray-libsci/23.12.5 (math)
#. 6) perftools-base/23.12.0 12) PrgEnv-gnu/8.5.0 (cpe)
#.Most of these are loaded by default, so we just load some extra/key ones here.
module load PrgEnv-gnu/8.5.0
module load craype-accel-nvidia80
module load cray-mpich/8.1.28
module load cudatoolkit/12.0
module load nccl/2.18.3-cu12
module unload darshan

#.Install prefix: keep the caller's PREFIX if set, otherwise use $HOME/gkylsoft.
: "${PREFIX:=$HOME/gkylsoft}"

#.Build OpenBLAS and SuperLU into $PREFIX; OpenMPI is not built
#.(--build-openmpi=no), and the OpenMPI module is no longer loaded so that
#.only the cray-mpich MPI stack is present.
#.Enter install-deps exactly once, and only run mkdeps.sh if the cd worked,
#.so the build never starts from the wrong directory.
cd install-deps && \
  ./mkdeps.sh --build-openblas=yes --build-superlu=yes --prefix="$PREFIX" --build-openmpi=no MPICC=mpicc MPICXX=mpicxx

0 comments on commit f8a2b1a

Please sign in to comment.