-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #295 from ammarhakim/main_perlmutterMachineFiles
- Loading branch information
Showing
3 changed files
with
79 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,19 @@ | ||
#.MF 2024/03/07: At the time we got this to work I had the following modules loaded | ||
#. 1) craype-x86-milan 7) cpe/23.12 13) craype-accel-nvidia80 | ||
#. 2) libfabric/1.15.2.0 8) gpu/1.0 14) cray-mpich/8.1.28 (mpi) | ||
#. 3) craype-network-ofi 9) craype/2.7.30 (c) 15) cudatoolkit/12.0 (g) | ||
#. 4) xpmem/2.6.2-2.5_2.38__gd067c3f.shasta 10) cray-dsmml/0.2.2 16) nccl/2.18.3-cu12 | ||
#. 5) gcc-native/12.3 11) cray-libsci/23.12.5 (math) | ||
#. 6) perftools-base/23.12.0 12) PrgEnv-gnu/8.5.0 (cpe) | ||
#.Most of these are loaded by default, so we just load some extra/key ones here. | ||
module load PrgEnv-gnu/8.5.0 | ||
module load craype-accel-nvidia80 | ||
module load cray-mpich/8.1.28 | ||
module load cudatoolkit/12.0 | ||
module load openmpi/5.0.0rc12 | ||
module load nccl/2.18.3-cu12 | ||
module unload darshan | ||
|
||
: "${PREFIX:=$HOME/gkylsoft}" | ||
./configure CC=nvcc ARCH_FLAGS="-march=native" CUDA_ARCH=80 --prefix=$PREFIX --lapack-inc=$PREFIX/OpenBLAS/include --lapack-lib=$PREFIX/OpenBLAS/lib/libopenblas.a --superlu-inc=$PREFIX/superlu/include --superlu-lib=$PREFIX/superlu/lib/libsuperlu.a --cudamath-libdir=/opt/nvidia/hpc_sdk/Linux_x86_64/23.1/math_libs/12.0/lib64 --use-mpi=yes --mpi-inc=$OPENMPI_ROOT/include --mpi-lib=$OPENMPI_ROOT/lib --use-nccl=yes --nccl-inc=$NCCL_DIR/include --nccl-lib=$NCCL_DIR/lib; | ||
|
||
./configure CC=nvcc ARCH_FLAGS="-march=native" CUDA_ARCH=80 --prefix=$PREFIX --lapack-inc=$PREFIX/OpenBLAS/include --lapack-lib=$PREFIX/OpenBLAS/lib/libopenblas.a --superlu-inc=$PREFIX/superlu/include --superlu-lib=$PREFIX/superlu/lib/libsuperlu.a --cudamath-libdir=/opt/nvidia/hpc_sdk/Linux_x86_64/23.1/math_libs/12.0/lib64 --use-mpi=yes --mpi-inc=$CRAY_MPICH_DIR/include --mpi-lib=$CRAY_MPICH_DIR/lib --use-nccl=yes --nccl-inc=$NCCL_DIR/include --nccl-lib=$NCCL_DIR/lib; | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
#!/bin/bash -l | ||
|
||
#.Declare a name for this job, preferably with 16 or fewer characters. | ||
#SBATCH -J <Job Name> | ||
#SBATCH -A <Account Number> | ||
|
||
#.Request the queue/QOS (if omitted, the system default queue is used). | ||
#.This job is going to use the 'regular' queue. | ||
#SBATCH -q regular | ||
|
||
#.Number of nodes to request (Perlmutter has 64 cores and 4 GPUs per node) | ||
#SBATCH -N 2 | ||
#SBATCH --ntasks 8 | ||
|
||
#.Specify GPU needs: | ||
#SBATCH --constraint gpu | ||
#SBATCH --gpus 8 | ||
|
||
#.Request wall time | ||
#SBATCH -t 00:30:00 | ||
|
||
#.Mail is sent to you when the job starts and when it terminates or aborts. | ||
#SBATCH --mail-user=<your email> | ||
#SBATCH --mail-type=END,FAIL,REQUEUE | ||
|
||
#.Load modules (these must match those in the machines/configure script). | ||
module load PrgEnv-gnu/8.5.0 | ||
module load craype-accel-nvidia80 | ||
module load cray-mpich/8.1.28 | ||
module load cudatoolkit/12.0 | ||
module load nccl/2.18.3-cu12 | ||
|
||
#.On Perlmutter some jobs get warnings about DVS_MAXNODES (used in file striping). | ||
#.We set it to 24 for now, but really this depends on the amount/size of I/O being performed. | ||
#.See online NERSC docs and the intro_mpi man page. | ||
export DVS_MAXNODES=24_ | ||
export MPICH_MPIIO_DVS_MAXNODES=24 | ||
|
||
#.Run the rt_gk_sheath_2x2v_p1 executable using 1 GPU along x (-c 1) and 8 | ||
#.GPUs along the field line (-d 8). See './rt_gk_sheath_2x2v_p1 -h' for | ||
#.more details/options on decomposition. It also assumes the executable is | ||
#.in the present directory. If it isn't, change `./` to point to the | ||
#.directory containing the executable. | ||
|
||
echo "srun -u -n 8 --gpus 8 ./rt_gk_sheath_2x2v_p1 -g -M -c 1 -d 8" | ||
srun -u -n 8 --gpus 8 ./rt_gk_sheath_2x2v_p1 -g -M -c 1 -d 8 | ||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,18 @@ | ||
#.MF 2024/03/07: At the time we got this to work I had the following modules loaded | ||
#. 1) craype-x86-milan 7) cpe/23.12 13) craype-accel-nvidia80 | ||
#. 2) libfabric/1.15.2.0 8) gpu/1.0 14) cray-mpich/8.1.28 (mpi) | ||
#. 3) craype-network-ofi 9) craype/2.7.30 (c) 15) cudatoolkit/12.0 (g) | ||
#. 4) xpmem/2.6.2-2.5_2.38__gd067c3f.shasta 10) cray-dsmml/0.2.2 16) nccl/2.18.3-cu12 | ||
#. 5) gcc-native/12.3 11) cray-libsci/23.12.5 (math) | ||
#. 6) perftools-base/23.12.0 12) PrgEnv-gnu/8.5.0 (cpe) | ||
#.Most of these are loaded by default, so we just load some extra/key ones here. | ||
module load PrgEnv-gnu/8.5.0 | ||
module load craype-accel-nvidia80 | ||
module load cray-mpich/8.1.28 | ||
module load cudatoolkit/12.0 | ||
module load openmpi/5.0.0rc12 | ||
module load nccl/2.18.3-cu12 | ||
module unload darshan | ||
cd install-deps | ||
|
||
: "${PREFIX:=$HOME/gkylsoft}" | ||
|
||
cd install-deps | ||
./mkdeps.sh --build-openblas=yes --build-superlu=yes --prefix=$PREFIX --build-openmpi=no MPICC=mpicc MPICXX=mpicxx |