-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathrun_ensemble.sh
executable file
·35 lines (30 loc) · 1.51 KB
/
run_ensemble.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/bin/bash
#
# run_ensemble.sh -- run an ensemble of independent DGEMM timing trials.
#
# Each trial launches ./dgemm_test_<EXE_VERSION>.exe under numactl and stores
# the program's standard output in log.<EXE_VERSION>.<NNN>, where NNN is the
# zero-padded trial index. The shell's "time" report for every trial is
# written to this script's stderr, not to the log file.
#
# Usage: ./run_ensemble.sh   (edit the settings below to change the run)
# Exit status: 0 if every trial succeeded, 1 if a prerequisite is missing or
# at least one trial returned a non-zero status.

set -u # a typo in a variable name should fail loudly, not expand to nothing

# Choose which executable to run
EXE_VERSION=MMAP # or "default" or "MMAP_2MB" or "MMAP_1GB"

# The problem size needs to be large enough to give repeatable, near-asymptotic
# performance. Using 24 cores on a single Xeon Platinum 8160, performance is
# close to asymptotic for sizes of 12000 or larger.
# The (internal) iteration count should be large enough to distinguish between
# runs with consistently slow performance and runs with slow performance due to
# external interference. The default of 12 yields 11 timed iterations, which is
# usually sufficient to identify systematically slow runs.
# The number of trials will typically be at least 200, since only about one in
# every 100-200 runs shows a strong snoop filter conflict. If this is being run
# on multiple servers, only the aggregate (#nodes * NUMTRIALS) needs to be
# large. Most of the results in the paper included 651 trials (21 trials on
# each of 31 nodes).
SIZE=12000
ITERS=12
NUMTRIALS=200

# All cores on a single socket of a Xeon Platinum 8160 -- modify if desired
export MKL_NUM_THREADS=24
# Force the threads to spread out as much as possible
export KMP_AFFINITY=scatter

EXE="./dgemm_test_${EXE_VERSION}.exe"

# Fail fast on a missing prerequisite instead of producing NUMTRIALS empty logs.
if ! command -v numactl > /dev/null; then
  printf '%s: numactl not found in PATH\n' "${0##*/}" >&2
  exit 1
fi
if [[ ! -x "$EXE" ]]; then
  printf '%s: %s is missing or not executable\n' "${0##*/}" "$EXE" >&2
  exit 1
fi

# The "numactl" command is set up to bind the data and executing threads to
# socket 0, but any socket (or multiple sockets) can be used.
FAILED=0
for ((TRIAL = 0; TRIAL < NUMTRIALS; TRIAL++)); do
  # Zero-pad the trial index to at least three digits (000, 001, ...).
  printf -v LABEL '%.3d' "$TRIAL"
  LOG="log.${EXE_VERSION}.${LABEL}"

  # A failed trial is reported and counted, but the ensemble keeps going so
  # that one bad run does not discard the remaining trials.
  time numactl --membind=0 --cpunodebind=0 \
    "$EXE" "$SIZE" "$ITERS" > "$LOG" || {
    printf '%s: trial %s failed (see %s)\n' "${0##*/}" "$LABEL" "$LOG" >&2
    FAILED=$((FAILED + 1))
  }
done

if ((FAILED > 0)); then
  printf '%s: %d of %d trials failed\n' "${0##*/}" "$FAILED" "$NUMTRIALS" >&2
  exit 1
fi