-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathnodeinfo
executable file
·144 lines (129 loc) · 3.69 KB
/
nodeinfo
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#!/bin/bash
# Compile node info
# Sync to /project/rivanna_software/public
# Ruoshi Sun
# 2021-03-09
# Global accumulators, all starting out empty. They are filled by append
# and read back by report; the same index in every array refers to the
# same entry.
declare -a ALL_NAME=() ALL_SYSTEM=() ALL_CORE=() ALL_MEMORY=()
declare -a ALL_NUMNODE=()
declare -a ALL_GPUNAME=() ALL_GPUMEMORY=() ALL_GPU=()
# Print the value of a KEY=VALUE entry found in newline-separated text.
# Arguments:
#   $1 - text holding one KEY=VALUE entry per line
#   $2 - key to look up; it must equal the text before the first '='
#        exactly (no substring or regex matching)
# Outputs:
#   For every matching line, the text after its LAST '='. For
#   "CfgTRES=cpu=40,gres/gpu=4" that is "4". Nothing is printed when the
#   key is absent.
function getval {
  # printf instead of echo so text starting with "-n"/"-e" is passed through;
  # NF > 1 ignores bare words that merely happen to equal the key.
  printf '%s\n' "$1" | awk -F'=' -v key="$2" 'NF > 1 && $1 == key { print $NF }'
}
# Record one node in the global ALL_* arrays.
# If an entry with the same name already exists, only its node counter in
# ALL_NUMNODE is incremented; otherwise a new entry is appended to every
# array with a counter of 1.
# Globals:
#   ALL_NAME (read/written), ALL_NUMNODE (written), ALL_SYSTEM, ALL_CORE,
#   ALL_MEMORY, ALL_GPUNAME, ALL_GPUMEMORY, ALL_GPU (written)
# Arguments:
#   $1 - name (lookup key)   $2 - system      $3 - cores
#   $4 - memory              $5 - GPU name    $6 - GPU memory
#   $7 - GPU count
function append {
  # local index so the caller's own loop variable is never overwritten
  local idx
  for idx in "${!ALL_NAME[@]}"; do
    if [[ "$1" == "${ALL_NAME[idx]}" ]]; then
      ALL_NUMNODE[idx]=$(( ALL_NUMNODE[idx] + 1 ))
      return
    fi
  done
  # Append without eval: each argument becomes exactly one element, even
  # when it is empty or contains whitespace, so all arrays stay aligned.
  ALL_NAME+=("${1-}")
  ALL_SYSTEM+=("${2-}")
  ALL_CORE+=("${3-}")
  ALL_MEMORY+=("${4-}")
  ALL_GPUNAME+=("${5-}")
  ALL_GPUMEMORY+=("${6-}")
  ALL_GPU+=("${7-}")
  ALL_NUMNODE+=(1)
}
# Write the collected node table as CSV to stdout.
# Globals:
#   ALL_NAME, ALL_SYSTEM, ALL_CORE, ALL_MEMORY, ALL_GPUNAME, ALL_GPUMEMORY,
#   ALL_GPU, ALL_NUMNODE (all read)
# Outputs:
#   A header line followed by one row per entry, post-processed as follows:
#   - rows are sorted on the hardware column onwards, which puts GPU rows
#     after the "-" (no GPU) rows
#   - memory is printed as the stored value divided by 1024, suffixed "GB"
#   - a non-"-" hardware column gets the prefix "GPU: "
#   - KNL rows (256 cores, 180GB, no GPU) are shown with 64 physical cores
#     and "KNL" as hardware
#   - RTX2080 is renamed RTX2080Ti
#   - rows containing "112GB" are dropped
function report {
  local idx
  echo "System,Cores/Node,Memory/Node,Specialty Hardware,GPU memory/Device,GPU devices/Node,# of Nodes"
  for idx in "${!ALL_NAME[@]}"; do
    printf -- '%s,%d,%s,%s,%s,%s,%s\n' \
      "${ALL_SYSTEM[idx]}" "${ALL_CORE[idx]}" "$(( ALL_MEMORY[idx] / 1024 ))GB" \
      "${ALL_GPUNAME[idx]}" "${ALL_GPUMEMORY[idx]}" "${ALL_GPU[idx]}" \
      "${ALL_NUMNODE[idx]}"
  done \
    | sort -t, -k4 \
    | awk -F, -v OFS=, '$4 != "-" { $4 = "GPU: " $4 } { print }' \
    | sed -e 's/^\([^,]*\),256,180GB,-/\1,64,180GB,KNL/' \
          -e 's/RTX2080/RTX2080Ti/g' \
          -e '/112GB/d'
  # The KNL expression skips the leading System column; anchoring "256" at
  # the very start of the line could never match a row.
}
# Print the per-device memory of a GPU model, or "-" when the combination is
# not in the hardcoded table below (GPU memory is not read from Slurm here).
# Arguments:
#   $1 - upper-cased GPU model name
#   $2 - CPUTot of the node; selects between the V100 variants
#   $3 - RealMemory of the node; selects between the A100 variants
function gpumemory {
  case "$1" in
    RTX2080) echo "11GB" ;;
    RTX3090) echo "24GB" ;;
    V100)
      case "$2" in
        40|36) echo "32GB" ;;
        28) echo "16GB" ;;
        *) echo "-" ;;
      esac
      ;;
    A100)
      case "$3" in
        2000000) echo "80GB" ;;
        1000000) echo "40GB" ;;
        *) echo "-" ;;
      esac
      ;;
    A6000|A40) echo "48GB" ;;
    *) echo "-" ;;
  esac
}

# Expand every node list printed by sinfo into individual host names.
# Word splitting is intentional on both levels: each node list from sinfo is
# passed on as its own word and each host name becomes one array element.
# shellcheck disable=SC2046,SC2207
NODES=($(scontrol show hostnames $(sinfo -ho "%N")))
#NODES=($(scontrol show hostnames $(sinfo -p interactive -ho "%N")))
NUM_NODES=${#NODES[@]}
if (( NUM_NODES == 0 )); then
  # do not replace an existing CSV with a header-only table
  echo "nodeinfo: no nodes reported by sinfo/scontrol; nodeinfo.csv not written" >&2
  exit 1
fi

for i in "${!NODES[@]}"; do
  # progress counter, rewritten in place on the same terminal line
  printf '%d/%d\r' "$((i + 1))" "$NUM_NODES"
  NODE=${NODES[i]}
  # one KEY=VALUE entry per line, as expected by getval
  INFO=$(scontrol show node "$NODE" -o | tr ' ' '\n')
  CORE=$(getval "$INFO" CPUTot)
  MEMORY=$(getval "$INFO" RealMemory)
  if [[ -z "$CORE" || -z "$MEMORY" ]]; then
    # without these two values the node cannot be classified
    printf '\nwarning: no CPUTot/RealMemory for node %s; skipped\n' "$NODE" >&2
    continue
  fi
  PARTITION=$(getval "$INFO" Partitions)
  FEATURES=$(getval "$INFO" AvailableFeatures)
  # first feature: everything before the first ',' or ':'
  FIRST_FEATURE=${FEATURES%%[,:]*}

  # GPU node: member of a GPU partition, or an interactive node that
  # advertises an "rtx" feature.
  if [[ "$PARTITION" == "gpu" || "$PARTITION" == "bii-gpu" \
        || "$FEATURES" == *rtx* ]]; then
    GPUNAME=${FIRST_FEATURE^^}
    GPU=$(getval "$INFO" CfgTRES)
    # always yields a non-empty value, so nothing is carried over from the
    # previous node and no argument of append below can vanish
    GPUMEMORY=$(gpumemory "$GPUNAME" "$CORE" "$MEMORY")
    # all GPUs listed as Afton
    SYSTEM=Afton
  else
    GPUNAME="-"
    GPUMEMORY="-"
    GPU="-"
    # Rivanna or Afton
    # as of 2024-05 Afton only contains 96-core EPYC nodes
    SYSTEM=Rivanna
    if [[ "$FIRST_FEATURE" == "afton" ]] && (( CORE == 96 )); then
      SYSTEM=Afton
    fi
  fi

  # nodes with identical cores, memory and GPU model share one table row
  NAME="${CORE}_${MEMORY}_${GPUNAME}"
  append "$NAME" "$SYSTEM" "$CORE" "$MEMORY" "$GPUNAME" "$GPUMEMORY" "$GPU"
done
# terminate the progress line
echo
report >nodeinfo.csv