File tree 3 files changed +18
-12
lines changed
aws-parallelcluster-computefleet
templates/compute_fleet_status
aws-parallelcluster-slurm/libraries
3 files changed +18
-12
lines changed Original file line number Diff line number Diff line change 38
38
mode '0755'
39
39
end
40
40
41
+ template "/usr/local/bin/is_fleet_ready.sh" do
42
+ source 'compute_fleet_status/is_fleet_ready.erb'
43
+ owner 'root'
44
+ group 'root'
45
+ mode '0755'
46
+ end
47
+
41
48
template "#{node['cluster']['etc_dir']}/clusterstatusmgtd.conf" do
42
49
source 'clusterstatusmgtd/clusterstatusmgtd.conf.erb'
43
50
owner 'root'
Original file line number Diff line number Diff line change
1
+ #! /bin/bash
2
+
3
+ sinfo_output=$(<%= node['cluster']['slurm']['install_dir'] %>/bin/sinfo -h -o '%N %t' | grep -v -E '(idle|alloc|mix|maint)$')
4
+ while IFS= read -r line; do
5
+ nodelist=$(echo "$line" | awk '{print $1}')
6
+ <%= node['cluster']['slurm']['install_dir'] %>/bin/scontrol show hostnames "$nodelist" | { grep -E '^[a-z0-9\-]+\-st\-[a-z0-9\-]+\-[0-9]+.*' || true; }
7
+ done <<< "$sinfo_output"
Original file line number Diff line number Diff line change @@ -204,18 +204,10 @@ def check_for_protected_mode(fleet_status_command) # rubocop:disable Lint/Nested
204
204
"/usr/local/bin/get-compute-fleet-status.sh"
205
205
)
206
206
# Example output for sinfo
207
- # $ /opt/slurm/bin/sinfo -N -h -o '%N %t'
208
- # ondemand-dy-c52xlarge-1 idle~
209
- # ondemand-dy-c52xlarge-2 idle~
210
- # spot-dy-c5xlarge-1 idle~
211
- # spot-st-t2large-1 down
212
- # spot-st-t2large-2 idle
213
- # capacity-block-st-t2micro-1 maint
214
- # capacity-block-dy-t2micro-1 maint
215
- is_fleet_ready_command = Shellwords.escape(
216
- "set -o pipefail && #{node['cluster']['slurm']['install_dir']}/bin/sinfo -N -h -o '%N %t' | { grep -E '^[a-z0-9\\-]+\\-st\\-[a-z0-9\\-]+\\-[0-9]+ .*' || true; } | { grep -v -E '(idle|alloc|mix|maint)$' || true; }"
217
- )
218
- until shell_out!("/bin/bash -c #{is_fleet_ready_command}").stdout.strip.empty?
207
+ # sinfo -h -o '%N %t'
208
+ # queue-0-dy-compute-resource-g4dn-0-[1-10],queue-1-dy-compute-resource-g4dn-1-[1-10] idle~
209
+ # queue-2-dy-compute-resource-g4dn-2-[1-10],queue-3-dy-compute-resource-g4dn-3-[1-10] idle
210
+ until shell_out!("/bin/bash -c /usr/local/bin/is_fleet_ready.sh").stdout.strip.empty?
219
211
check_for_protected_mode(fleet_status_command)
220
212
221
213
Chef::Log.info("Waiting for static fleet capacity provisioning")
You can’t perform that action at this time.
0 commit comments