#!/usr/bin/env bash
##**************************************************************
##
## Copyright (C) 1990-2017, Condor Team, Computer Sciences Department,
## University of Wisconsin-Madison, WI.
##
## Licensed under the Apache License, Version 2.0 (the "License"); you
## may not use this file except in compliance with the License. You may
## obtain a copy of the License at
##
## http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
##
##**************************************************************
# This is a script to run OpenMPI jobs under the HTCondor parallel universe.
# It assumes that a full OpenMPI install is available on all execute nodes.
## sample submit script
#universe = parallel
#executable = openmpiscript
#arguments = actual_mpi_job arg1 arg2 arg3
#getenv = true
#
#should_transfer_files = yes
#transfer_input_files = actual_mpi_job
#when_to_transfer_output = on_exit_or_evict
#
#output = out.$(NODE)
#error = err.$(NODE)
#log = log
#
#notification = never
#machine_count = 8
#queue
##
# Print each command as it is executed (for debugging)
set -x
## configuration notes
# $MPDIR points to the location of the OpenMPI install
# You may set it manually (not recommended)
#MPDIR=/usr/lib64/openmpi
# The pool admin may set it via OPENMPI_INSTALL_PATH in the condor_config (recommended)
MPDIR=$(condor_config_val OPENMPI_INSTALL_PATH)
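# For example, the admin might add a line like the following to condor_config
# (the path shown is just this script's default; adjust it for your install):
#   OPENMPI_INSTALL_PATH = /usr/lib64/openmpi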
# $EXINT is a comma-delimited list of excluded network interfaces.
# If your mpi jobs are hanging, OpenMPI may be trying to use too many
# network interfaces to communicate between nodes.
# You may set it manually (not recommended)
#EXINT="docker0,virbr0"
# The pool admin may set it via OPENMPI_EXCLUDE_NETWORK_INTERFACES in the condor_config (recommended)
EXINT=$(condor_config_val OPENMPI_EXCLUDE_NETWORK_INTERFACES)
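# For example, the admin might add a line like the following to condor_config
# (the interface names shown are just this script's defaults):
#   OPENMPI_EXCLUDE_NETWORK_INTERFACES = docker0,virbr0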
# We recommend that your pool admin use MOUNT_UNDER_SCRATCH = /tmp
# so that OpenMPI caches all data under the user's scratch directory.
# Not having /tmp mounted under scratch can also lead to unlink errors,
# which may hang mpi jobs.
_USE_SCRATCH=$(condor_config_val MOUNT_UNDER_SCRATCH)
if [ -z "$_USE_SCRATCH" ]; then
echo "WARNING: MOUNT_UNDER_SCRATCH not set in condor_config"
elif test "${_USE_SCRATCH#*/tmp}" == "$_USE_SCRATCH"; then
echo "WARNING: /tmp not included in MOUNT_UNDER_SCRATCH"
fi
# If MPDIR is not set, then use a default value
if [ -z "$MPDIR" ]; then
echo "WARNING: Using default value for \$MPDIR in openmpiscript"
MPDIR=/usr/lib64/openmpi
fi
PATH=$MPDIR/bin:.:$PATH
export PATH
export OMP_NUM_THREADS=1
# If EXINT is not set, then use some default values
if [ -z "$EXINT" ]; then
echo "WARNING: Using default values for \$EXINT in openmpiscript"
EXINT="docker0,virbr0"
fi
# The condor_ssh helper script resides in $(LIBEXEC)
CONDOR_SSH=$(condor_config_val libexec)
CONDOR_SSH=$CONDOR_SSH/condor_ssh
# sshd.sh is loaded from a custom location; point this at the directory containing sshd.sh
SSHD_SH=<home_dir>/<nicer_path>
SSHD_SH=$SSHD_SH/sshd.sh
##
# Set up SSHD on the node
. $SSHD_SH $_CONDOR_PROCNO $_CONDOR_NPROCS
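# Sourcing sshd.sh starts the per-node sshd and is expected to define the
# sshd_cleanup function used below.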
# Set up mpirun cleanup function
_MPIRUN_PID=0
mpirun_cleanup() {
echo "Caught SIGTERM, cleaning up..."
if [ "$_MPIRUN_PID" -ne "0" ]; then
# Send SIGTERM to mpirun
echo "Sending SIGTERM to mpirun (${_MPIRUN_PID})..."
kill -s SIGTERM $_MPIRUN_PID
# Give mpirun 60 seconds to exit nicely before proceeding
echo "Waiting for mpirun to exit..."
for i in {1..12}; do
kill -0 $_MPIRUN_PID 2> /dev/null # Returns 0 if PID is running
if [ "$?" -ne "0" ]; then
break
fi
sleep 5
done
fi
# Cleanup sshd
echo "Cleaning up sshd files..."
sshd_cleanup
rm -f machines
echo "Exiting early."
exit 1
}
# If not the head node, just sleep forever to let the SSHDs run
if [ $_CONDOR_PROCNO -ne 0 ]
then
wait
sshd_cleanup
exit 0
# If the head node, then set the trap to cleanup mpirun (also does sshd_cleanup)
else
trap mpirun_cleanup SIGTERM
fi
EXECUTABLE=$1
shift
# The binary is copied but the executable flag may be cleared.
chmod +x "$EXECUTABLE"
# Set the location of the contact file
CONDOR_CONTACT_FILE=$_CONDOR_SCRATCH_DIR/contact
export CONDOR_CONTACT_FILE
# The first field in the contact file contains the node ranks.
# mpirun will use a list of these node ranks,
# and condor_ssh will translate each one into a hostname:port.
_CONDOR_JOB_AD=.job.ad
_CONDOR_REQUEST_CPUS=$(condor_q -jobads $_CONDOR_JOB_AD -af RequestCpus)
sort -n -k 1 < $CONDOR_CONTACT_FILE | awk '{print $1 " slots='$_CONDOR_REQUEST_CPUS'"}' > machines
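# Example (hypothetical values): with RequestCpus = 4, a contact file line
# starting with node rank 0 produces the machines entry "0 slots=4".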
# Check which ssh agent MCA parameter to use, because each one
# has been deprecated at one OpenMPI version or another.
_MCA_FAIL=true
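# ompi_info -a lists each MCA parameter name in quotes (which is why the greps
# below match \"${mca_ssh_agent}\") and includes the word "deprecated" for
# deprecated parameters; the exact output format varies by OpenMPI version.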
# Added on 09/12/18
#export LIGO_DATAFIND_SERVER=$LIGO_DATAFIND_SERVER
#export X509_USER_PROXY=$X509_USER_PROXY
set +x
##
for mca_ssh_agent in orte_rsh_agent plm_rsh_agent
do
if ompi_info -a | grep "\"${mca_ssh_agent}\"" 1>/dev/null 2>&1
then
if ompi_info -a | grep "\"${mca_ssh_agent}\"" | grep deprecated 1>/dev/null 2>&1; then continue; fi
_MCA_FAIL=false
# set MCA values for running on HTCondor
export OMPI_MCA_plm_rsh_no_tree_spawn="true" # disable ssh tree spawn
export OMPI_MCA_btl_tcp_if_exclude="lo,$EXINT" # exclude network interfaces
# Added on 09/12/18
export LIGO_DATAFIND_SERVER=$LIGO_DATAFIND_SERVER
export X509_USER_PROXY=$X509_USER_PROXY
##
# optionally set MCA values for increasing mpirun verbosity
# export OMPI_MCA_plm_base_verbose=30
# export OMPI_MCA_btl_base_verbose=30
# run mpirun in the background and wait for it to exit
#mpirun -v --prefix $MPDIR --mca $mca_ssh_agent $CONDOR_SSH -n $(( $_CONDOR_NPROCS * $_CONDOR_REQUEST_CPUS )) -hostfile machines -x PYTHONPATH -x PATH -x OMP_NUM_THREADS -x LD_LIBRARY_PATH -x LAL_DATA_PATH $EXECUTABLE $@ &
#_MPIRUN_PID=$!
#wait $_MPIRUN_PID
# run mpirun in the background and wait for it to exit
# Replaced the above on 09/12/18
export _CONDOR_NPROCS _CONDOR_REQUEST_CPUS
echo "=== About to start mpirun ==="
echo "Contents of machines:"
cat machines
echo "Working directory:"
pwd
$MPDIR/bin/mpirun -v --prefix $MPDIR --mca $mca_ssh_agent $CONDOR_SSH -n $(( $_CONDOR_NPROCS * $_CONDOR_REQUEST_CPUS )) -hostfile machines -x _CONDOR_NPROCS -x _CONDOR_REQUEST_CPUS -x PYTHONPATH -x PATH -x LD_LIBRARY_PATH "$EXECUTABLE" "$@" &
_MPIRUN_PID=$!
wait $_MPIRUN_PID
mpi_return=$?
if [ $mpi_return -ne 0 ]; then
sshd_cleanup
rm -f machines
echo "ERROR: mpirun exited with code ${mpi_return}"
exit ${mpi_return}
fi
##
break
fi
done
if $_MCA_FAIL
then
echo "ERROR: could not find a suitable MCA ssh agent"
exit 255
fi
sshd_cleanup
rm -f machines
#exit $?
# Replaced on 09/12/18
exit $mpi_return