submit.yml.erb
<%-
cores = num_cores.to_i
if cores == 0 && cluster == "pitzer"
# Small optimization for Pitzer nodes: the user wants the whole node, and if they chose 'any'
# the job can be scheduled on either p18 or p20 nodes. Otherwise they'll get a constraint below.
base_slurm_args = ["--nodes", "1", "--exclusive"]
elsif cores == 0
# Whole node on Owens (28 cores per node)
cores = 28
base_slurm_args = ["--nodes", "1", "--ntasks-per-node", "28"]
else
base_slurm_args = ["--nodes", "1", "--ntasks-per-node", "#{cores}"]
end
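# Map the selected node_type onto extra Slurm arguments: core-count
# choices become --constraint flags, special partitions become
# --partition flags, and anything else passes through unchanged.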
slurm_args = case node_type
when "gpu-40core"
base_slurm_args += ["--constraint", "40core"]
when "gpu-48core"
base_slurm_args += ["--constraint", "48core"]
when "any-40core"
base_slurm_args += ["--constraint", "40core"]
when "any-48core"
base_slurm_args += ["--constraint", "48core"]
when "hugemem"
base_slurm_args += ["--partition", "hugemem"]
when "largemem"
base_slurm_args += ["--partition", "largemem"]
when "debug"
base_slurm_args += ["--partition", "debug"]
else
base_slurm_args
end
-%>
---
batch_connect:
template: "basic"
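# Extra parameters (set by the job script) recorded in the session's connection info for the view.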
conn_params:
- csrf_token
<% if cluster =~ /owens|pitzer|ascend/ -%>
script:
<% if node_type =~ /gpu/ -%>
gpus_per_node: 1
<% end -%>
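# Scheduler-native arguments passed straight through to sbatch.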
native:
<%- slurm_args.each do |arg| %>
- "<%= arg %>"
<%- end %>
<% elsif cluster =~ /kubernetes/
if node_type =~ /owens/
compute_cluster = "owens"
apps_path = "/usr/local"
# Memory per core in MB, with hyperthreading enabled
memory_mb = num_cores.to_i * 2200
elsif node_type =~ /pitzer/
compute_cluster = "pitzer"
apps_path = "/apps"
# Memory per core in MB, with hyperthreading enabled
memory_mb = num_cores.to_i * 4000
end
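# Host directories to bind-mount into the container so the session sees
# the same home, project, and scratch filesystems as the HPC clusters.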
mounts = {
'home' => OodSupport::User.new.home,
'support' => OodSupport::User.new('support').home,
'project' => '/fs/project',
'scratch' => '/fs/scratch',
'ess' => '/fs/ess',
}
-%>
script:
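# wall_time is expressed in seconds (requested hours * 3600).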
wall_time: "<%= bc_num_hours.to_i * 3600 %>"
<%- if node_type =~ /gpu/ -%>
gpus_per_node: 1
<%- end -%>
native:
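# Pod definition consumed by the OnDemand Kubernetes adapter.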
container:
name: "rstudio-server"
image: "docker-registry.osc.edu/ondemand/ondemand-base-rhel7:0.11.1"
image_pull_policy: "IfNotPresent"
command: ["/bin/bash","-l","<%= staged_root %>/job_script_content.sh"]
restart_policy: 'OnFailure'
env:
CLUSTER: "<%= compute_cluster %>"
KUBECONFIG: "/dev/null"
TMPDIR: "/tmp"
labels:
osc.edu/cluster: "<%= compute_cluster %>"
port: "8080"
cpu: "<%= num_cores %>"
memory: "<%= memory_mb %>Mi"
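# The files below are rendered into a ConfigMap and mounted into the container at each mount_path.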
configmap:
files:
- filename: "logging.conf"
data: |
[*]
log-level=debug
logger-type=file
log-dir=<%= Pathname.new(staged_root).join('logs') %>
mount_path: '/etc/rstudio/logging.conf'
sub_path: 'logging.conf'
- filename: "database.conf"
data: |
directory=/tmp/lib/rstudio-server
mount_path: '/etc/rstudio/database.conf'
sub_path: 'database.conf'
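# k8_helper base64-encodes a value and patches it into this pod's "<hostname>-secret" Secret via kubectl.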
- filename: 'k8_helper'
data: |
#!/usr/bin/env bash
set -x
KEY="$1"
VALUE=$(echo -n "$2" | base64)
CFG="$(hostname)-secret"
kubectl get secret "${CFG}" -o json | jq --arg key "$KEY" --arg value "$VALUE" '.data[$key] = $value' | kubectl apply -f -
mount_path: '/opt/open_ondemand/helpers'
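# Bind mounts: the shared filesystems above, plus the Munge socket, Slurm config, SSSD pipes,
# nsswitch, Lmod init, and the Intel/app trees the session needs from the compute cluster.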
mounts:
<%- mounts.each_pair do |name, mount| -%>
- type: host
name: <%= name %>
host_type: Directory
path: <%= mount %>
destination_path: <%= mount %>
<%- end -%>
- type: host
name: munge-socket
host_type: Socket
path: /var/run/munge/munge.socket.2
destination_path: /var/run/munge/munge.socket.2
- type: host
name: slurm-conf
host_type: Directory
path: /etc/slurm
destination_path: /etc/slurm
- type: host
name: sssd-pipes
host_type: Directory
path: /var/lib/sss/pipes
destination_path: /var/lib/sss/pipes
- type: host
name: sssd-conf
host_type: Directory
path: /etc/sssd
destination_path: /etc/sssd
- type: host
name: nsswitch
host_type: File
path: /etc/nsswitch.conf
destination_path: /etc/nsswitch.conf
- type: host
name: lmod-init
host_type: File
path: /apps/<%= compute_cluster %>/lmod/lmod.sh
destination_path: /etc/profile.d/lmod.sh
- type: host
name: intel
host_type: Directory
path: /nfsroot/<%= compute_cluster %>/opt/intel
destination_path: /opt/intel
- type: host
name: apps
host_type: Directory
path: /apps/<%= compute_cluster %>
destination_path: <%= apps_path %>
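# Only schedule onto nodes labeled for OnDemand interactive workloads.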
node_selector:
node-role.kubernetes.io/ondemand: ''
<% end -%>