# playbook-svc-mgmt.yml
---
- name: Setup OpenHPC slurm+warewulf on mgmt node
hosts: "mgmt.{{domain}}"
become: true
pre_tasks:
- include_tasks: tasks/setup_backup_dir.yml
- include_tasks: tasks/backup_or_restore_host_keys.yml
- include_tasks: tasks/setup_unattended_security_updates.yml
- name: Setup missing Slurm dependencies
ansible.builtin.dnf:
name: ["/bin/mailx", "Lmod"]
- name: Create /etc/webhookd dir
ansible.builtin.file:
state: directory
path: /etc/webhookd
        mode: "0755"
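    # The following tasks create a self-signed server and client keypair on the
    # control node (kept in the backup directory so reruns reuse the same material)
    # and upload it to /etc/webhookd; nginx below serves the server pair for TLS and
    # uses the client certificate to verify webhook callers (mutual TLS).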
- name: Generate webhookd privatekeys
community.crypto.openssl_privatekey:
path: "{{host_backup_directory}}/{{item}}.key"
loop: [server, client]
delegate_to: localhost
become: false
- name: Create CSR for new certificate
community.crypto.openssl_csr_pipe:
privatekey_path: "{{host_backup_directory}}/{{item}}.key"
use_common_name_for_san: false
common_name: local CA
subject_alt_name:
- "DNS:mgmt.{{domain}}"
- "DNS:{{hostvars[inventory_hostname]['ansible_default_ipv4']['address']}}"
register: csr
loop: [server, client]
delegate_to: localhost
become: false
- name: Generate webhookd publickeys
community.crypto.x509_certificate:
privatekey_path: "{{host_backup_directory}}/{{item.item}}.key"
path: "{{host_backup_directory}}/{{item.item}}.pem"
csr_content: "{{item.csr}}"
provider: selfsigned
selfsigned_not_after: "+3650d"
loop: "{{csr.results}}"
delegate_to: localhost
become: false
- name: Upload webhookd keypairs
ansible.builtin.copy:
src: "{{host_backup_directory}}/{{item}}"
dest: "/etc/webhookd/{{item}}"
        mode: "0600"
loop: [server.pem, server.key, client.pem, client.key]
- name: Allow binding webhook port in SELinux
community.general.seport:
ports: "{{mgmt_webhook_port}}"
proto: tcp
setype: http_port_t
state: present
- name: Allow network for httpd
ansible.posix.seboolean:
name: httpd_can_network_connect
state: true
persistent: true
roles:
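    # nginx terminates TLS on {{mgmt_webhook_port}}, requires a valid client
    # certificate, and proxies verified requests to webhookd on 127.0.0.1:8080.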
- role: geerlingguy.nginx
nginx_vhosts:
- listen: "{{mgmt_webhook_port}} ssl"
server_name: "webhook"
state: "present"
index: "/"
extra_parameters: |
location / {
proxy_pass http://127.0.0.1:8080; # webhookd defaults to this port
}
ssl_protocols TLSv1.1 TLSv1.2;
ssl_ciphers HIGH:!aNULL:!MD5;
ssl_session_cache shared:SSL:10m;
ssl_session_timeout 10m;
ssl_certificate /etc/webhookd/server.pem;
ssl_certificate_key /etc/webhookd/server.key;
ssl_client_certificate /etc/webhookd/client.pem;
ssl_verify_client on;
- role: linux-system-roles.postfix
postfix_conf: { relayhost: "{{postfix_smtp_relay}}" }
- role: geerlingguy.mysql
mysql_root_password_update: true
mysql_user_password_update: true
      # Slurm requires InnoDB to be tuned as follows; see
      # https://slurm.schedmd.com/accounting.html#slurm-accounting-configuration-before-build
mysql_innodb_buffer_pool_size: "1024M"
mysql_innodb_log_file_size: "64M"
mysql_innodb_lock_wait_timeout: "900"
mysql_users:
- name: slurm
host: "localhost"
password: "{{mgmt_password}}"
priv: "slurm_acct_db.*:ALL"
mysql_databases:
- name: slurm_acct_db
collation: utf8_general_ci
encoding: utf8
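    # The slurm user and slurm_acct_db database above are consumed by slurmdbd,
    # which the stackhpc.openhpc role configures next.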
- role: stackhpc.openhpc
ansible_distribution_major_version: Alma9
ohpc_openhpc_repos: { "Alma9": [] } # already part of BOS
ohpc_default_extra_repos: { "Alma9": [] } # already part of BOS
openhpc_enable:
control: true # slurmctld
runtime: true # slurmd
database: true # slurmdbd
batch: false # compute node
      openhpc_slurm_configless: true
openhpc_slurmdbd_mysql_password: "{{mgmt_password}}"
openhpc_slurm_accounting_storage_type: "accounting_storage/slurmdbd"
openhpc_slurm_service_enabled: true
openhpc_slurm_control_host: "mgmt.{{domain}}"
openhpc_cluster_name: "{{domain}}"
openhpc_packages: []
openhpc_slurm_partitions:
        - name: dummy # an empty partition makes Slurm stall; this dummy partition is deleted later so Slurm can start
openhpc_module_system_install: false
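    # Configless slurmd on the compute nodes locates slurmctld via the
    # _slurmctld._tcp SRV record added to IdM in post_tasks below.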
- role: freeipa.ansible_freeipa.ipaclient
state: present
ipaclient_domain: "{{domain}}"
ipaadmin_principal: admin
ipaadmin_password: "{{ipa_password}}"
ipaclient_mkhomedir: true
ipaclient_configure_dns_resolver: true
ipaclient_dns_servers: "{{idm_ip}}"
ipaclient_force_join: true
post_tasks:
### Update slurm nodes ###
- name: Copy setup oh-my-tmux script
ansible.builtin.copy:
src: scripts/setup-oh-my-tmux.sh
dest: /root/setup-oh-my-tmux.sh
        mode: "0755"
- name: "Setup HBAC: Disable allow all"
freeipa.ansible_freeipa.ipahbacrule:
ipaadmin_password: "{{ipa_password}}"
name: allow_all
state: disabled
- name: "Setup HBAC: Allow admin all node access"
freeipa.ansible_freeipa.ipahbacrule:
ipaadmin_password: "{{ipa_password}}"
description: Allow admin all node access
name: allow_admin_all_access
group: "admins"
hostcategory: all
servicecategory: all
state: present
# See https://slurm.schedmd.com/configless_slurm.html on values for the SRV record
- name: Add slurm configless DNS SRV records to IdM
freeipa.ansible_freeipa.ipadnsrecord:
ipaadmin_password: "{{ipa_password}}"
records:
- name: _slurmctld._tcp
zone_name: "{{domain}}"
record_type: SRV
srv_priority: 0
srv_weight: 0
srv_port: 6817
srv_target: mgmt
- name: Permit slurmctld
ansible.posix.firewalld:
port: 6817/tcp
state: enabled
permanent: true
- name: Fetch munge key
ansible.builtin.fetch:
flat: true
src: /etc/munge/munge.key
dest: "{{host_backup_directory}}/munge.key"
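    # The munge key is kept in the backup directory (presumably for reuse by other
    # playbooks); compute nodes receive it through the Warewulf overlay set up below.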
### Warewulf ###
    # XXX the official docs install Warewulf 3 via ohpc-warewulf, which is too old
- name: Install warewulf 4
ansible.builtin.dnf:
name: [warewulf-ohpc]
      retries: 5 # AlmaLinux's UK mirrors sometimes resolve to an unreliable source
delay: 2
- name: Setup warewulf config
      ansible.builtin.template:
src: warewulf.conf.j2
dest: /etc/warewulf/warewulf.conf
- name: Setup warewulf default config
      ansible.builtin.template:
src: defaults.conf.j2
dest: /etc/warewulf/defaults.conf
- name: Permit warewulf service
ansible.posix.firewalld:
service: warewulf
state: enabled
permanent: true
- name: Permit nfs service
ansible.posix.firewalld:
service: nfs
state: enabled
permanent: true
- name: Permit tftp service
ansible.posix.firewalld:
service: tftp
state: enabled
permanent: true
- name: Reload firewalld
ansible.builtin.systemd_service:
name: firewalld
state: reloaded
- name: Enable warewulfd
ansible.builtin.systemd_service:
name: warewulfd
state: started
enabled: true
    # Compute nodes use FreeIPA, so don't sync passwd into the overlay; syncing it causes intermittent GID-mismatch problems
- name: Drop passwd overlay
ansible.builtin.file:
state: absent
path: /srv/warewulf/overlays/generic/etc/passwd
- name: Ensure munge key overlay path exists
ansible.builtin.file:
path: /srv/warewulf/overlays/generic/etc/munge/
state: directory
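    # Overlay files ending in .ww are rendered by Warewulf's own template engine when
    # the overlay is built; the Include directive embeds the contents of the named
    # file from this host into the node overlay.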
- name: Setup munge key overlay
ansible.builtin.copy:
content: '{%raw%}{{Include "/etc/munge/munge.key"}}{%endraw%}' # XXX templated by warewulf, not ansible
dest: /srv/warewulf/overlays/generic/etc/munge/munge.key.ww
owner: munge
group: munge
mode: 0400
- name: Ensure webhook key overlay path exists
ansible.builtin.file:
path: /srv/warewulf/overlays/generic/etc/webhookd/
state: directory
- name: Setup webhook key overlay
ansible.builtin.copy:
content: '{{''{{''}}Include "/etc/webhookd/{{item}}"}}' # XXX templated by warewulf, not ansible
dest: /srv/warewulf/overlays/generic/etc/webhookd/{{item}}.ww
mode: 0600
loop: [client.pem, client.key, server.pem]
- name: Ensure add host overlay path exists
ansible.builtin.file:
path: /srv/warewulf/overlays/generic/usr/local/bin/
state: directory
- name: Setup add host overlay
ansible.builtin.template:
src: webhookd_notify_add_host.sh.j2
dest: /srv/warewulf/overlays/generic/usr/local/bin/webhookd_notify_add_host.sh
mode: 0500
- name: Setup del host overlay
ansible.builtin.template:
src: webhookd_notify_del_host.sh.j2
dest: /srv/warewulf/overlays/generic/usr/local/bin/webhookd_notify_del_host.sh
mode: 0500
- name: Build arch-specific overlays
ansible.builtin.script: "scripts/build-arch-overlays.sh {{item}}"
loop: "{{all_arch}}"
### rds1 ###
- name: Install quota-rpc
ansible.builtin.dnf:
name: [quota-rpc]
- name: Create rds1 directories
ansible.builtin.file:
path: "/mnt/rds1/{{item}}"
state: directory
mode: "750"
group: "{{idm_default_group}}"
loop: "{{mgmt_exported_directories}}"
- name: Permit rpc-bind and dependent services
ansible.posix.firewalld:
service: "{{item}}"
state: enabled
permanent: true
loop: [rpc-bind, mountd, rquotad]
- name: Enable rquotad service
ansible.builtin.systemd_service:
name: rpc-rquotad
state: started
enabled: true
- name: Enable NFSv4 only
ansible.builtin.blockinfile:
path: /etc/nfs.conf
append_newline: true
prepend_newline: true
marker: "### {mark} Ansible generated block: do not edit this section! ###"
block: |
[nfsd]
vers3=n
vers4=y
vers4.0=y
vers4.1=y
vers4.2=y
### rds1 NFS ###
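    # Create the nfs/ service principal in IdM and pull its key into the host keytab
    # (needed for Kerberos-secured NFS). ipa-getkeytab uses the current credential
    # cache, so an admin ticket is obtained first and refreshed again afterwards.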
- name: Get fresh Kerberos ticket
ansible.builtin.shell: "kdestroy && kinit admin <<< {{ipa_password}}"
- name: Ensure IPA service is present
freeipa.ansible_freeipa.ipaservice:
ipaadmin_password: "{{ipa_password}}"
name: "nfs/mgmt.{{domain}}"
- name: Get IPA keytab for NFS
ansible.builtin.shell: "ipa-getkeytab -s idm.{{domain}} -p nfs/mgmt.{{domain}} -k /etc/krb5.keytab"
- name: Get fresh Kerberos ticket
ansible.builtin.shell: "kdestroy && kinit admin <<< {{ipa_password}}"
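    # The next two tasks (re)create /etc/root.keytab with a password-derived key for
    # the root principal, presumably so later automation (e.g. the webhookd scripts)
    # can kinit without an interactive password.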
- name: Drop existing root user keytab
ansible.builtin.file:
state: absent
path: /etc/root.keytab
- name: Create root user keytab
ansible.builtin.expect:
command: ktutil
responses:
ktutil:
- "addent -password -p root -k 1 -f"
- "wkt /etc/root.keytab"
- "quit"
Password(.*): "{{ipa_password}}"
- name: Ensure default automount locations are present
freeipa.ansible_freeipa.ipaautomountlocation:
ipaadmin_password: "{{ipa_password}}"
name: default
state: present
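    # auto.rds1 is attached as a direct map (mount point /-), so each key below
    # becomes a direct automount of an exported rds1 directory; the stock auto.direct
    # map is removed first since it is not used here.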
- name: Clear existing direct mount map
freeipa.ansible_freeipa.ipaautomountmap:
ipaadmin_password: "{{ipa_password}}"
name: auto.direct
location: default
state: absent
- name: Create rds1 mount map
freeipa.ansible_freeipa.ipaautomountmap:
ipaadmin_password: "{{ipa_password}}"
name: auto.rds1
location: default
mount: "/-"
state: present
- name: Create rds1 mount keys
freeipa.ansible_freeipa.ipaautomountkey:
ipaadmin_password: "{{ipa_password}}"
location: default
mapname: auto.rds1
key: "/{{item}}"
info: "mgmt.{{domain}}:/mnt/rds1/{{item}}"
state: present
loop: "{{mgmt_exported_directories}}"
# - include_tasks: tasks/ipa_client_automount.yml
### webhookd ###
- name: Check if webhookd is already installed
      ansible.builtin.stat:
path: /usr/bin/webhookd
register: webhookd
- name: Install webhookd dependencies
ansible.builtin.dnf:
name: [go]
      when: not webhookd.stat.exists
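    # Build webhookd once from a pinned upstream commit (skipped when the binary
    # already exists); the Go toolchain and workspace are only needed for this build.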
- name: Build webhookd
ansible.builtin.shell:
cmd: |
go install -buildmode=pie -ldflags=-s github.com/ncarlier/webhookd@0df2a52
cp go/bin/webhookd /usr/bin/webhookd
rm -rf go
chdir: /root
      when: not webhookd.stat.exists
- name: Create webhookd environment file
ansible.builtin.template:
src: webhookd.j2
dest: /etc/default/webhookd
- name: Create webhookd unit file
ansible.builtin.template:
src: webhookd.service.j2
dest: /lib/systemd/system/webhookd.service
        mode: "0644"
- name: Reload units
ansible.builtin.systemd_service:
daemon_reload: true
- name: Enable webhookd service
ansible.builtin.systemd_service:
name: webhookd.service
state: started
enabled: true
- name: Permit webhookd service
ansible.posix.firewalld:
port: "{{mgmt_webhook_port}}/tcp"
state: enabled
permanent: true
- name: Ensure webhookd script directory exists
      ansible.builtin.file:
path: /usr/share/webhookd/scripts/
state: directory
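    # webhookd exposes each script under its scripts directory as an HTTP endpoint of
    # the same name; the templates below become the provisioning and host add/del
    # hooks, which the webhookd_notify_* overlay scripts above presumably call.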
- name: Setup user provision webhook
ansible.builtin.template:
src: webhookd_exec_provision.sh.j2
dest: /usr/share/webhookd/scripts/webhookd_exec_provision.sh
mode: 0774
- name: Setup add host webhook
ansible.builtin.template:
src: webhookd_exec_add_host.sh.j2
dest: /usr/share/webhookd/scripts/webhookd_exec_add_host.sh
mode: 0774
- name: Setup del host webhook
ansible.builtin.template:
src: webhookd_exec_del_host.sh.j2
dest: /usr/share/webhookd/scripts/webhookd_exec_del_host.sh
mode: 0774
- name: Reload firewalld
ansible.builtin.systemd_service:
name: firewalld
state: reloaded
- name: Trim
ansible.builtin.shell: fstrim -av
- name: Reboot
ansible.builtin.reboot: