-
Notifications
You must be signed in to change notification settings - Fork 0
/
06-dashboard.yaml
127 lines (125 loc) · 3.82 KB
/
06-dashboard.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
---
# Dashboard stack for the pakupi controller. This play:
#   1. builds the Prometheus Slurm exporter from source and installs it as a
#      systemd service,
#   2. installs Prometheus and registers the exporter as a scrape target,
#   3. installs Grafana from its APT repository and starts it.
- hosts: pakupi_controller
  tasks:
    # TODO: PySlurm depends on Python 3.9, but Ubuntu ships with 3.10.
    # NOTE(review): the git ref after "pyslurm@" in the pip URL below looks
    # mangled ("[email protected]"); restore the intended tag before enabling.
    # - name: Install required Python dependencies
    #   become: yes
    #   package:
    #     name:
    #       - python3-pip
    #       - cython3
    #     state: present
    # - name: Install Pyslurm library
    #   become: yes
    #   pip:
    #     name: git+http://github.com/PySlurm/[email protected]

    # Install and configure Prometheus Slurm exporter
    - name: Install Go language support
      become: true
      package:
        name:
          - golang
        state: present

    - name: Setup slurm exporter
      block:
        - name: Create temporary build directory
          tempfile:
            state: directory
            suffix: slurm
          register: pse_builddir

        - name: Download prometheus slurm exporter
          git:
            repo: https://github.com/vpenso/prometheus-slurm-exporter.git
            dest: "{{ pse_builddir.path }}/prometheus-slurm-exporter"
            version: '0.20'

        # Rewrites the Makefile's SHELL line before building; see the issue
        # referenced in the task name for the background.
        - name: "Fix unknown variables on build (Gaudeval/pakupi/issues/14)"
          lineinfile:
            path: "{{ pse_builddir.path }}/prometheus-slurm-exporter/Makefile"
            regexp: '^SHELL :='
            line: 'SHELL := $(shell which bash) -e -o pipefail'

        - name: Build slurm exporter
          make:
            chdir: "{{ pse_builddir.path }}/prometheus-slurm-exporter"

        # Registered so the service is only restarted when the binary changed.
        - name: Install slurm exporter
          become: true
          copy:
            dest: /usr/local/bin/prometheus-slurm-exporter
            src: "{{ pse_builddir.path }}/prometheus-slurm-exporter/bin/prometheus-slurm-exporter"
            remote_src: true
            owner: root
            group: root
            mode: 'u=rwx,g=rwx,o=rx'
          register: pse_binary

        - name: Create service location
          become: true
          file:
            path: /usr/local/lib/systemd/system
            state: directory
            owner: root
            group: root
            mode: 'u=rwx,g=rwx,o=rx'

        # Registered so the service is only restarted when the unit changed.
        - name: Install slurm exporter service
          become: true
          copy:
            dest: /usr/local/lib/systemd/system/prometheus-slurm-exporter.service
            src: ./files/prometheus-slurm-exporter.service
            owner: root
            group: root
            mode: 'u=rw,g=r,o=r'
          register: pse_unit

        # daemon_reload makes systemd re-read the unit file installed above.
        # The service is restarted only when the binary or the unit definition
        # changed; otherwise it is merely ensured to be running.
        - name: Start and enable slurm exporter service
          become: true
          systemd:
            name: prometheus-slurm-exporter
            daemon_reload: true
            state: "{{ 'restarted' if (pse_binary is changed or pse_unit is changed) else 'started' }}"
            enabled: true
      always:
        # Runs whether or not the build succeeded, so repeated runs do not
        # pile up build trees in the temporary directory. Uses become because
        # the build may leave read-only files behind (Go marks its module
        # cache read-only) that the unprivileged user cannot delete.
        - name: Remove temporary build directory
          become: true
          file:
            path: "{{ pse_builddir.path }}"
            state: absent
          when: pse_builddir.path is defined

    # Install and configure Prometheus
    - name: Install prometheus
      become: true
      package:
        name:
          - prometheus
        state: present

    # The block is inserted right after the "scrape_configs:" line. The "|2"
    # indentation indicator keeps the two leading spaces of the block so it
    # nests under that key. promtool validates the candidate file before it
    # replaces the live configuration.
    - name: Add slurm exporter to Prometheus sources
      become: true
      blockinfile:
        path: /etc/prometheus/prometheus.yml
        insertafter: '^scrape_configs:'
        block: |2
            - job_name: 'slurm'
              scrape_interval: 30s
              scrape_timeout: 30s
              static_configs:
                - targets: ['localhost:10100']
        validate: promtool check config %s
      register: prometheus_scrape_config

    # Restart only when the scrape configuration actually changed; otherwise
    # just make sure the service is running and enabled.
    # Check config with http://<ip>:9090/classic/graph
    - name: Restart prometheus
      become: true
      service:
        name: prometheus
        state: "{{ 'restarted' if prometheus_scrape_config is changed else 'started' }}"
        enabled: true

    # Install and configure grafana
    # NOTE(review): the apt_key module relies on the deprecated apt-key tool;
    # consider a keyring file referenced via signed-by. Migrating needs care on
    # hosts that already carry the repository entry written below.
    - name: Add Grafana repository key
      become: true
      apt_key:
        url: https://packages.grafana.com/gpg.key
        state: present

    - name: Add Grafana repository
      become: true
      apt_repository:
        repo: "deb https://packages.grafana.com/oss/deb stable main"
        state: present
        filename: grafana

    - name: Fetch packages from new repository
      become: true
      apt:
        name: grafana
        state: present
        update_cache: true

    - name: start Grafana server
      become: true
      service:
        name: grafana-server
        state: started
        enabled: true
    # TODO Set admin user and password https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/#admin_user
    # TODO Configure default dashboard (json) https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/#default_home_dashboard_path