Skip to content

Commit 375895b

Browse files
zhuqi-lucas authored and craigcondit committed
[YUNIKORN-2818] Fix state tracking metrics for app and queue (#951)
Closes: #951 Signed-off-by: Craig Condit <[email protected]>
1 parent 8a4acda commit 375895b

File tree

9 files changed

+499
-67
lines changed

9 files changed

+499
-67
lines changed

pkg/metrics/queue.go

Lines changed: 84 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -27,11 +27,16 @@ import (
2727
)
2828

2929
const (
30-
AppAccepted = "accepted"
31-
AppRunning = "running"
32-
AppFailed = "failed"
33-
AppRejected = "rejected"
34-
AppCompleted = "completed"
30+
AppNew = "new"
31+
AppAccepted = "accepted"
32+
AppRunning = "running"
33+
AppFailing = "failing"
34+
AppFailed = "failed"
35+
AppRejected = "rejected"
36+
AppResuming = "resuming"
37+
AppCompleting = "completing"
38+
AppCompleted = "completed"
39+
AppExpired = "expired"
3540

3641
ContainerReleased = "released"
3742
ContainerAllocated = "allocated"
@@ -65,15 +70,15 @@ func InitQueueMetrics(name string) *QueueMetrics {
6570
Namespace: Namespace,
6671
Name: "queue_app",
6772
ConstLabels: prometheus.Labels{"queue": name},
68-
Help: "Queue application metrics. State of the application includes `accepted`, `rejected`, `running`, `failed`, `completed`.",
73+
Help: "Queue application metrics. State of the application includes `new`, `accepted`, `rejected`, `running`, `failing`, `failed`, `resuming`, `completing`, `completed`.",
6974
}, []string{"state"})
7075

7176
q.appMetricsSubsystem = prometheus.NewGaugeVec(
7277
prometheus.GaugeOpts{
7378
Namespace: Namespace,
7479
Subsystem: replaceStr,
7580
Name: "queue_app",
76-
Help: "Queue application metrics. State of the application includes `accepted`, `rejected`, `running`, `failed`, `completed`.",
81+
Help: "Queue application metrics. State of the application includes `new`, `accepted`, `rejected`, `running`, `failing`, `failed`, `resuming`, `completing`, `completed`.",
7782
}, []string{"state"})
7883

7984
q.containerMetrics = prometheus.NewCounterVec(
@@ -160,10 +165,31 @@ func (m *QueueMetrics) GetQueueApplicationsRunning() (int, error) {
160165
return -1, err
161166
}
162167

168+
func (m *QueueMetrics) IncQueueApplicationsNew() {
169+
m.incQueueApplications(AppNew)
170+
}
171+
172+
func (m *QueueMetrics) DecQueueApplicationsNew() {
173+
m.decQueueApplications(AppNew)
174+
}
175+
176+
func (m *QueueMetrics) GetQueueApplicationsNew() (int, error) {
177+
metricDto := &dto.Metric{}
178+
err := m.appMetricsLabel.WithLabelValues(AppNew).Write(metricDto)
179+
if err == nil {
180+
return int(*metricDto.Gauge.Value), nil
181+
}
182+
return -1, err
183+
}
184+
163185
func (m *QueueMetrics) IncQueueApplicationsAccepted() {
164186
m.incQueueApplications(AppAccepted)
165187
}
166188

189+
func (m *QueueMetrics) DecQueueApplicationsAccepted() {
190+
m.decQueueApplications(AppAccepted)
191+
}
192+
167193
func (m *QueueMetrics) GetQueueApplicationsAccepted() (int, error) {
168194
metricDto := &dto.Metric{}
169195
err := m.appMetricsLabel.WithLabelValues(AppAccepted).Write(metricDto)
@@ -186,6 +212,40 @@ func (m *QueueMetrics) GetQueueApplicationsRejected() (int, error) {
186212
return -1, err
187213
}
188214

215+
func (m *QueueMetrics) IncQueueApplicationsResuming() {
216+
m.incQueueApplications(AppResuming)
217+
}
218+
219+
func (m *QueueMetrics) DecQueueApplicationsResuming() {
220+
m.decQueueApplications(AppResuming)
221+
}
222+
223+
func (m *QueueMetrics) GetQueueApplicationsResuming() (int, error) {
224+
metricDto := &dto.Metric{}
225+
err := m.appMetricsLabel.WithLabelValues(AppResuming).Write(metricDto)
226+
if err == nil {
227+
return int(*metricDto.Gauge.Value), nil
228+
}
229+
return -1, err
230+
}
231+
232+
func (m *QueueMetrics) IncQueueApplicationsFailing() {
233+
m.incQueueApplications(AppFailing)
234+
}
235+
236+
func (m *QueueMetrics) DecQueueApplicationsFailing() {
237+
m.decQueueApplications(AppFailing)
238+
}
239+
240+
func (m *QueueMetrics) GetQueueApplicationsFailing() (int, error) {
241+
metricDto := &dto.Metric{}
242+
err := m.appMetricsLabel.WithLabelValues(AppFailing).Write(metricDto)
243+
if err == nil {
244+
return int(*metricDto.Gauge.Value), nil
245+
}
246+
return -1, err
247+
}
248+
189249
func (m *QueueMetrics) IncQueueApplicationsFailed() {
190250
m.incQueueApplications(AppFailed)
191251
}
@@ -199,6 +259,23 @@ func (m *QueueMetrics) GetQueueApplicationsFailed() (int, error) {
199259
return -1, err
200260
}
201261

262+
func (m *QueueMetrics) IncQueueApplicationsCompleting() {
263+
m.incQueueApplications(AppCompleting)
264+
}
265+
266+
func (m *QueueMetrics) DecQueueApplicationsCompleting() {
267+
m.decQueueApplications(AppCompleting)
268+
}
269+
270+
func (m *QueueMetrics) GetQueueApplicationsCompleting() (int, error) {
271+
metricDto := &dto.Metric{}
272+
err := m.appMetricsLabel.WithLabelValues(AppCompleting).Write(metricDto)
273+
if err == nil {
274+
return int(*metricDto.Gauge.Value), nil
275+
}
276+
return -1, err
277+
}
278+
202279
func (m *QueueMetrics) IncQueueApplicationsCompleted() {
203280
m.incQueueApplications(AppCompleted)
204281
}

pkg/metrics/queue_test.go

Lines changed: 98 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,12 +30,38 @@ import (
3030

3131
var qm *QueueMetrics
3232

33+
func TestApplicationsNew(t *testing.T) {
34+
qm = getQueueMetrics()
35+
defer unregisterQueueMetrics()
36+
37+
qm.IncQueueApplicationsNew()
38+
verifyAppMetrics(t, "new")
39+
40+
curr, err := qm.GetQueueApplicationsNew()
41+
assert.NilError(t, err)
42+
assert.Equal(t, 1, curr)
43+
44+
qm.DecQueueApplicationsNew()
45+
curr, err = qm.GetQueueApplicationsNew()
46+
assert.NilError(t, err)
47+
assert.Equal(t, 0, curr)
48+
}
49+
3350
func TestApplicationsRunning(t *testing.T) {
3451
qm = getQueueMetrics()
3552
defer unregisterQueueMetrics()
3653

3754
qm.IncQueueApplicationsRunning()
3855
verifyAppMetrics(t, "running")
56+
57+
curr, err := qm.GetQueueApplicationsRunning()
58+
assert.NilError(t, err)
59+
assert.Equal(t, 1, curr)
60+
61+
qm.DecQueueApplicationsRunning()
62+
curr, err = qm.GetQueueApplicationsRunning()
63+
assert.NilError(t, err)
64+
assert.Equal(t, 0, curr)
3965
}
4066

4167
func TestApplicationsAccepted(t *testing.T) {
@@ -44,6 +70,49 @@ func TestApplicationsAccepted(t *testing.T) {
4470

4571
qm.IncQueueApplicationsAccepted()
4672
verifyAppMetrics(t, "accepted")
73+
74+
curr, err := qm.GetQueueApplicationsAccepted()
75+
assert.NilError(t, err)
76+
assert.Equal(t, 1, curr)
77+
78+
qm.DecQueueApplicationsAccepted()
79+
curr, err = qm.GetQueueApplicationsAccepted()
80+
assert.NilError(t, err)
81+
assert.Equal(t, 0, curr)
82+
}
83+
84+
func TestApplicationsResuming(t *testing.T) {
85+
qm = getQueueMetrics()
86+
defer unregisterQueueMetrics()
87+
88+
qm.IncQueueApplicationsResuming()
89+
verifyAppMetrics(t, "resuming")
90+
91+
curr, err := qm.GetQueueApplicationsResuming()
92+
assert.NilError(t, err)
93+
assert.Equal(t, 1, curr)
94+
95+
qm.DecQueueApplicationsResuming()
96+
curr, err = qm.GetQueueApplicationsResuming()
97+
assert.NilError(t, err)
98+
assert.Equal(t, 0, curr)
99+
}
100+
101+
func TestApplicationsFailing(t *testing.T) {
102+
qm = getQueueMetrics()
103+
defer unregisterQueueMetrics()
104+
105+
qm.IncQueueApplicationsFailing()
106+
verifyAppMetrics(t, "failing")
107+
108+
curr, err := qm.GetQueueApplicationsFailing()
109+
assert.NilError(t, err)
110+
assert.Equal(t, 1, curr)
111+
112+
qm.DecQueueApplicationsFailing()
113+
curr, err = qm.GetQueueApplicationsFailing()
114+
assert.NilError(t, err)
115+
assert.Equal(t, 0, curr)
47116
}
48117

49118
func TestApplicationsRejected(t *testing.T) {
@@ -52,6 +121,10 @@ func TestApplicationsRejected(t *testing.T) {
52121

53122
qm.IncQueueApplicationsRejected()
54123
verifyAppMetrics(t, "rejected")
124+
125+
curr, err := qm.GetQueueApplicationsRejected()
126+
assert.NilError(t, err)
127+
assert.Equal(t, 1, curr)
55128
}
56129

57130
func TestApplicationsFailed(t *testing.T) {
@@ -60,6 +133,27 @@ func TestApplicationsFailed(t *testing.T) {
60133

61134
qm.IncQueueApplicationsFailed()
62135
verifyAppMetrics(t, "failed")
136+
137+
curr, err := qm.GetQueueApplicationsFailed()
138+
assert.NilError(t, err)
139+
assert.Equal(t, 1, curr)
140+
}
141+
142+
func TestApplicationsCompleting(t *testing.T) {
143+
qm = getQueueMetrics()
144+
defer unregisterQueueMetrics()
145+
146+
qm.IncQueueApplicationsCompleting()
147+
verifyAppMetrics(t, "completing")
148+
149+
curr, err := qm.GetQueueApplicationsCompleting()
150+
assert.NilError(t, err)
151+
assert.Equal(t, 1, curr)
152+
153+
qm.DecQueueApplicationsCompleting()
154+
curr, err = qm.GetQueueApplicationsCompleting()
155+
assert.NilError(t, err)
156+
assert.Equal(t, 0, curr)
63157
}
64158

65159
func TestApplicationsCompleted(t *testing.T) {
@@ -68,6 +162,10 @@ func TestApplicationsCompleted(t *testing.T) {
68162

69163
qm.IncQueueApplicationsCompleted()
70164
verifyAppMetrics(t, "completed")
165+
166+
curr, err := qm.GetQueueApplicationsCompleted()
167+
assert.NilError(t, err)
168+
assert.Equal(t, 1, curr)
71169
}
72170

73171
func TestAllocatedContainers(t *testing.T) {

0 commit comments

Comments
 (0)