Skip to content

Commit 32e16ff

Browse files
authored
Add reason to upgrade job metric (#91)
The upgrade job metric now carries a `reason` label that explains why a job reached its final state. Successful jobs can have reasons such as `Completed` or `Skipped`; failed jobs can have reasons such as `HookFailed`.
1 parent c92e2a3 commit 32e16ff

File tree

2 files changed

+28
-10
lines changed

2 files changed

+28
-10
lines changed

controllers/upgrade_information_collector.go

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ var poolsPausedDesc = prometheus.NewDesc(
4747

4848
var jobStates = prometheus.NewDesc(
4949
MetricsNamespace+"_upgradejob_state",
50-
"Returns the state of jobs in the cluster. 'pending', 'active', 'succeeded', or 'failed' are possible states.",
50+
"Returns the state of jobs in the cluster. 'pending', 'active', 'succeeded', or 'failed' are possible states. Final states may have a reason.",
5151
[]string{
5252
"upgradejob",
5353
"start_after",
@@ -56,6 +56,7 @@ var jobStates = prometheus.NewDesc(
5656
"desired_version_image",
5757
"desired_version_version",
5858
"state",
59+
"reason",
5960
"matches_disruptive_hooks",
6061
},
6162
nil,
@@ -154,6 +155,7 @@ func (m *UpgradeInformationCollector) Collect(ch chan<- prometheus.Metric) {
154155
v.Image,
155156
v.Version,
156157
jobState(job),
158+
jobStateReason(job),
157159
strconv.FormatBool(jobHasMatchingDisruptiveHook(job, jobsHooks)),
158160
)
159161
}
@@ -166,6 +168,20 @@ func boolToFloat64(b bool) float64 {
166168
return 0
167169
}
168170

171+
// jobStateReason returns the Reason of the job's Succeeded condition if that condition is true, otherwise the Reason of its Failed condition if that one is true.
172+
// It returns an empty string when neither condition is true. All final states should have a reason.
173+
func jobStateReason(job managedupgradev1beta1.UpgradeJob) string {
174+
sc := apimeta.FindStatusCondition(job.Status.Conditions, managedupgradev1beta1.UpgradeJobConditionSucceeded)
175+
if sc != nil && sc.Status == metav1.ConditionTrue {
176+
return sc.Reason
177+
}
178+
sf := apimeta.FindStatusCondition(job.Status.Conditions, managedupgradev1beta1.UpgradeJobConditionFailed)
179+
if sf != nil && sf.Status == metav1.ConditionTrue {
180+
return sf.Reason
181+
}
182+
return ""
183+
}
184+
169185
func jobState(job managedupgradev1beta1.UpgradeJob) string {
170186
if apimeta.IsStatusConditionTrue(job.Status.Conditions, managedupgradev1beta1.UpgradeJobConditionSucceeded) {
171187
return "succeeded"

controllers/upgrade_information_collector_test.go

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ func Test_ClusterUpgradingMetric(t *testing.T) {
198198
Status: metav1.ConditionTrue,
199199
}, {
200200
Type: managedupgradev1beta1.UpgradeJobConditionSucceeded,
201+
Reason: managedupgradev1beta1.UpgradeJobReasonSkipped,
201202
Status: metav1.ConditionTrue,
202203
},
203204
},
@@ -214,6 +215,7 @@ func Test_ClusterUpgradingMetric(t *testing.T) {
214215
Status: metav1.ConditionTrue,
215216
}, {
216217
Type: managedupgradev1beta1.UpgradeJobConditionFailed,
218+
Reason: managedupgradev1beta1.UpgradeJobReasonHookFailed,
217219
Status: metav1.ConditionTrue,
218220
},
219221
},
@@ -275,17 +277,17 @@ openshift_upgrade_controller_machine_config_pools_upgrading{pool="master"} %d
275277
openshift_upgrade_controller_machine_config_pools_upgrading{pool="worker"} %d
276278
openshift_upgrade_controller_machine_config_pools_upgrading{pool="paused1"} 0
277279
openshift_upgrade_controller_machine_config_pools_upgrading{pool="paused2"} 0
278-
# HELP openshift_upgrade_controller_upgradejob_state Returns the state of jobs in the cluster. 'pending', 'active', 'succeeded', or 'failed' are possible states.
280+
# HELP openshift_upgrade_controller_upgradejob_state Returns the state of jobs in the cluster. 'pending', 'active', 'succeeded', or 'failed' are possible states. Final states may have a reason.
279281
# TYPE openshift_upgrade_controller_upgradejob_state gauge
280-
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="false",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="active",upgradejob="active"} 1
281-
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="false",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="failed",upgradejob="failed"} 1
282-
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="false",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="paused",upgradejob="paused"} 1
283-
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="false",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="succeeded",upgradejob="succeeded"} 1
284-
openshift_upgrade_controller_upgradejob_state{desired_version_force="true",desired_version_image="quay.io/openshift-release-dev/ocp-release@sha256:26f6d10b18",desired_version_version="4.11.23",matches_disruptive_hooks="false",start_after="2020-01-20T20:00:00Z",start_before="2020-01-20T21:00:00Z",state="pending",upgradejob="pending"} 1
282+
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="false",reason="",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="active",upgradejob="active"} 1
283+
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="false",reason="HookFailed",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="failed",upgradejob="failed"} 1
284+
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="false",reason="",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="paused",upgradejob="paused"} 1
285+
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="false",reason="Skipped",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="succeeded",upgradejob="succeeded"} 1
286+
openshift_upgrade_controller_upgradejob_state{desired_version_force="true",desired_version_image="quay.io/openshift-release-dev/ocp-release@sha256:26f6d10b18",desired_version_version="4.11.23",matches_disruptive_hooks="false",reason="",start_after="2020-01-20T20:00:00Z",start_before="2020-01-20T21:00:00Z",state="pending",upgradejob="pending"} 1
285287
286-
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="true",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="pending",upgradejob="disruptive"} 1
287-
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="true",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="pending",upgradejob="disruptive-unclaimed-next"} 1
288-
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="false",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="pending",upgradejob="disruptive-claimed-next"} 1
288+
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="true",reason="",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="pending",upgradejob="disruptive"} 1
289+
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="true",reason="",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="pending",upgradejob="disruptive-unclaimed-next"} 1
290+
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="false",reason="",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="pending",upgradejob="disruptive-claimed-next"} 1
289291
`
290292
return strings.NewReader(
291293
fmt.Sprintf(metrics, b2i(upgrading), b2i(masterUpgrading), b2i(workerUpgrading)),

0 commit comments

Comments
 (0)