Skip to content

Commit

Permalink
Add reason to upgrade job metric (#91)
Browse files Browse the repository at this point in the history
Final states now add a reason why they reached the given state to the job metric. Successful jobs can have reasons such as `Completed` or `Skipped`. Failed jobs can have reasons like `HookFailed`.
  • Loading branch information
bastjan authored Jun 21, 2024
1 parent c92e2a3 commit 32e16ff
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 10 deletions.
18 changes: 17 additions & 1 deletion controllers/upgrade_information_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ var poolsPausedDesc = prometheus.NewDesc(

var jobStates = prometheus.NewDesc(
MetricsNamespace+"_upgradejob_state",
"Returns the state of jobs in the cluster. 'pending', 'active', 'succeeded', or 'failed' are possible states.",
"Returns the state of jobs in the cluster. 'pending', 'active', 'succeeded', or 'failed' are possible states. Final states may have a reason.",
[]string{
"upgradejob",
"start_after",
Expand All @@ -56,6 +56,7 @@ var jobStates = prometheus.NewDesc(
"desired_version_image",
"desired_version_version",
"state",
"reason",
"matches_disruptive_hooks",
},
nil,
Expand Down Expand Up @@ -154,6 +155,7 @@ func (m *UpgradeInformationCollector) Collect(ch chan<- prometheus.Metric) {
v.Image,
v.Version,
jobState(job),
jobStateReason(job),
strconv.FormatBool(jobHasMatchingDisruptiveHook(job, jobsHooks)),
)
}
Expand All @@ -166,6 +168,20 @@ func boolToFloat64(b bool) float64 {
return 0
}

// jobStateReason reports why the job is in a final state.
// The Succeeded condition is consulted first, then the Failed condition; the
// Reason of the first one whose status is true is returned.
// An empty string is returned when neither condition is present and true.
func jobStateReason(job managedupgradev1beta1.UpgradeJob) string {
	// Order matters: a true Succeeded condition wins over a true Failed one.
	finalConditions := [...]string{
		managedupgradev1beta1.UpgradeJobConditionSucceeded,
		managedupgradev1beta1.UpgradeJobConditionFailed,
	}
	for _, conditionType := range finalConditions {
		cond := apimeta.FindStatusCondition(job.Status.Conditions, conditionType)
		if cond == nil || cond.Status != metav1.ConditionTrue {
			continue
		}
		return cond.Reason
	}
	return ""
}

func jobState(job managedupgradev1beta1.UpgradeJob) string {
if apimeta.IsStatusConditionTrue(job.Status.Conditions, managedupgradev1beta1.UpgradeJobConditionSucceeded) {
return "succeeded"
Expand Down
20 changes: 11 additions & 9 deletions controllers/upgrade_information_collector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ func Test_ClusterUpgradingMetric(t *testing.T) {
Status: metav1.ConditionTrue,
}, {
Type: managedupgradev1beta1.UpgradeJobConditionSucceeded,
Reason: managedupgradev1beta1.UpgradeJobReasonSkipped,
Status: metav1.ConditionTrue,
},
},
Expand All @@ -214,6 +215,7 @@ func Test_ClusterUpgradingMetric(t *testing.T) {
Status: metav1.ConditionTrue,
}, {
Type: managedupgradev1beta1.UpgradeJobConditionFailed,
Reason: managedupgradev1beta1.UpgradeJobReasonHookFailed,
Status: metav1.ConditionTrue,
},
},
Expand Down Expand Up @@ -275,17 +277,17 @@ openshift_upgrade_controller_machine_config_pools_upgrading{pool="master"} %d
openshift_upgrade_controller_machine_config_pools_upgrading{pool="worker"} %d
openshift_upgrade_controller_machine_config_pools_upgrading{pool="paused1"} 0
openshift_upgrade_controller_machine_config_pools_upgrading{pool="paused2"} 0
# HELP openshift_upgrade_controller_upgradejob_state Returns the state of jobs in the cluster. 'pending', 'active', 'succeeded', or 'failed' are possible states.
# HELP openshift_upgrade_controller_upgradejob_state Returns the state of jobs in the cluster. 'pending', 'active', 'succeeded', or 'failed' are possible states. Final states may have a reason.
# TYPE openshift_upgrade_controller_upgradejob_state gauge
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="false",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="active",upgradejob="active"} 1
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="false",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="failed",upgradejob="failed"} 1
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="false",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="paused",upgradejob="paused"} 1
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="false",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="succeeded",upgradejob="succeeded"} 1
openshift_upgrade_controller_upgradejob_state{desired_version_force="true",desired_version_image="quay.io/openshift-release-dev/ocp-release@sha256:26f6d10b18",desired_version_version="4.11.23",matches_disruptive_hooks="false",start_after="2020-01-20T20:00:00Z",start_before="2020-01-20T21:00:00Z",state="pending",upgradejob="pending"} 1
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="false",reason="",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="active",upgradejob="active"} 1
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="false",reason="HookFailed",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="failed",upgradejob="failed"} 1
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="false",reason="",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="paused",upgradejob="paused"} 1
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="false",reason="Skipped",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="succeeded",upgradejob="succeeded"} 1
openshift_upgrade_controller_upgradejob_state{desired_version_force="true",desired_version_image="quay.io/openshift-release-dev/ocp-release@sha256:26f6d10b18",desired_version_version="4.11.23",matches_disruptive_hooks="false",reason="",start_after="2020-01-20T20:00:00Z",start_before="2020-01-20T21:00:00Z",state="pending",upgradejob="pending"} 1
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="true",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="pending",upgradejob="disruptive"} 1
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="true",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="pending",upgradejob="disruptive-unclaimed-next"} 1
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="false",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="pending",upgradejob="disruptive-claimed-next"} 1
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="true",reason="",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="pending",upgradejob="disruptive"} 1
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="true",reason="",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="pending",upgradejob="disruptive-unclaimed-next"} 1
openshift_upgrade_controller_upgradejob_state{desired_version_force="false",desired_version_image="",desired_version_version="",matches_disruptive_hooks="false",reason="",start_after="0001-01-01T00:00:00Z",start_before="0001-01-01T00:00:00Z",state="pending",upgradejob="disruptive-claimed-next"} 1
`
return strings.NewReader(
fmt.Sprintf(metrics, b2i(upgrading), b2i(masterUpgrading), b2i(workerUpgrading)),
Expand Down

0 comments on commit 32e16ff

Please sign in to comment.