Skip to content

Commit

Permalink
Mock rework for concurrent usage, remove entity ID annotations, reque…
Browse files Browse the repository at this point in the history
…ue after incrementHumioClusterPodRevision and refactor podReadyCountByRevision (#843)

* Improve mock for concurrent usage and remove entity ID annotation

* look up searchdomain instead view during entity creation

* skip status update if we got error when fetching version

* add logging

* Requeue after incrementHumioClusterPodRevision and refactor podReadyCountByRevision

Requeue after incrementHumioClusterPodRevision fixes a bug where the
operator could in some cases create a new pod using the old revision
as it did not fetch the updated HumioCluster resource before creating
new pods.

Refactor podReadyCountByRevision to extract the logic for what pods to
should be marked as running and ensures we update only the pods that match
the expected pod revision as running, as any older pod revisions should
already be handled during previous iterations.

* fix staticcheck

* fix function name for validating searchdomain in mock for aggregate alerts
  • Loading branch information
SaaldjorMike authored Aug 21, 2024
1 parent 8eefe92 commit 4eb7b82
Show file tree
Hide file tree
Showing 34 changed files with 1,100 additions and 934 deletions.
34 changes: 0 additions & 34 deletions controllers/humioaction_annotations.go

This file was deleted.

23 changes: 9 additions & 14 deletions controllers/humioaction_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,22 +87,22 @@ func (r *HumioActionReconciler) Reconcile(ctx context.Context, req ctrl.Request)
return reconcile.Result{}, r.logErrorAndReturn(err, "could not resolve secret references")
}

if _, err := humio.ActionFromActionCR(ha); err != nil {
r.Log.Error(err, "unable to validate action")
err = r.setState(ctx, humiov1alpha1.HumioActionStateConfigError, ha)
if err != nil {
return reconcile.Result{}, r.logErrorAndReturn(err, "unable to set action state")
if _, validateErr := humio.ActionFromActionCR(ha); validateErr != nil {
r.Log.Error(validateErr, "unable to validate action")
setStateErr := r.setState(ctx, humiov1alpha1.HumioActionStateConfigError, ha)
if setStateErr != nil {
return reconcile.Result{}, r.logErrorAndReturn(setStateErr, "unable to set action state")
}
return reconcile.Result{}, err
return reconcile.Result{}, validateErr
}

defer func(ctx context.Context, humioClient humio.Client, ha *humiov1alpha1.HumioAction) {
curAction, err := r.HumioClient.GetAction(cluster.Config(), req, ha)
_, err := r.HumioClient.GetAction(cluster.Config(), req, ha)
if errors.As(err, &humioapi.EntityNotFound{}) {
_ = r.setState(ctx, humiov1alpha1.HumioActionStateNotFound, ha)
return
}
if err != nil || curAction == nil {
if err != nil {
_ = r.setState(ctx, humiov1alpha1.HumioActionStateUnknown, ha)
return
}
Expand Down Expand Up @@ -160,12 +160,7 @@ func (r *HumioActionReconciler) reconcileHumioAction(ctx context.Context, config
if err != nil {
return reconcile.Result{}, r.logErrorAndReturn(err, "could not create action")
}
r.Log.Info("Created action", "Action", ha.Spec.Name)

result, err := r.reconcileHumioActionAnnotations(ctx, addedAction, ha, req)
if err != nil {
return result, err
}
r.Log.Info("Created action", "Action", ha.Spec.Name, "ID", addedAction.ID)
return reconcile.Result{Requeue: true}, nil
}
if err != nil {
Expand Down
42 changes: 0 additions & 42 deletions controllers/humioaggregatealert_annotations.go

This file was deleted.

13 changes: 2 additions & 11 deletions controllers/humioaggregatealert_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,12 +156,7 @@ func (r *HumioAggregateAlertReconciler) reconcileHumioAggregateAlert(ctx context
if err != nil {
return reconcile.Result{}, r.logErrorAndReturn(err, "could not create aggregate alert")
}
r.Log.Info("Created aggregate alert", "AggregateAlert", haa.Spec.Name)

result, err := r.reconcileHumioAggregateAlertAnnotations(ctx, addedAggregateAlert, haa, req)
if err != nil {
return result, err
}
r.Log.Info("Created aggregate alert", "AggregateAlert", haa.Spec.Name, "ID", addedAggregateAlert.ID)
return reconcile.Result{Requeue: true}, nil
}
if err != nil {
Expand All @@ -173,11 +168,7 @@ func (r *HumioAggregateAlertReconciler) reconcileHumioAggregateAlert(ctx context
if err := r.HumioClient.ValidateActionsForAggregateAlert(config, req, haa); err != nil {
return reconcile.Result{}, r.logErrorAndReturn(err, "could not validate actions for aggregate alert")
}
expectedAggregateAlert, err := humio.AggregateAlertTransform(haa)
if err != nil {
return reconcile.Result{}, r.logErrorAndReturn(err, "could not parse expected AggregateAlert")
}

expectedAggregateAlert := humio.AggregateAlertTransform(haa)
sanitizeAggregateAlert(curAggregateAlert)
if !reflect.DeepEqual(*curAggregateAlert, *expectedAggregateAlert) {
r.Log.Info(fmt.Sprintf("AggregateAlert differs, triggering update, expected %#v, got: %#v",
Expand Down
42 changes: 0 additions & 42 deletions controllers/humioalert_annotations.go

This file was deleted.

19 changes: 5 additions & 14 deletions controllers/humioalert_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,13 @@ func (r *HumioAlertReconciler) Reconcile(ctx context.Context, req ctrl.Request)
}

defer func(ctx context.Context, humioClient humio.Client, ha *humiov1alpha1.HumioAlert) {
curAlert, err := r.HumioClient.GetAlert(cluster.Config(), req, ha)
_, err := r.HumioClient.GetAlert(cluster.Config(), req, ha)
if errors.As(err, &humioapi.EntityNotFound{}) {
_ = r.setState(ctx, humiov1alpha1.HumioAlertStateNotFound, ha)
return
}
if err != nil || curAlert == nil {
_ = r.setState(ctx, humiov1alpha1.HumioAlertStateConfigError, ha)
if err != nil {
_ = r.setState(ctx, humiov1alpha1.HumioAlertStateUnknown, ha)
return
}
_ = r.setState(ctx, humiov1alpha1.HumioAlertStateExists, ha)
Expand Down Expand Up @@ -149,12 +149,7 @@ func (r *HumioAlertReconciler) reconcileHumioAlert(ctx context.Context, config *
if err != nil {
return reconcile.Result{}, r.logErrorAndReturn(err, "could not create alert")
}
r.Log.Info("Created alert", "Alert", ha.Spec.Name)

result, err := r.reconcileHumioAlertAnnotations(ctx, addedAlert, ha, req)
if err != nil {
return result, err
}
r.Log.Info("Created alert", "Alert", ha.Spec.Name, "ID", addedAlert.ID)
return reconcile.Result{Requeue: true}, nil
}
if err != nil {
Expand All @@ -167,11 +162,7 @@ func (r *HumioAlertReconciler) reconcileHumioAlert(ctx context.Context, config *
if err != nil {
return reconcile.Result{}, r.logErrorAndReturn(err, "could not get action id mapping")
}
expectedAlert, err := humio.AlertTransform(ha, actionIdMap)
if err != nil {
return reconcile.Result{}, r.logErrorAndReturn(err, "could not parse expected Alert")
}

expectedAlert := humio.AlertTransform(ha, actionIdMap)
sanitizeAlert(curAlert)
if !reflect.DeepEqual(*curAlert, *expectedAlert) {
r.Log.Info(fmt.Sprintf("Alert differs, triggering update, expected %#v, got: %#v",
Expand Down
2 changes: 1 addition & 1 deletion controllers/humiocluster_annotations.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ import (

const (
certHashAnnotation = "humio.com/certificate-hash"
podHashAnnotation = "humio.com/pod-hash"
PodHashAnnotation = "humio.com/pod-hash"
PodRevisionAnnotation = "humio.com/pod-revision"
envVarSourceHashAnnotation = "humio.com/env-var-source-hash"
pvcHashAnnotation = "humio_pvc_hash"
Expand Down
10 changes: 7 additions & 3 deletions controllers/humiocluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,8 @@ func (r *HumioClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request
if err != nil {
opts.withMessage(err.Error())
}
return r.updateStatus(ctx, r.Client.Status(), hc, opts.withState(hc.Status.State))
_, _ = r.updateStatus(ctx, r.Client.Status(), hc, opts.withState(hc.Status.State))
return reconcile.Result{Requeue: true}, nil
}
}

Expand Down Expand Up @@ -318,6 +319,7 @@ func (r *HumioClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request
status, err := humioClient.Status(cluster.Config(), req)
if err != nil {
r.Log.Error(err, "unable to get cluster status")
return
}
_, _ = r.updateStatus(ctx, r.Client.Status(), hc, opts.withVersion(status.Version))
}
Expand Down Expand Up @@ -376,7 +378,7 @@ func (r *HumioClusterReconciler) nodePoolPodsReady(ctx context.Context, hc *humi
if podsStatus.waitingOnPods() {
r.Log.Info("waiting on pods, refusing to continue with reconciliation until all pods are ready")
r.Log.Info(fmt.Sprintf("cluster state is %s. waitingOnPods=%v, "+
"revisionsInSync=%v, podRevisisons=%v, podDeletionTimestampSet=%v, podNames=%v, expectedRunningPods=%v, "+
"revisionsInSync=%v, podRevisions=%v, podDeletionTimestampSet=%v, podNames=%v, expectedRunningPods=%v, "+
"podsReady=%v, podsNotReady=%v",
hc.Status.State, podsStatus.waitingOnPods(), podsStatus.podRevisionsInSync(),
podsStatus.podRevisions, podsStatus.podDeletionTimestampSet, podsStatus.podNames,
Expand Down Expand Up @@ -2044,6 +2046,7 @@ func (r *HumioClusterReconciler) ensureMismatchedPodsAreDeleted(ctx context.Cont
return r.updateStatus(ctx, r.Client.Status(), hc, statusOptions().
withMessage(r.logErrorAndReturn(err, fmt.Sprintf("failed to increment pod revision to %d", revision)).Error()))
}
return reconcile.Result{Requeue: true}, nil
}
if !desiredLifecycleState.WantsUpgrade() && desiredLifecycleState.WantsRestart() {
if result, err := r.updateStatus(ctx, r.Client.Status(), hc, statusOptions().
Expand All @@ -2054,6 +2057,7 @@ func (r *HumioClusterReconciler) ensureMismatchedPodsAreDeleted(ctx context.Cont
return r.updateStatus(ctx, r.Client.Status(), hc, statusOptions().
withMessage(r.logErrorAndReturn(err, fmt.Sprintf("failed to increment pod revision to %d", revision)).Error()))
}
return reconcile.Result{Requeue: true}, nil
}
}
if desiredLifecycleState.ShouldDeletePod() {
Expand Down Expand Up @@ -2119,7 +2123,7 @@ func (r *HumioClusterReconciler) ensureMismatchedPodsAreDeleted(ctx context.Cont
}

r.Log.Info(fmt.Sprintf("cluster state is still %s. waitingOnPods=%v, podBeingDeleted=%v, "+
"revisionsInSync=%v, podRevisisons=%v, podDeletionTimestampSet=%v, podNames=%v, podHumioVersions=%v, expectedRunningPods=%v, podsReady=%v, podsNotReady=%v",
"revisionsInSync=%v, podRevisions=%v, podDeletionTimestampSet=%v, podNames=%v, podHumioVersions=%v, expectedRunningPods=%v, podsReady=%v, podsNotReady=%v",
hc.Status.State, podsStatus.waitingOnPods(), desiredLifecycleState.ShouldDeletePod(), podsStatus.podRevisionsInSync(),
podsStatus.podRevisions, podsStatus.podDeletionTimestampSet, podsStatus.podNames, podsStatus.podImageVersions, podsStatus.expectedRunningPods, podsStatus.readyCount, podsStatus.notReadyCount))

Expand Down
4 changes: 4 additions & 0 deletions controllers/humiocluster_pod_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package controllers
import (
"context"
"fmt"
"sort"
"strconv"

humiov1alpha1 "github.com/humio/humio-operator/api/v1alpha1"
Expand Down Expand Up @@ -39,6 +40,9 @@ func (r *HumioClusterReconciler) getPodsStatus(ctx context.Context, hc *humiov1a
notReadyCount: len(foundPodList),
expectedRunningPods: hnp.GetNodeCount(),
}
sort.Slice(foundPodList, func(i, j int) bool {
return foundPodList[i].Name < foundPodList[j].Name
})
var podsReady, podsNotReady []string
for _, pod := range foundPodList {
podRevisionStr := pod.Annotations[PodRevisionAnnotation]
Expand Down
17 changes: 8 additions & 9 deletions controllers/humiocluster_pods.go
Original file line number Diff line number Diff line change
Expand Up @@ -793,7 +793,7 @@ func (r *HumioClusterReconciler) createPod(ctx context.Context, hc *humiov1alpha
return &corev1.Pod{}, r.logErrorAndReturn(err, "could not set controller reference")
}
r.Log.Info(fmt.Sprintf("pod %s will use attachments %+v", pod.Name, attachments))
pod.Annotations[podHashAnnotation] = podSpecAsSHA256(hnp, *pod)
pod.Annotations[PodHashAnnotation] = podSpecAsSHA256(hnp, *pod)

if attachments.envVarSourceData != nil {
b, err := json.Marshal(attachments.envVarSourceData)
Expand All @@ -808,15 +808,14 @@ func (r *HumioClusterReconciler) createPod(ctx context.Context, hc *humiov1alpha
}

_, podRevision := hnp.GetHumioClusterNodePoolRevisionAnnotation()
r.Log.Info(fmt.Sprintf("setting pod %s revision to %d", pod.Name, podRevision))
r.setPodRevision(pod, podRevision)

r.Log.Info(fmt.Sprintf("creating pod %s", pod.Name))
r.Log.Info(fmt.Sprintf("creating pod %s with revision %d", pod.Name, podRevision))
err = r.Create(ctx, pod)
if err != nil {
return &corev1.Pod{}, err
}
r.Log.Info(fmt.Sprintf("successfully created pod %s", pod.Name))
r.Log.Info(fmt.Sprintf("successfully created pod %s with revision %d", pod.Name, podRevision))
return pod, nil
}

Expand All @@ -828,7 +827,7 @@ func (r *HumioClusterReconciler) waitForNewPods(ctx context.Context, hnp *HumioN
// revision that were still terminating when the new pod was created
var expectedPodCount int
for _, pod := range previousPodList {
if pod.Annotations[podHashAnnotation] == expectedPods[0].Annotations[podHashAnnotation] {
if pod.Annotations[PodHashAnnotation] == expectedPods[0].Annotations[PodHashAnnotation] {
expectedPodCount++
}
}
Expand All @@ -843,7 +842,7 @@ func (r *HumioClusterReconciler) waitForNewPods(ctx context.Context, hnp *HumioN
return err
}
for _, pod := range latestPodList {
if pod.Annotations[podHashAnnotation] == expectedPods[0].Annotations[podHashAnnotation] {
if pod.Annotations[PodHashAnnotation] == expectedPods[0].Annotations[PodHashAnnotation] {
podsMatchingRevisionCount++
}
}
Expand All @@ -857,7 +856,7 @@ func (r *HumioClusterReconciler) waitForNewPods(ctx context.Context, hnp *HumioN
}

func (r *HumioClusterReconciler) podsMatch(hnp *HumioNodePool, pod corev1.Pod, desiredPod corev1.Pod) (bool, error) {
if _, ok := pod.Annotations[podHashAnnotation]; !ok {
if _, ok := pod.Annotations[PodHashAnnotation]; !ok {
return false, fmt.Errorf("did not find annotation with pod hash")
}
if _, ok := pod.Annotations[PodRevisionAnnotation]; !ok {
Expand All @@ -872,7 +871,7 @@ func (r *HumioClusterReconciler) podsMatch(hnp *HumioNodePool, pod corev1.Pod, d
desiredPodHash := podSpecAsSHA256(hnp, desiredPod)
_, existingPodRevision := hnp.GetHumioClusterNodePoolRevisionAnnotation()
r.setPodRevision(&desiredPod, existingPodRevision)
if pod.Annotations[podHashAnnotation] == desiredPodHash {
if pod.Annotations[PodHashAnnotation] == desiredPodHash {
specMatches = true
}
if pod.Annotations[PodRevisionAnnotation] == desiredPod.Annotations[PodRevisionAnnotation] {
Expand Down Expand Up @@ -905,7 +904,7 @@ func (r *HumioClusterReconciler) podsMatch(hnp *HumioNodePool, pod corev1.Pod, d
sanitizedDesiredPod := sanitizePod(hnp, desiredPodCopy)
podSpecDiff := cmp.Diff(sanitizedCurrentPod.Spec, sanitizedDesiredPod.Spec)
if !specMatches {
r.Log.Info(fmt.Sprintf("pod annotation %s does not match desired pod: got %+v, expected %+v", podHashAnnotation, pod.Annotations[podHashAnnotation], desiredPodHash), "podSpecDiff", podSpecDiff)
r.Log.Info(fmt.Sprintf("pod annotation %s does not match desired pod: got %+v, expected %+v", PodHashAnnotation, pod.Annotations[PodHashAnnotation], desiredPodHash), "podSpecDiff", podSpecDiff)
return false, nil
}
if !revisionMatches {
Expand Down
Loading

0 comments on commit 4eb7b82

Please sign in to comment.