only update revision if upgrading/restarting or initial settings
SaaldjorMike committed Oct 21, 2024
1 parent a146160 commit e6472d9
Showing 2 changed files with 51 additions and 40 deletions.
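In short, the diff below gates when a node pool's desired pod revision may be bumped: only on the very first reconcile of the pool (revision 0) or while the cluster is already in a pod-replacement state (Upgrading/Restarting), and the state transition is now checked before the revision update. A minimal standalone sketch of that gating condition follows; the constant values mirror the state names in the diff, but the function and constant names are illustrative stand-ins (the operator code uses hnp.GetDesiredPodRevision(), hc.Status.State and the k8s.io/utils/strings/slices helper rather than the stdlib slices package used here):

package main

import (
	"fmt"
	"slices"
)

const (
	stateRunning    = "Running"
	stateUpgrading  = "Upgrading"
	stateRestarting = "Restarting"
)

// shouldPersistNewRevision mirrors the gate added in ensureMismatchedPodsAreDeleted:
// write a new desired pod revision only on the very first reconcile of a node pool
// (revision 0) or while the cluster is already replacing pods.
func shouldPersistNewRevision(desiredPodRevision int, clusterState string) bool {
	return desiredPodRevision == 0 ||
		slices.Contains([]string{stateUpgrading, stateRestarting}, clusterState)
}

func main() {
	fmt.Println(shouldPersistNewRevision(0, stateRunning))   // true: initial setup
	fmt.Println(shouldPersistNewRevision(3, stateRunning))   // false: state flips to Upgrading/Restarting first
	fmt.Println(shouldPersistNewRevision(3, stateUpgrading)) // true: mid-upgrade, safe to bump
}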
81 changes: 46 additions & 35 deletions controllers/humiocluster_controller.go
@@ -31,6 +31,7 @@ import (
networkingv1 "k8s.io/api/networking/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
"k8s.io/utils/strings/slices"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/reconcile"

@@ -1823,6 +1824,26 @@ func (r *HumioClusterReconciler) ensureMismatchedPodsAreDeleted(ctx context.Cont
hc.Status.State, podsStatus.waitingOnPods(), desiredLifecycleState.ADifferenceWasDetectedAndManualDeletionsNotEnabled(), podsStatus.podRevisionCountMatchesNodeCountAndAllPodsHaveTheSameRevision(),
podsStatus.podRevisions, podsStatus.podDeletionTimestampSet, podsStatus.podNames, podsStatus.podImageVersions, podsStatus.nodeCount, podsStatus.readyCount, podsStatus.notReadyCount))

// when we detect changes, update status to reflect Upgrading/Restarting
if hc.Status.State == humiov1alpha1.HumioClusterStateRunning || hc.Status.State == humiov1alpha1.HumioClusterStateConfigError {
if desiredLifecycleState.FoundVersionDifference() {
r.Log.Info(fmt.Sprintf("changing cluster state from %s to %s with pod revision %d for node pool %s", hc.Status.State, humiov1alpha1.HumioClusterStateUpgrading, hnp.GetDesiredPodRevision(), hnp.GetNodePoolName()))
if result, err := r.updateStatus(ctx, r.Client.Status(), hc, statusOptions().
withNodePoolState(humiov1alpha1.HumioClusterStateUpgrading, hnp.GetNodePoolName(), hnp.GetDesiredPodRevision(), hnp.GetDesiredPodHash(), hnp.GetDesiredBootstrapTokenHash(), "")); err != nil {
return result, err
}
return reconcile.Result{Requeue: true}, nil
}
if !desiredLifecycleState.FoundVersionDifference() && desiredLifecycleState.FoundConfigurationDifference() {
r.Log.Info(fmt.Sprintf("changing cluster state from %s to %s with pod revision %d for node pool %s", hc.Status.State, humiov1alpha1.HumioClusterStateRestarting, hnp.GetDesiredPodRevision(), hnp.GetNodePoolName()))
if result, err := r.updateStatus(ctx, r.Client.Status(), hc, statusOptions().
withNodePoolState(humiov1alpha1.HumioClusterStateRestarting, hnp.GetNodePoolName(), hnp.GetDesiredPodRevision(), hnp.GetDesiredPodHash(), hnp.GetDesiredBootstrapTokenHash(), "")); err != nil {
return result, err
}
return reconcile.Result{Requeue: true}, nil
}
}

// we expect an annotation for the bootstrap token to be present
desiredBootstrapTokenHash, found := desiredPod.Annotations[BootstrapTokenHashAnnotation]
if !found {
@@ -1832,21 +1853,31 @@ func (r *HumioClusterReconciler) ensureMismatchedPodsAreDeleted(ctx context.Cont
// calculate desired pod hash
desiredPodHash := podSpecAsSHA256(hnp, *desiredPod)

// if bootstrap token hash or desired pod hash differs, update node pool status with the new values
if desiredPodHash != hnp.GetDesiredPodHash() || desiredPod.Annotations[BootstrapTokenHashAnnotation] != hnp.GetDesiredBootstrapTokenHash() {
oldRevision := hnp.GetDesiredPodRevision()
newRevision := oldRevision + 1

r.Log.Info(fmt.Sprintf("detected a new pod hash for nodepool=%s, updating status with oldPodRevision=%d newPodRevision=%d oldPodHash=%s newPodHash=%s oldBootstrapTokenHash=%s newBootstrapTokenHash=%s clusterState=%s",
hnp.GetNodePoolName(),
oldRevision, newRevision,
hnp.GetDesiredPodHash(), desiredPodHash,
hnp.GetDesiredBootstrapTokenHash(), desiredBootstrapTokenHash,
hc.Status.State),
)

_, err := r.updateStatus(ctx, r.Client.Status(), hc, statusOptions().withNodePoolState(hc.Status.State, hnp.GetNodePoolName(), newRevision, desiredPodHash, desiredBootstrapTokenHash, ""))
return reconcile.Result{Requeue: true}, err
// save the new revision, hash and so on in one of two cases:
// 1. the cluster is in some pod replacement state
// 2. this is the first time we handle pods for this node pool
if hnp.GetDesiredPodRevision() == 0 ||
slices.Contains([]string{
humiov1alpha1.HumioClusterStateUpgrading,
humiov1alpha1.HumioClusterStateRestarting,
}, hc.Status.State) {
// if bootstrap token hash or desired pod hash differs, update node pool status with the new values
if desiredPodHash != hnp.GetDesiredPodHash() ||
desiredPod.Annotations[BootstrapTokenHashAnnotation] != hnp.GetDesiredBootstrapTokenHash() {
oldRevision := hnp.GetDesiredPodRevision()
newRevision := oldRevision + 1

r.Log.Info(fmt.Sprintf("detected a new pod hash for nodepool=%s updating status with oldPodRevision=%d newPodRevision=%d oldPodHash=%s newPodHash=%s oldBootstrapTokenHash=%s newBootstrapTokenHash=%s clusterState=%s",
hnp.GetNodePoolName(),
oldRevision, newRevision,
hnp.GetDesiredPodHash(), desiredPodHash,
hnp.GetDesiredBootstrapTokenHash(), desiredBootstrapTokenHash,
hc.Status.State,
))

_, err := r.updateStatus(ctx, r.Client.Status(), hc, statusOptions().withNodePoolState(hc.Status.State, hnp.GetNodePoolName(), newRevision, desiredPodHash, desiredBootstrapTokenHash, ""))
return reconcile.Result{Requeue: true}, err
}
}

// when no more changes are needed, update state to Running
@@ -1860,26 +1891,6 @@ func (r *HumioClusterReconciler) ensureMismatchedPodsAreDeleted(ctx context.Cont
return reconcile.Result{Requeue: true}, err
}

// when we detect changes, update status to reflect Upgrading/Restarting
if hc.Status.State == humiov1alpha1.HumioClusterStateRunning || hc.Status.State == humiov1alpha1.HumioClusterStateConfigError {
if desiredLifecycleState.FoundVersionDifference() {
r.Log.Info(fmt.Sprintf("changing cluster state from %s to %s with pod revision %d for node pool %s", hc.Status.State, humiov1alpha1.HumioClusterStateUpgrading, hnp.GetDesiredPodRevision(), hnp.GetNodePoolName()))
if result, err := r.updateStatus(ctx, r.Client.Status(), hc, statusOptions().
withNodePoolState(humiov1alpha1.HumioClusterStateUpgrading, hnp.GetNodePoolName(), hnp.GetDesiredPodRevision(), hnp.GetDesiredPodHash(), hnp.GetDesiredBootstrapTokenHash(), "")); err != nil {
return result, err
}
return reconcile.Result{Requeue: true}, nil
}
if !desiredLifecycleState.FoundVersionDifference() && desiredLifecycleState.FoundConfigurationDifference() {
r.Log.Info(fmt.Sprintf("changing cluster state from %s to %s with pod revision %d for node pool %s", hc.Status.State, humiov1alpha1.HumioClusterStateRestarting, hnp.GetDesiredPodRevision(), hnp.GetNodePoolName()))
if result, err := r.updateStatus(ctx, r.Client.Status(), hc, statusOptions().
withNodePoolState(humiov1alpha1.HumioClusterStateRestarting, hnp.GetNodePoolName(), hnp.GetDesiredPodRevision(), hnp.GetDesiredPodHash(), hnp.GetDesiredBootstrapTokenHash(), "")); err != nil {
return result, err
}
return reconcile.Result{Requeue: true}, nil
}
}

// delete evicted pods and pods attached using PVC's attached to worker nodes that no longer exists
if podsStatus.foundEvictedPodsOrPodsWithOrpahanedPVCs() {
r.Log.Info(fmt.Sprintf("found %d humio pods requiring deletion", len(podsStatus.podsEvictedOrUsesPVCAttachedToHostThatNoLongerExists)))
10 changes: 5 additions & 5 deletions controllers/suite/clusters/humiocluster_controller_test.go
@@ -393,7 +393,7 @@ var _ = Describe("HumioCluster Controller", func() {

currentPods, err := kubernetes.ListPods(ctx, k8sClient, updatedHumioCluster.Namespace, controllers.NewHumioNodeManagerFromHumioCluster(&updatedHumioCluster).GetPodLabels())
if err != nil {
// wrap error in pod object
// wrap error in pod object, so that we can still see the error if the Eventually() fails
return []corev1.Pod{
{
ObjectMeta: metav1.ObjectMeta{Name: fmt.Sprintf("%v", err)},
@@ -1107,7 +1107,7 @@ var _ = Describe("HumioCluster Controller", func() {
Expect(k8sClient.Get(ctx, key, &updatedHumioCluster)).Should(Succeed())
Expect(controllers.NewHumioNodeManagerFromHumioCluster(&updatedHumioCluster).GetDesiredPodRevision()).To(BeEquivalentTo(1))

suite.UsingClusterBy(key.Name, "Updating the cluster image unsuccessfully")
suite.UsingClusterBy(key.Name, "Updating the cluster image unsuccessfully with broken image")
updatedImage := fmt.Sprintf("%s-missing-image", versions.DefaultHumioImageVersion())
Eventually(func() error {
updatedHumioCluster = humiov1alpha1.HumioCluster{}
@@ -1132,7 +1132,7 @@ var _ = Describe("HumioCluster Controller", func() {
suite.UsingClusterBy(key.Name, fmt.Sprintf("Found of %d pods", len(clusterPods)))
for _, pod := range clusterPods {
humioIndex, _ := kubernetes.GetContainerIndexByName(pod, controllers.HumioContainerName)
suite.UsingClusterBy(key.Name, fmt.Sprintf("Pod %s uses image %s and is using revision %s", pod.Spec.NodeName, pod.Spec.Containers[humioIndex].Image, pod.Annotations[controllers.PodRevisionAnnotation]))
suite.UsingClusterBy(key.Name, fmt.Sprintf("Pod %s uses image %s and is using revision %s", pod.Name, pod.Spec.Containers[humioIndex].Image, pod.Annotations[controllers.PodRevisionAnnotation]))
if pod.Spec.Containers[humioIndex].Image == updatedImage && pod.Annotations[controllers.PodRevisionAnnotation] == "2" {
badPodCount++
}
@@ -1151,7 +1151,7 @@ var _ = Describe("HumioCluster Controller", func() {
return updatedHumioCluster.Status.State
}, testTimeout, suite.TestInterval).Should(BeIdenticalTo(humiov1alpha1.HumioClusterStateRunning))

suite.UsingClusterBy(key.Name, "Updating the cluster image successfully")
suite.UsingClusterBy(key.Name, "Updating the cluster image successfully with working image")
updatedImage = versions.DefaultHumioImageVersion()
Eventually(func() error {
updatedHumioCluster = humiov1alpha1.HumioCluster{}
@@ -6132,7 +6132,7 @@ func monitorMaxNumberNodePoolsWithSpecificNodePoolStatus(ctx context.Context, k8
numNodePoolsWithSpecificState++
}
}
// Save the number of unavailable pods in this round
// Save the number of node pools with the node pool state this round
*mostNumNodePoolsWithSpecificNodePoolStatus = max(*mostNumNodePoolsWithSpecificNodePoolStatus, numNodePoolsWithSpecificState)
}
time.Sleep(250 * time.Millisecond)
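For context on the test-helper hunk above: the helper polls every 250ms, counts node pools reporting a given state, and keeps the highest count observed so the suite can later assert an upper bound on how many pools are simultaneously in that state. A rough standalone sketch of that peak-tracking loop, where getStates is an illustrative stand-in for listing node pool statuses through the Kubernetes client (the real helper takes k8sClient and the HumioCluster key):

package main

import (
	"context"
	"fmt"
	"time"
)

// monitorPeakCount repeatedly counts how many node pools report the given state
// and remembers the highest count seen, until the context is cancelled.
// The 250ms poll interval and the max-tracking mirror the test helper in the diff.
func monitorPeakCount(ctx context.Context, getStates func() []string, state string, peak *int) {
	for {
		select {
		case <-ctx.Done():
			return
		default:
			count := 0
			for _, s := range getStates() {
				if s == state {
					count++
				}
			}
			// Save the number of node pools with the node pool state this round.
			*peak = max(*peak, count)
		}
		time.Sleep(250 * time.Millisecond)
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()
	peak := 0
	monitorPeakCount(ctx, func() []string {
		return []string{"Upgrading", "Running", "Upgrading"}
	}, "Upgrading", &peak)
	fmt.Println(peak) // 2
}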
