Wait until pods are up before marking state as Running.
Use envtest to mark pods as ImagePullBackOff in the test case that covers this scenario.

Previously the controller assumed pods would become Running, but that is not
the case for missing images.
SaaldjorMike committed Oct 22, 2024
1 parent f4554cd commit 8c630ba
Showing 3 changed files with 46 additions and 10 deletions.
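The substance of the fix: a pod should only count as "up" if it has reached the Running phase and none of its containers are stuck in a waiting state such as ImagePullBackOff. A minimal sketch of that kind of check, with a hypothetical helper name (the operator's actual logic lives in nodePoolPodsReady, shown only partially below):

package controllers

import corev1 "k8s.io/api/core/v1"

// podIsUp is a hypothetical illustration of the check this commit implies:
// the pod must be Running and no container may be stuck in a waiting state
// (ImagePullBackOff, ErrImagePull, CrashLoopBackOff, and similar).
func podIsUp(pod corev1.Pod) bool {
	if pod.Status.Phase != corev1.PodRunning {
		return false
	}
	for _, containerStatus := range pod.Status.ContainerStatuses {
		if containerStatus.State.Waiting != nil {
			return false
		}
	}
	return true
}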
controllers/humiocluster_controller.go (14 changes: 11 additions & 3 deletions)
@@ -157,6 +157,11 @@ func (r *HumioClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request
}(ctx, hc)

// remove unused node pool status entries
+ // TODO: This should be moved to cleanupUnusedResources, but nodePoolAllowsMaintenanceOperations fails
+ // to indicate there's a node pool status in maintenance if the node pool is no longer configured
+ // by the user. When nodePoolAllowsMaintenanceOperations is updated to properly indicate something
+ // marked as under maintenance, even if no longer a node pool specified by the user, then we should
+ // move this to cleanupUnusedResources.
if ok, idx := r.hasNoUnusedNodePoolStatus(hc, &humioNodePools); !ok {
r.cleanupUnusedNodePoolStatus(hc, idx)
if result, err := r.updateStatus(ctx, r.Client.Status(), hc, statusOptions().
@@ -261,7 +266,7 @@ func (r *HumioClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request
}

// wait for license and admin token
- if len(r.nodePoolsInMaintenance(hc, humioNodePools.Filter(NodePoolFilterHasNode))) == 0 {
+ if len(r.currentlyConfiguredNodePoolsInMaintenance(hc, humioNodePools.Filter(NodePoolFilterHasNode))) == 0 {
if result, err := r.ensureLicenseAndAdminToken(ctx, hc, req); result != emptyResult || err != nil {
if err != nil {
_, _ = r.updateStatus(ctx, r.Client.Status(), hc, statusOptions().
@@ -338,8 +343,10 @@ func (r *HumioClusterReconciler) nodePoolPodsReady(ctx context.Context, hc *humi
return true, nil
}

+ // nodePoolAllowsMaintenanceOperations fetches the node pools that are still defined and marked as in
+ // maintenance, and returns true if hnp is present in that list.
func (r *HumioClusterReconciler) nodePoolAllowsMaintenanceOperations(hc *humiov1alpha1.HumioCluster, hnp *HumioNodePool, hnps []*HumioNodePool) bool {
- poolsInMaintenance := r.nodePoolsInMaintenance(hc, hnps)
+ poolsInMaintenance := r.currentlyConfiguredNodePoolsInMaintenance(hc, hnps)
if len(poolsInMaintenance) == 0 {
return true
}
@@ -351,7 +358,8 @@ func (r *HumioClusterReconciler) nodePoolAllowsMaintenanceOperations(hc *humiov1
return false
}

- func (r *HumioClusterReconciler) nodePoolsInMaintenance(hc *humiov1alpha1.HumioCluster, hnps []*HumioNodePool) []*HumioNodePool {
+ // currentlyConfiguredNodePoolsInMaintenance loops through the currently configured node pools and returns all node pools whose state is not Running
+ func (r *HumioClusterReconciler) currentlyConfiguredNodePoolsInMaintenance(hc *humiov1alpha1.HumioCluster, hnps []*HumioNodePool) []*HumioNodePool {
var poolsInMaintenance []*HumioNodePool
for _, pool := range hnps {
for _, poolStatus := range hc.Status.NodePoolStatus {
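The inner loop body above is truncated by the diff view. A plausible completion, assuming entries in hc.Status.NodePoolStatus expose Name and State fields and that HumioNodePool has a GetNodePoolName method (a hypothetical reconstruction, not necessarily the committed code):

			if poolStatus.Name == pool.GetNodePoolName() && poolStatus.State != humiov1alpha1.HumioClusterStateRunning {
				// Hypothetical: any state other than Running (e.g. Upgrading,
				// Restarting) means the pool is considered under maintenance.
				poolsInMaintenance = append(poolsInMaintenance, pool)
			}
		}
	}
	return poolsInMaintenance
}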
controllers/suite/clusters/humiocluster_controller_test.go (10 changes: 6 additions & 4 deletions)
@@ -408,7 +408,7 @@ var _ = Describe("HumioCluster Controller", func() {
len(pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[0].MatchExpressions) > 0 {

if pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[0].MatchExpressions[0].Key == "some-none-existent-label" {
- markPodAsPendingIfUsingEnvtest(ctx, k8sClient, pod, key.Name)
+ markPodAsPendingUnschedulableIfUsingEnvtest(ctx, k8sClient, pod, key.Name)
}
}
}
@@ -1142,14 +1142,16 @@ var _ = Describe("HumioCluster Controller", func() {

suite.UsingClusterBy(key.Name, "Simulating mock pods to be scheduled")
clusterPods, _ = kubernetes.ListPods(ctx, k8sClient, key.Namespace, controllers.NewHumioNodeManagerFromHumioCluster(toCreate).GetPodLabels())
- _ = suite.MarkPodsAsRunningIfUsingEnvtest(ctx, k8sClient, clusterPods, key.Name)
+ for _, pod := range clusterPods {
+ 	_ = markPodAsPendingImagePullBackOffIfUsingEnvtest(ctx, k8sClient, pod, key.Name)
+ }

suite.UsingClusterBy(key.Name, "Waiting for humio cluster state to be Running")
suite.UsingClusterBy(key.Name, "Waiting for humio cluster state to be Upgrading")
Eventually(func() string {
updatedHumioCluster = humiov1alpha1.HumioCluster{}
Expect(k8sClient.Get(ctx, key, &updatedHumioCluster)).Should(Succeed())
return updatedHumioCluster.Status.State
- }, testTimeout, suite.TestInterval).Should(BeIdenticalTo(humiov1alpha1.HumioClusterStateRunning))
+ }, testTimeout, suite.TestInterval).Should(BeIdenticalTo(humiov1alpha1.HumioClusterStateUpgrading))

suite.UsingClusterBy(key.Name, "Updating the cluster image successfully with working image")
updatedImage = versions.DefaultHumioImageVersion()
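After the working image is applied, the natural follow-up assertion is to wait for the cluster to settle back into Running, reusing the same Eventually pattern shown above (a sketch composed only of identifiers already visible in this diff, not necessarily the test's exact continuation):

Eventually(func() string {
	updatedHumioCluster = humiov1alpha1.HumioCluster{}
	Expect(k8sClient.Get(ctx, key, &updatedHumioCluster)).Should(Succeed())
	return updatedHumioCluster.Status.State
}, testTimeout, suite.TestInterval).Should(BeIdenticalTo(humiov1alpha1.HumioClusterStateRunning))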
controllers/suite/clusters/suite_test.go (32 changes: 29 additions & 3 deletions)
@@ -340,12 +340,12 @@ func constructBasicMultiNodePoolHumioCluster(key types.NamespacedName, useAutoCr
return toCreate
}

- func markPodAsPendingIfUsingEnvtest(ctx context.Context, client client.Client, pod corev1.Pod, clusterName string) error {
+ func markPodAsPendingUnschedulableIfUsingEnvtest(ctx context.Context, client client.Client, pod corev1.Pod, clusterName string) error {
if !helpers.UseEnvtest() {
return nil
}

- suite.UsingClusterBy(clusterName, fmt.Sprintf("Simulating Humio pod is marked Pending (podName %s, pod phase %s)", pod.Name, pod.Status.Phase))
+ suite.UsingClusterBy(clusterName, fmt.Sprintf("Simulating Humio pod is marked Pending Unschedulable (podName %s, pod phase %s)", pod.Name, pod.Status.Phase))
pod.Status.Conditions = []corev1.PodCondition{
{
Type: corev1.PodScheduled,
@@ -357,6 +357,32 @@ func markPodAsPendingIfUsingEnvtest(ctx context.Context, client client.Client, p
return client.Status().Update(ctx, &pod)
}

+ func markPodAsPendingImagePullBackOffIfUsingEnvtest(ctx context.Context, client client.Client, pod corev1.Pod, clusterName string) error {
+ 	if !helpers.UseEnvtest() {
+ 		return nil
+ 	}
+ 
+ 	suite.UsingClusterBy(clusterName, fmt.Sprintf("Simulating Humio pod is marked Pending ImagePullBackOff (podName %s, pod phase %s)", pod.Name, pod.Status.Phase))
+ 	pod.Status.Conditions = []corev1.PodCondition{
+ 		{
+ 			Type:   corev1.PodScheduled,
+ 			Status: corev1.ConditionTrue,
+ 		},
+ 	}
+ 	pod.Status.ContainerStatuses = []corev1.ContainerStatus{
+ 		{
+ 			Name: controllers.HumioContainerName,
+ 			State: corev1.ContainerState{
+ 				Waiting: &corev1.ContainerStateWaiting{
+ 					Reason: "ImagePullBackOff",
+ 				},
+ 			},
+ 		},
+ 	}
+ 	pod.Status.Phase = corev1.PodPending
+ 	return client.Status().Update(ctx, &pod)
+ }

func markPodsWithRevisionAsReadyIfUsingEnvTest(ctx context.Context, hnp *controllers.HumioNodePool, podRevision int, desiredReadyPodCount int) {
if !helpers.UseEnvtest() {
return
@@ -458,7 +484,7 @@ func podPendingCountByRevision(ctx context.Context, hnp *controllers.HumioNodePo
}
} else {
if nodeID+1 <= expectedPendingCount {
- _ = markPodAsPendingIfUsingEnvtest(ctx, k8sClient, pod, hnp.GetClusterName())
+ _ = markPodAsPendingUnschedulableIfUsingEnvtest(ctx, k8sClient, pod, hnp.GetClusterName())
revisionToPendingCount[revision]++
}
}
