diff --git a/controllers/humiocluster_controller.go b/controllers/humiocluster_controller.go
index ec8ddf482..4fb9e8623 100644
--- a/controllers/humiocluster_controller.go
+++ b/controllers/humiocluster_controller.go
@@ -157,6 +157,11 @@ func (r *HumioClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request
 	}(ctx, hc)
 
 	// remove unused node pool status entries
+	// TODO: This should be moved to cleanupUnusedResources, but nodePoolAllowsMaintenanceOperations fails
+	// to indicate there's a node pool status in maintenance if the node pool is no longer configured
+	// by the user. When nodePoolAllowsMaintenanceOperations is updated to properly indicate something
+	// marked as under maintenance, even if no longer a node pool specified by the user, then we should
+	// move this to cleanupUnusedResources.
 	if ok, idx := r.hasNoUnusedNodePoolStatus(hc, &humioNodePools); !ok {
 		r.cleanupUnusedNodePoolStatus(hc, idx)
 		if result, err := r.updateStatus(ctx, r.Client.Status(), hc, statusOptions().
@@ -261,7 +266,7 @@ func (r *HumioClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request
 	}
 
 	// wait for license and admin token
-	if len(r.nodePoolsInMaintenance(hc, humioNodePools.Filter(NodePoolFilterHasNode))) == 0 {
+	if len(r.currentlyConfiguredNodePoolsInMaintenance(hc, humioNodePools.Filter(NodePoolFilterHasNode))) == 0 {
 		if result, err := r.ensureLicenseAndAdminToken(ctx, hc, req); result != emptyResult || err != nil {
 			if err != nil {
 				_, _ = r.updateStatus(ctx, r.Client.Status(), hc, statusOptions().
@@ -338,8 +343,10 @@ func (r *HumioClusterReconciler) nodePoolPodsReady(ctx context.Context, hc *humi
 	return true, nil
 }
 
+// nodePoolAllowsMaintenanceOperations fetches which node pools that are still defined, that are marked as in
+// maintenance, and returns true if hnp is present in that list.
 func (r *HumioClusterReconciler) nodePoolAllowsMaintenanceOperations(hc *humiov1alpha1.HumioCluster, hnp *HumioNodePool, hnps []*HumioNodePool) bool {
-	poolsInMaintenance := r.nodePoolsInMaintenance(hc, hnps)
+	poolsInMaintenance := r.currentlyConfiguredNodePoolsInMaintenance(hc, hnps)
 	if len(poolsInMaintenance) == 0 {
 		return true
 	}
@@ -351,7 +358,8 @@ func (r *HumioClusterReconciler) nodePoolAllowsMaintenanceOperations(hc *humiov1
 	return false
 }
 
-func (r *HumioClusterReconciler) nodePoolsInMaintenance(hc *humiov1alpha1.HumioCluster, hnps []*HumioNodePool) []*HumioNodePool {
+// currentlyConfiguredNodePoolsInMaintenance loops through the desired node pools, and returns all node pools with state not Running
+func (r *HumioClusterReconciler) currentlyConfiguredNodePoolsInMaintenance(hc *humiov1alpha1.HumioCluster, hnps []*HumioNodePool) []*HumioNodePool {
 	var poolsInMaintenance []*HumioNodePool
 	for _, pool := range hnps {
 		for _, poolStatus := range hc.Status.NodePoolStatus {
diff --git a/controllers/suite/clusters/humiocluster_controller_test.go b/controllers/suite/clusters/humiocluster_controller_test.go
index 484f88d29..7bc307970 100644
--- a/controllers/suite/clusters/humiocluster_controller_test.go
+++ b/controllers/suite/clusters/humiocluster_controller_test.go
@@ -408,7 +408,7 @@ var _ = Describe("HumioCluster Controller", func() {
 					len(pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[0].MatchExpressions) > 0 {
 					if pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[0].MatchExpressions[0].Key == "some-none-existent-label" {
-						markPodAsPendingIfUsingEnvtest(ctx, k8sClient, pod, key.Name)
+						markPodAsPendingUnschedulableIfUsingEnvtest(ctx, k8sClient, pod, key.Name)
 					}
 				}
 			}
@@ -1142,14 +1142,16 @@ var _ = Describe("HumioCluster Controller", func() {
 
 			suite.UsingClusterBy(key.Name, "Simulating mock pods to be scheduled")
 			clusterPods, _ = kubernetes.ListPods(ctx, k8sClient, key.Namespace, controllers.NewHumioNodeManagerFromHumioCluster(toCreate).GetPodLabels())
-			_ = suite.MarkPodsAsRunningIfUsingEnvtest(ctx, k8sClient, clusterPods, key.Name)
+			for _, pod := range clusterPods {
+				_ = markPodAsPendingImagePullBackOffIfUsingEnvtest(ctx, k8sClient, pod, key.Name)
+			}
 
-			suite.UsingClusterBy(key.Name, "Waiting for humio cluster state to be Running")
+			suite.UsingClusterBy(key.Name, "Waiting for humio cluster state to be Upgrading")
 			Eventually(func() string {
 				updatedHumioCluster = humiov1alpha1.HumioCluster{}
 				Expect(k8sClient.Get(ctx, key, &updatedHumioCluster)).Should(Succeed())
 				return updatedHumioCluster.Status.State
-			}, testTimeout, suite.TestInterval).Should(BeIdenticalTo(humiov1alpha1.HumioClusterStateRunning))
+			}, testTimeout, suite.TestInterval).Should(BeIdenticalTo(humiov1alpha1.HumioClusterStateUpgrading))
 
 			suite.UsingClusterBy(key.Name, "Updating the cluster image successfully with working image")
 			updatedImage = versions.DefaultHumioImageVersion()
diff --git a/controllers/suite/clusters/suite_test.go b/controllers/suite/clusters/suite_test.go
index e0593028c..c791aa098 100644
--- a/controllers/suite/clusters/suite_test.go
+++ b/controllers/suite/clusters/suite_test.go
@@ -340,12 +340,12 @@ func constructBasicMultiNodePoolHumioCluster(key types.NamespacedName, useAutoCr
 	return toCreate
 }
 
-func markPodAsPendingIfUsingEnvtest(ctx context.Context, client client.Client, pod corev1.Pod, clusterName string) error {
+func markPodAsPendingUnschedulableIfUsingEnvtest(ctx context.Context, client client.Client, pod corev1.Pod, clusterName string) error {
 	if !helpers.UseEnvtest() {
 		return nil
 	}
 
-	suite.UsingClusterBy(clusterName, fmt.Sprintf("Simulating Humio pod is marked Pending (podName %s, pod phase %s)", pod.Name, pod.Status.Phase))
+	suite.UsingClusterBy(clusterName, fmt.Sprintf("Simulating Humio pod is marked Pending Unschedulable (podName %s, pod phase %s)", pod.Name, pod.Status.Phase))
 	pod.Status.Conditions = []corev1.PodCondition{
 		{
 			Type:   corev1.PodScheduled,
@@ -357,6 +357,32 @@ func markPodAsPendingIfUsingEnvtest(ctx context.Context, client client.Client, p
 	return client.Status().Update(ctx, &pod)
 }
 
+func markPodAsPendingImagePullBackOffIfUsingEnvtest(ctx context.Context, client client.Client, pod corev1.Pod, clusterName string) error {
+	if !helpers.UseEnvtest() {
+		return nil
+	}
+
+	suite.UsingClusterBy(clusterName, fmt.Sprintf("Simulating Humio pod is marked Pending ImagePullBackOff (podName %s, pod phase %s)", pod.Name, pod.Status.Phase))
+	pod.Status.Conditions = []corev1.PodCondition{
+		{
+			Type:   corev1.PodScheduled,
+			Status: corev1.ConditionTrue,
+		},
+	}
+	pod.Status.ContainerStatuses = []corev1.ContainerStatus{
+		{
+			Name: controllers.HumioContainerName,
+			State: corev1.ContainerState{
+				Waiting: &corev1.ContainerStateWaiting{
+					Reason: "ImagePullBackOff",
+				},
+			},
+		},
+	}
+	pod.Status.Phase = corev1.PodPending
+	return client.Status().Update(ctx, &pod)
+}
+
 func markPodsWithRevisionAsReadyIfUsingEnvTest(ctx context.Context, hnp *controllers.HumioNodePool, podRevision int, desiredReadyPodCount int) {
 	if !helpers.UseEnvtest() {
 		return
@@ -458,7 +484,7 @@ func podPendingCountByRevision(ctx context.Context, hnp *controllers.HumioNodePo
 		}
 	} else {
 		if nodeID+1 <= expectedPendingCount {
-			_ = markPodAsPendingIfUsingEnvtest(ctx, k8sClient, pod, hnp.GetClusterName())
+			_ = markPodAsPendingUnschedulableIfUsingEnvtest(ctx, k8sClient, pod, hnp.GetClusterName())
 			revisionToPendingCount[revision]++
 		}
 	}
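
For context on the rename, here is a minimal standalone sketch of the filtering semantics behind `currentlyConfiguredNodePoolsInMaintenance`: only node pools that are still present in the spec are considered, so a leftover status entry for a pool the user has removed is never reported as "in maintenance" (which is exactly the limitation the TODO above calls out). The types and helper names below are simplified stand-ins, not the operator's actual structs or implementation.

```go
package main

import "fmt"

// Simplified stand-ins for the operator's node pool types; names are
// illustrative only and do not match the real HumioNodePool/NodePoolStatus.
type nodePool struct{ Name string }

type nodePoolStatus struct {
	Name  string
	State string
}

// poolsInMaintenance mirrors the idea behind currentlyConfiguredNodePoolsInMaintenance:
// walk the currently configured pools and return those whose status entry is not Running.
// A status entry whose pool is no longer configured never makes it into the result,
// which is why unused status entries still need a separate cleanup step in Reconcile.
func poolsInMaintenance(configured []nodePool, statuses []nodePoolStatus) []nodePool {
	var out []nodePool
	for _, pool := range configured {
		for _, st := range statuses {
			if st.Name == pool.Name && st.State != "Running" {
				out = append(out, pool)
			}
		}
	}
	return out
}

func main() {
	configured := []nodePool{{Name: "ingest"}}
	statuses := []nodePoolStatus{
		{Name: "ingest", State: "Upgrading"},   // counted: still configured and not Running
		{Name: "old-pool", State: "Upgrading"}, // ignored: pool was removed from the spec
	}
	fmt.Println(poolsInMaintenance(configured, statuses)) // prints: [{ingest}]
}
```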