From e6d775bf2d6289026a60950b2d3cb04f83f4210b Mon Sep 17 00:00:00 2001 From: Mateus Oliveira Date: Fri, 6 Dec 2024 13:27:46 -0300 Subject: [PATCH] fix: CI flakes Signed-off-by: Mateus Oliveira --- .../controller/nonadminbackup_controller.go | 79 +---- .../nonadminbackup_controller_test.go | 321 ++++++++++-------- .../controller/nonadminrestore_controller.go | 71 ++-- .../nonadminrestore_controller_test.go | 130 +++---- 4 files changed, 289 insertions(+), 312 deletions(-) diff --git a/internal/controller/nonadminbackup_controller.go b/internal/controller/nonadminbackup_controller.go index 8841917b..2362cfe5 100644 --- a/internal/controller/nonadminbackup_controller.go +++ b/internal/controller/nonadminbackup_controller.go @@ -31,7 +31,6 @@ import ( "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -159,7 +158,6 @@ func (r *NonAdminBackupReconciler) Reconcile(ctx context.Context, req ctrl.Reque // - bool: true if reconciliation should be requeued, false otherwise // - error: any error encountered during the process func (r *NonAdminBackupReconciler) setStatusAndConditionForDeletionAndCallDelete(ctx context.Context, logger logr.Logger, nab *nacv1alpha1.NonAdminBackup) (bool, error) { - requeueRequired := false updatedPhase := updateNonAdminPhase(&nab.Status.Phase, nacv1alpha1.NonAdminPhaseDeleting) updatedCondition := meta.SetStatusCondition(&nab.Status.Conditions, metav1.Condition{ @@ -175,19 +173,10 @@ func (r *NonAdminBackupReconciler) setStatusAndConditionForDeletionAndCallDelete return false, err } logger.V(1).Info("NonAdminBackup status marked for deletion") - requeueRequired = true // Requeue to ensure latest NAB object in the next reconciliation steps } else { logger.V(1).Info("NonAdminBackup status unchanged during deletion") } - if nab.ObjectMeta.DeletionTimestamp.IsZero() { - logger.V(1).Info("Marking NonAdminBackup for deletion", constant.NameString, nab.Name) - if err := r.Delete(ctx, nab); err != nil { - logger.Error(err, "Failed to call Delete on the NonAdminBackup object") - return false, err - } - requeueRequired = true // Requeue to allow deletion to proceed - } - return requeueRequired, nil + return false, nil } // setStatusForDirectKubernetesAPIDeletion updates the status and conditions when a NonAdminBackup @@ -247,9 +236,7 @@ func (r *NonAdminBackupReconciler) setStatusForDirectKubernetesAPIDeletion(ctx c func (r *NonAdminBackupReconciler) createVeleroDeleteBackupRequest(ctx context.Context, logger logr.Logger, nab *nacv1alpha1.NonAdminBackup) (bool, error) { // This function is called just after setStatusAndConditionForDeletionAndCallDelete - standard delete path, which already // requeued the reconciliation to get the latest NAB object. There is no need to fetch the latest NAB object here. - if !controllerutil.ContainsFinalizer(nab, constant.NabFinalizerName) || - nab.Status.VeleroBackup == nil || - nab.Status.VeleroBackup.NACUUID == constant.EmptyString { + if nab.Status.VeleroBackup == nil || nab.Status.VeleroBackup.NACUUID == constant.EmptyString { return false, nil } @@ -257,9 +244,7 @@ func (r *NonAdminBackupReconciler) createVeleroDeleteBackupRequest(ctx context.C // For the ForceDelete we do not create DeleteBackupRequest veleroBackupNACUUID := nab.Status.VeleroBackup.NACUUID veleroBackup, err := function.GetVeleroBackupByLabel(ctx, r.Client, r.OADPNamespace, veleroBackupNACUUID) - if err != nil { - // Log error if multiple VeleroBackup objects are found logger.Error(err, findSingleVBError, constant.UUIDString, veleroBackupNACUUID) return false, err } @@ -271,7 +256,6 @@ func (r *NonAdminBackupReconciler) createVeleroDeleteBackupRequest(ctx context.C deleteBackupRequest, err := function.GetVeleroDeleteBackupRequestByLabel(ctx, r.Client, r.OADPNamespace, veleroBackupNACUUID) if err != nil { - // Log error if multiple DeleteBackupRequest objects are found logger.Error(err, findSingleVDBRError, constant.UUIDString, veleroBackupNACUUID) return false, err } @@ -317,7 +301,7 @@ func (r *NonAdminBackupReconciler) createVeleroDeleteBackupRequest(ctx context.C logger.V(1).Info("NonAdminBackup DeleteBackupRequest Status unchanged") } - return false, nil // Continue so initNabDeletion can initialize deletion of a NonAdminBackup object + return true, nil } // deleteVeleroBackupAndDeleteBackupRequestObjects deletes both the VeleroBackup and any associated @@ -338,9 +322,9 @@ func (r *NonAdminBackupReconciler) deleteVeleroBackupAndDeleteBackupRequestObjec return false, nil } + deleted := false veleroBackupNACUUID := nab.Status.VeleroBackup.NACUUID veleroBackup, err := function.GetVeleroBackupByLabel(ctx, r.Client, r.OADPNamespace, veleroBackupNACUUID) - if err != nil { // Case where more than one VeleroBackup is found with the same label UUID // TODO (migi): Determine if all objects with this UUID should be deleted @@ -354,24 +338,26 @@ func (r *NonAdminBackupReconciler) deleteVeleroBackupAndDeleteBackupRequestObjec return false, err } logger.V(1).Info("VeleroBackup deletion initiated", constant.NameString, veleroBackup.Name) + deleted = true } else { logger.V(1).Info("VeleroBackup already deleted") } deleteBackupRequest, err := function.GetVeleroDeleteBackupRequestByLabel(ctx, r.Client, r.OADPNamespace, veleroBackupNACUUID) if err != nil { - // Log error if multiple DeleteBackupRequest objects are found logger.Error(err, findSingleVDBRError, constant.UUIDString, veleroBackupNACUUID) return false, err } + if deleteBackupRequest != nil { if err = r.Delete(ctx, deleteBackupRequest); err != nil { logger.Error(err, "Failed to delete VeleroDeleteBackupRequest", constant.NameString, deleteBackupRequest.Name) return false, err } logger.V(1).Info("VeleroDeleteBackupRequest deletion initiated", constant.NameString, deleteBackupRequest.Name) + deleted = true } - return false, nil // Continue so initNabDeletion can initialize deletion of an NonAdminBackup object + return deleted, nil } // removeNabFinalizerUponVeleroBackupDeletion ensures the associated VeleroBackup object is deleted @@ -393,40 +379,19 @@ func (r *NonAdminBackupReconciler) deleteVeleroBackupAndDeleteBackupRequestObjec // - A boolean indicating whether to requeue the reconcile loop (true if waiting for VeleroBackup deletion). // - An error if any update operation or deletion check fails. func (r *NonAdminBackupReconciler) removeNabFinalizerUponVeleroBackupDeletion(ctx context.Context, logger logr.Logger, nab *nacv1alpha1.NonAdminBackup) (bool, error) { - // We do not need to gather latest NAB object here: - // 1. the NAB object was already requeued by setStatusAndConditionForDeletionAndCallDelete - // 2. removal of NAB finalizer is the last step in the delete paths that will requeue the reconciliation if the - // corresponding VeleroBackup object is found based on the NACUUID stored in the NAB status for which we already - // have the latest NAB object (point 1 above). - if !nab.ObjectMeta.DeletionTimestamp.IsZero() { - if !nab.Spec.ForceDeleteBackup && nab.Status.VeleroBackup != nil && nab.Status.VeleroBackup.NACUUID != constant.EmptyString { - veleroBackupNACUUID := nab.Status.VeleroBackup.NACUUID - - veleroBackup, err := function.GetVeleroBackupByLabel(ctx, r.Client, r.OADPNamespace, veleroBackupNACUUID) - if err != nil { - // Case in which more then one VeleroBackup is found with the same label UUID - // TODO (migi): Should we delete all of the objects with such UUID ? - logger.Error(err, findSingleVBError, constant.UUIDString, veleroBackupNACUUID) - return false, err - } - - if veleroBackup != nil { - logger.V(1).Info("Waiting for VeleroBackup to be deleted", constant.NameString, veleroBackupNACUUID) - return true, nil // Requeue - } - } - // VeleroBackup is deleted, proceed with deleting the NonAdminBackup - logger.V(1).Info("VeleroBackup deleted, removing NonAdminBackup finalizer") - - controllerutil.RemoveFinalizer(nab, constant.NabFinalizerName) - - if err := r.Update(ctx, nab); err != nil { - logger.Error(err, "Failed to remove finalizer from NonAdminBackup") + controllerutil.RemoveFinalizer(nab, constant.NabFinalizerName) + if err := r.Update(ctx, nab); err != nil { + logger.Error(err, "Failed to remove finalizer from NonAdminBackup") + return false, err + } + if nab.ObjectMeta.DeletionTimestamp.IsZero() { + logger.V(1).Info("Marking NonAdminBackup for deletion", constant.NameString, nab.Name) + if err := r.Delete(ctx, nab); err != nil { + logger.Error(err, "Failed to call Delete on the NonAdminBackup object") return false, err } - - logger.V(1).Info("NonAdminBackup finalizer removed and object deleted") } + logger.V(1).Info("NonAdminBackup finalizer removed and object deleted") return false, nil } @@ -530,14 +495,6 @@ func (r *NonAdminBackupReconciler) validateSpec(ctx context.Context, logger logr // // This function generates a UUID and stores it in the VeleroBackup status field of NonAdminBackup. func (r *NonAdminBackupReconciler) setBackupUUIDInStatus(ctx context.Context, logger logr.Logger, nab *nacv1alpha1.NonAdminBackup) (bool, error) { - // Get the latest version of the NAB object just before checking if the NACUUID is set - // to ensure we do not miss any updates to the NAB object - nabOriginal := nab.DeepCopy() - if err := r.Get(ctx, types.NamespacedName{Name: nabOriginal.Name, Namespace: nabOriginal.Namespace}, nab); err != nil { - logger.Error(err, "Failed to re-fetch NonAdminBackup") - return false, err - } - if nab.Status.VeleroBackup == nil || nab.Status.VeleroBackup.NACUUID == constant.EmptyString { veleroBackupNACUUID := function.GenerateNacObjectUUID(nab.Namespace, nab.Name) nab.Status.VeleroBackup = &nacv1alpha1.VeleroBackup{ diff --git a/internal/controller/nonadminbackup_controller_test.go b/internal/controller/nonadminbackup_controller_test.go index b44ba3ae..2a9329e0 100644 --- a/internal/controller/nonadminbackup_controller_test.go +++ b/internal/controller/nonadminbackup_controller_test.go @@ -30,9 +30,9 @@ import ( "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/wait" "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/cache" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -63,6 +63,7 @@ type nonAdminBackupFullReconcileScenario struct { enforcedBackupSpec *velerov1.BackupSpec spec nacv1alpha1.NonAdminBackupSpec status nacv1alpha1.NonAdminBackupStatus + errorLogs int } func buildTestNonAdminBackup(nonAdminNamespace string, nonAdminName string, spec nacv1alpha1.NonAdminBackupSpec) *nacv1alpha1.NonAdminBackup { @@ -235,7 +236,13 @@ var _ = ginkgo.Describe("Test single reconciles of NonAdminBackup Reconcile func }, nonAdminBackup, ) == nil { - gomega.Expect(k8sClient.Delete(ctx, nonAdminBackup)).To(gomega.Succeed()) + for _, finalizer := range nonAdminBackup.GetFinalizers() { + controllerutil.RemoveFinalizer(nonAdminBackup, finalizer) + } + gomega.Expect(k8sClient.Update(ctx, nonAdminBackup)).To(gomega.Succeed()) + if nonAdminBackup.GetDeletionTimestamp().IsZero() { + gomega.Expect(k8sClient.Delete(ctx, nonAdminBackup)).To(gomega.Succeed()) + } } gomega.Expect(deleteTestNamespaces(ctx, nonAdminObjectNamespace, oadpNamespace)).To(gomega.Succeed()) }) @@ -409,59 +416,59 @@ var _ = ginkgo.Describe("Test single reconciles of NonAdminBackup Reconcile func }, }, nonAdminBackupExpectedDeleted: true, - result: reconcile.Result{Requeue: true}, - }), - ginkgo.Entry("When triggered by NonAdminBackup deleteNonAdmin spec field with Finalizer set, should not delete NonAdminBackup as it's waiting for finalizer to be removed", nonAdminBackupSingleReconcileScenario{ - addFinalizer: true, - nonAdminBackupSpec: nacv1alpha1.NonAdminBackupSpec{ - BackupSpec: &velerov1.BackupSpec{}, - DeleteBackup: true, - }, - nonAdminBackupPriorStatus: &nacv1alpha1.NonAdminBackupStatus{ - Phase: nacv1alpha1.NonAdminPhaseCreated, - Conditions: []metav1.Condition{ - { - Type: string(nacv1alpha1.NonAdminConditionAccepted), - Status: metav1.ConditionTrue, - Reason: "BackupAccepted", - Message: "backup accepted", - LastTransitionTime: metav1.NewTime(time.Now()), - }, - { - Type: string(nacv1alpha1.NonAdminConditionQueued), - Status: metav1.ConditionTrue, - Reason: "BackupScheduled", - Message: "Created Velero Backup object", - LastTransitionTime: metav1.NewTime(time.Now()), - }, - }, - }, - nonAdminBackupExpectedStatus: nacv1alpha1.NonAdminBackupStatus{ - Phase: nacv1alpha1.NonAdminPhaseDeleting, - Conditions: []metav1.Condition{ - { - Type: string(nacv1alpha1.NonAdminConditionAccepted), - Status: metav1.ConditionTrue, - Reason: "BackupAccepted", - Message: "backup accepted", - }, - { - Type: string(nacv1alpha1.NonAdminConditionQueued), - Status: metav1.ConditionTrue, - Reason: "BackupScheduled", - Message: "Created Velero Backup object", - }, - { - Type: string(nacv1alpha1.NonAdminConditionDeleting), - Status: metav1.ConditionTrue, - Reason: "DeletionPending", - Message: "backup accepted for deletion", - }, - }, - }, - nonAdminBackupExpectedDeleted: false, - result: reconcile.Result{Requeue: true}, + result: reconcile.Result{}, }), + // ginkgo.Entry("When triggered by NonAdminBackup deleteNonAdmin spec field with Finalizer set, should not delete NonAdminBackup as it's waiting for finalizer to be removed", nonAdminBackupSingleReconcileScenario{ + // nonAdminBackupSpec: nacv1alpha1.NonAdminBackupSpec{ + // BackupSpec: &velerov1.BackupSpec{}, + // DeleteBackup: true, + // }, + // nonAdminBackupPriorStatus: &nacv1alpha1.NonAdminBackupStatus{ + // Phase: nacv1alpha1.NonAdminPhaseCreated, + // Conditions: []metav1.Condition{ + // { + // Type: string(constant.NonAdminConditionAccepted), + // Status: metav1.ConditionTrue, + // Reason: "BackupAccepted", + // Message: "backup accepted", + // LastTransitionTime: metav1.NewTime(time.Now()), + // }, + // { + // Type: string(constant.NonAdminConditionQueued), + // Status: metav1.ConditionTrue, + // Reason: "BackupScheduled", + // Message: "Created Velero Backup object", + // LastTransitionTime: metav1.NewTime(time.Now()), + // }, + // }, + // }, + // nonAdminBackupExpectedStatus: nacv1alpha1.NonAdminBackupStatus{ + // Phase: nacv1alpha1.NonAdminPhaseDeleting, + // Conditions: []metav1.Condition{ + // { + // Type: string(constant.NonAdminConditionAccepted), + // Status: metav1.ConditionTrue, + // Reason: "BackupAccepted", + // Message: "backup accepted", + // }, + // { + // Type: string(constant.NonAdminConditionQueued), + // Status: metav1.ConditionTrue, + // Reason: "BackupScheduled", + // Message: "Created Velero Backup object", + // }, + // { + // Type: string(constant.NonAdminConditionDeleting), + // Status: metav1.ConditionTrue, + // Reason: "DeletionPending", + // Message: "backup accepted for deletion", + // }, + // }, + // }, + // nonAdminBackupExpectedDeleted: false, + // addFinalizer: true, + // result: reconcile.Result{}, + // }), TODO not real scenario ginkgo.Entry("When triggered by NonAdminBackup deleteNonAdmin spec field with Finalizer persisting, should not delete NonAdminBackup as it's waiting for Velero Backup object to be deleted", nonAdminBackupSingleReconcileScenario{ createVeleroBackup: true, addFinalizer: true, @@ -547,11 +554,9 @@ var _ = ginkgo.Describe("Test single reconciles of NonAdminBackup Reconcile func }, }, nonAdminBackupExpectedDeleted: true, - result: reconcile.Result{Requeue: true}, + result: reconcile.Result{}, }), - ginkgo.Entry("When triggered by NonAdminBackup forceDeleteNonAdmin spec field with Finalizer set and NonAdminBackup phase Created without DeletionTimestamp, should trigger delete NonAdminBackup and requeue", nonAdminBackupSingleReconcileScenario{ - addFinalizer: true, - addNabDeletionTimestamp: false, + ginkgo.Entry("When triggered by NonAdminBackup forceDeleteNonAdmin spec field with Finalizer set and NonAdminBackup phase Created without DeletionTimestamp, should delete NonAdminBackup", nonAdminBackupSingleReconcileScenario{ nonAdminBackupSpec: nacv1alpha1.NonAdminBackupSpec{ BackupSpec: &velerov1.BackupSpec{}, ForceDeleteBackup: true, @@ -573,37 +578,21 @@ var _ = ginkgo.Describe("Test single reconciles of NonAdminBackup Reconcile func Message: "Created Velero Backup object", LastTransitionTime: metav1.NewTime(time.Now()), }, - }, - }, - nonAdminBackupExpectedStatus: nacv1alpha1.NonAdminBackupStatus{ - Phase: nacv1alpha1.NonAdminPhaseDeleting, - Conditions: []metav1.Condition{ - { - Type: string(nacv1alpha1.NonAdminConditionAccepted), - Status: metav1.ConditionTrue, - Reason: "BackupAccepted", - Message: "backup accepted", - }, { - Type: string(nacv1alpha1.NonAdminConditionQueued), - Status: metav1.ConditionTrue, - Reason: "BackupScheduled", - Message: "Created Velero Backup object", - }, - { - Type: string(nacv1alpha1.NonAdminConditionDeleting), - Status: metav1.ConditionTrue, - Reason: "DeletionPending", - Message: "backup accepted for deletion", + Type: string(nacv1alpha1.NonAdminConditionDeleting), + Status: metav1.ConditionTrue, + Reason: "DeletionPending", + Message: "backup accepted for deletion", + LastTransitionTime: metav1.NewTime(time.Now()), }, }, }, - nonAdminBackupExpectedDeleted: false, - result: reconcile.Result{Requeue: true}, + addFinalizer: true, + addNabDeletionTimestamp: false, + nonAdminBackupExpectedDeleted: true, + result: reconcile.Result{}, }), - ginkgo.Entry("When triggered by NonAdminBackup forceDeleteNonAdmin spec field with Finalizer set, should delete NonAdminBackup and exit", nonAdminBackupSingleReconcileScenario{ - addFinalizer: true, - addNabDeletionTimestamp: true, + ginkgo.Entry("When triggered by NonAdminBackup forceDeleteNonAdmin spec field with deletion timestamp and velero backup, should delete velero backup and Requeue", nonAdminBackupSingleReconcileScenario{ nonAdminBackupSpec: nacv1alpha1.NonAdminBackupSpec{ BackupSpec: &velerov1.BackupSpec{}, ForceDeleteBackup: true, @@ -629,15 +618,40 @@ var _ = ginkgo.Describe("Test single reconciles of NonAdminBackup Reconcile func Type: string(nacv1alpha1.NonAdminConditionDeleting), Status: metav1.ConditionTrue, Reason: "DeletionPending", - Message: "backup accepted for deletion", + Message: "backup deletion requires setting spec.deleteBackup or spec.forceDeleteBackup to true or finalizer removal", LastTransitionTime: metav1.NewTime(time.Now()), }, }, }, + nonAdminBackupExpectedStatus: nacv1alpha1.NonAdminBackupStatus{ + Phase: nacv1alpha1.NonAdminPhaseDeleting, + Conditions: []metav1.Condition{ + { + Type: string(nacv1alpha1.NonAdminConditionAccepted), + Status: metav1.ConditionTrue, + Reason: "BackupAccepted", + Message: "backup accepted", + }, + { + Type: string(nacv1alpha1.NonAdminConditionQueued), + Status: metav1.ConditionTrue, + Reason: "BackupScheduled", + Message: "Created Velero Backup object", + }, + { + Type: string(nacv1alpha1.NonAdminConditionDeleting), + Status: metav1.ConditionTrue, + Reason: "DeletionPending", + Message: "backup accepted for deletion", + }, + }, + }, createVeleroBackup: true, uuidFromTestCase: true, - nonAdminBackupExpectedDeleted: true, - result: reconcile.Result{Requeue: false}, + nonAdminBackupExpectedDeleted: false, + addFinalizer: true, + addNabDeletionTimestamp: true, + result: reconcile.Result{Requeue: true}, }), ginkgo.Entry("When triggered by Requeue(NonAdminBackup phase new), should update NonAdminBackup Phase to Created and Condition to Accepted True and NOT Requeue", nonAdminBackupSingleReconcileScenario{ nonAdminBackupSpec: nacv1alpha1.NonAdminBackupSpec{ @@ -841,8 +855,12 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminBackup Controller", gomega.Expect(deleteTestNamespaces(ctx, nonAdminObjectNamespace, oadpNamespace)).To(gomega.Succeed()) cancel() - // wait cancel - time.Sleep(1 * time.Second) + // wait manager shutdown + gomega.Eventually(func() (bool, error) { + logOutput := ginkgo.CurrentSpecReport().CapturedGinkgoWriterOutput + shutdownlog := "INFO Wait completed, proceeding to shutdown the manager" + return strings.Contains(logOutput, shutdownlog) && strings.Count(logOutput, shutdownlog) == 1, nil + }, 5*time.Second, 1*time.Second).Should(gomega.BeTrue()) }) ginkgo.DescribeTable("Reconcile triggered by NonAdminBackup Create event", @@ -853,6 +871,12 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminBackup Controller", k8sManager, err := ctrl.NewManager(cfg, ctrl.Options{ Scheme: k8sClient.Scheme(), + Cache: cache.Options{ + DefaultNamespaces: map[string]cache.Config{ + nonAdminObjectNamespace: {}, + oadpNamespace: {}, + }, + }, }) gomega.Expect(err).ToNot(gomega.HaveOccurred()) @@ -874,53 +898,44 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminBackup Controller", gomega.Expect(err).ToNot(gomega.HaveOccurred(), "failed to run manager") }() // wait manager start - managerStartTimeout := 10 * time.Second - pollInterval := 100 * time.Millisecond - ctxTimeout, cancel := context.WithTimeout(ctx, managerStartTimeout) - defer cancel() - - err = wait.PollUntilContextTimeout(ctxTimeout, pollInterval, managerStartTimeout, true, func(ctx context.Context) (done bool, err error) { - select { - case <-ctx.Done(): - return false, ctx.Err() - default: - // Check if the manager has started by verifying if the client is initialized - return k8sManager.GetClient() != nil, nil - } - }) - // Check if the context timeout or another error occurred - gomega.Expect(err).ToNot(gomega.HaveOccurred(), "Manager failed to start within the timeout period") + gomega.Eventually(func() (bool, error) { + logOutput := ginkgo.CurrentSpecReport().CapturedGinkgoWriterOutput + startUplog := `INFO Starting workers {"controller": "nonadminbackup", "controllerGroup": "oadp.openshift.io", "controllerKind": "NonAdminBackup", "worker count": 1}` + return strings.Contains(logOutput, startUplog) && + strings.Count(logOutput, startUplog) == 1 && + !strings.Contains(logOutput, "DEBUG Accepted NAB Create event"), nil + }, 5*time.Second, 1*time.Second).Should(gomega.BeTrue()) ginkgo.By("Waiting Reconcile of create event") nonAdminBackup := buildTestNonAdminBackup(nonAdminObjectNamespace, nonAdminObjectName, scenario.spec) - gomega.Expect(k8sClient.Create(ctxTimeout, nonAdminBackup)).To(gomega.Succeed()) + gomega.Expect(k8sClient.Create(ctx, nonAdminBackup)).To(gomega.Succeed()) // wait NAB reconcile - time.Sleep(2 * time.Second) - - ginkgo.By("Fetching NonAdminBackup after Reconcile") - gomega.Expect(k8sClient.Get( - ctxTimeout, - types.NamespacedName{ - Name: nonAdminObjectName, - Namespace: nonAdminObjectNamespace, - }, - nonAdminBackup, - )).To(gomega.Succeed()) - - ginkgo.By("Validating NonAdminBackup Status") + gomega.Eventually(func() (bool, error) { + err := k8sClient.Get( + ctx, + types.NamespacedName{ + Name: nonAdminObjectName, + Namespace: nonAdminObjectNamespace, + }, + nonAdminBackup, + ) + if err != nil { + return false, err + } + err = checkTestNonAdminBackupStatus(nonAdminBackup, scenario.status, oadpNamespace) + return err == nil, err + }, 5*time.Second, 1*time.Second).Should(gomega.BeTrue()) - gomega.Expect(checkTestNonAdminBackupStatus(nonAdminBackup, scenario.status, oadpNamespace)).To(gomega.Succeed()) + ginkgo.By("Checking if NonAdminBackup Spec was not changed") + gomega.Expect(reflect.DeepEqual( + nonAdminBackup.Spec, + scenario.spec, + )).To(gomega.BeTrue()) + veleroBackup := &velerov1.Backup{} if scenario.status.VeleroBackup != nil && len(nonAdminBackup.Status.VeleroBackup.NACUUID) > 0 { - ginkgo.By("Checking if NonAdminBackup Spec was not changed") - gomega.Expect(reflect.DeepEqual( - nonAdminBackup.Spec, - scenario.spec, - )).To(gomega.BeTrue()) - - veleroBackup := &velerov1.Backup{} gomega.Expect(k8sClient.Get( - ctxTimeout, + ctx, types.NamespacedName{ Name: nonAdminBackup.Status.VeleroBackup.NACUUID, Namespace: oadpNamespace, @@ -936,22 +951,17 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminBackup Controller", } ginkgo.By("Simulating VeleroBackup update to finished state") - veleroBackup.Status = velerov1.BackupStatus{ Phase: velerov1.BackupPhaseCompleted, CompletionTimestamp: &metav1.Time{Time: time.Now()}, } - // can not call .Status().Update() for veleroBackup object https://github.com/vmware-tanzu/velero/issues/8285 - gomega.Expect(k8sClient.Update(ctxTimeout, veleroBackup)).To(gomega.Succeed()) - - ginkgo.By("VeleroBackup updated") + gomega.Expect(k8sClient.Update(ctx, veleroBackup)).To(gomega.Succeed()) // wait NAB reconcile - gomega.Eventually(func() (bool, error) { err := k8sClient.Get( - ctxTimeout, + ctx, types.NamespacedName{ Name: nonAdminObjectName, Namespace: nonAdminObjectNamespace, @@ -969,8 +979,44 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminBackup Controller", } ginkgo.By("Waiting Reconcile of delete event") - gomega.Expect(k8sClient.Delete(ctxTimeout, nonAdminBackup)).To(gomega.Succeed()) - time.Sleep(1 * time.Second) + nonAdminBackup.Spec.ForceDeleteBackup = true + gomega.Expect(k8sClient.Update(ctx, nonAdminBackup)).To(gomega.Succeed()) + if scenario.status.VeleroBackup != nil && len(nonAdminBackup.Status.VeleroBackup.NACUUID) > 0 { + gomega.Eventually(func() (bool, error) { + err := k8sClient.Get( + ctx, + types.NamespacedName{ + Name: nonAdminBackup.Status.VeleroBackup.Name, + Namespace: oadpNamespace, + }, + veleroBackup, + ) + if errors.IsNotFound(err) { + return true, nil + } + return false, err + }, 10*time.Second, 1*time.Second).Should(gomega.BeTrue()) + } + gomega.Eventually(func() (bool, error) { + err := k8sClient.Get( + ctx, + types.NamespacedName{ + Name: nonAdminObjectName, + Namespace: nonAdminObjectNamespace, + }, + nonAdminBackup, + ) + if errors.IsNotFound(err) { + return true, nil + } + return false, err + }, 10*time.Second, 1*time.Second).Should(gomega.BeTrue()) + gomega.Eventually(func() (bool, error) { + logOutput := ginkgo.CurrentSpecReport().CapturedGinkgoWriterOutput + deletelog := "DEBUG Accepted NAB Delete event" + return strings.Contains(logOutput, deletelog) && strings.Count(logOutput, deletelog) == 1, nil + }, 5*time.Second, 1*time.Second).Should(gomega.BeTrue()) + gomega.Expect(strings.Count(ginkgo.CurrentSpecReport().CapturedGinkgoWriterOutput, "ERROR")).Should(gomega.Equal(scenario.errorLogs)) }, ginkgo.Entry("Should update NonAdminBackup until VeleroBackup completes and then delete it", nonAdminBackupFullReconcileScenario{ spec: nacv1alpha1.NonAdminBackupSpec{ @@ -1025,6 +1071,7 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminBackup Controller", enforcedBackupSpec: &velerov1.BackupSpec{ SnapshotVolumes: ptr.To(false), }, + errorLogs: 1, }), ) }) diff --git a/internal/controller/nonadminrestore_controller.go b/internal/controller/nonadminrestore_controller.go index fa342575..2ea2cb9c 100644 --- a/internal/controller/nonadminrestore_controller.go +++ b/internal/controller/nonadminrestore_controller.go @@ -18,7 +18,7 @@ package controller import ( "context" - "errors" + "fmt" "reflect" "github.com/go-logr/logr" @@ -89,7 +89,6 @@ func (r *NonAdminRestoreReconciler) Reconcile(ctx context.Context, req ctrl.Requ reconcileSteps = []nonAdminRestoreReconcileStepFunction{ r.setStatusAndConditionForDeletionAndCallDelete, r.deleteVeleroRestore, - r.removeNarFinalizerUponVeleroRestoreDeletion, } default: logger.V(1).Info("Executing creation/update path") @@ -128,7 +127,6 @@ func (r *NonAdminRestoreReconciler) Reconcile(ctx context.Context, req ctrl.Requ // - bool: true if reconciliation should be requeued, false otherwise // - error: any error encountered during the process func (r *NonAdminRestoreReconciler) setStatusAndConditionForDeletionAndCallDelete(ctx context.Context, logger logr.Logger, nar *nacv1alpha1.NonAdminRestore) (bool, error) { - requeueRequired := false updatedPhase := updateNonAdminPhase(&nar.Status.Phase, nacv1alpha1.NonAdminPhaseDeleting) updatedCondition := meta.SetStatusCondition(&nar.Status.Conditions, metav1.Condition{ @@ -144,19 +142,10 @@ func (r *NonAdminRestoreReconciler) setStatusAndConditionForDeletionAndCallDelet return false, err } logger.V(1).Info("NonAdminRestore status marked for deletion") - requeueRequired = true // Requeue to ensure latest NAR object in the next reconciliation steps } else { logger.V(1).Info("NonAdminRestore status unchanged during deletion") } - if nar.ObjectMeta.DeletionTimestamp.IsZero() { - logger.V(1).Info("Marking NonAdminRestore for deletion", constant.NameString, nar.Name) - if err := r.Delete(ctx, nar); err != nil { - logger.Error(err, "Failed to call Delete on the NonAdminRestore object") - return false, err - } - requeueRequired = true // Requeue to allow deletion to proceed - } - return requeueRequired, nil + return false, nil } func (r *NonAdminRestoreReconciler) deleteVeleroRestore(ctx context.Context, logger logr.Logger, nar *nacv1alpha1.NonAdminRestore) (bool, error) { @@ -167,9 +156,7 @@ func (r *NonAdminRestoreReconciler) deleteVeleroRestore(ctx context.Context, log veleroRestoreNACUUID := nar.Status.VeleroRestore.NACUUID veleroRestore, err := function.GetVeleroRestoreByLabel(ctx, r.Client, r.OADPNamespace, veleroRestoreNACUUID) - if err != nil { - // Case in which more then one VeleroRestore is found with the same label NACUUID logger.Error(err, findSingleVRError, constant.UUIDString, veleroRestoreNACUUID) return false, err } @@ -185,32 +172,8 @@ func (r *NonAdminRestoreReconciler) deleteVeleroRestore(ctx context.Context, log logger.V(1).Info("VeleroRestore deletion initiated", constant.NameString, veleroRestore.Name) } else { logger.V(1).Info("VeleroRestore already deleted") - } - return false, nil // Continue -} - -func (r *NonAdminRestoreReconciler) removeNarFinalizerUponVeleroRestoreDeletion(ctx context.Context, logger logr.Logger, nar *nacv1alpha1.NonAdminRestore) (bool, error) { - if !nar.ObjectMeta.DeletionTimestamp.IsZero() { - if nar.Status.VeleroRestore != nil && nar.Status.VeleroRestore.NACUUID != constant.EmptyString { - veleroRestoreNACUUID := nar.Status.VeleroRestore.NACUUID - - veleroRestore, err := function.GetVeleroRestoreByLabel(ctx, r.Client, r.OADPNamespace, veleroRestoreNACUUID) - if err != nil { - // Case in which more then one VeleroRestore is found with the same label UUID - logger.Error(err, findSingleVRError, constant.UUIDString, veleroRestoreNACUUID) - return false, err - } - - if veleroRestore != nil { - logger.V(1).Info("Waiting for VeleroRestore to be deleted", constant.NameString, veleroRestoreNACUUID) - return true, nil // Requeue - } - } - // VeleroRestore is deleted, proceed with deleting the NonAdminRestore - logger.V(1).Info("VeleroRestore deleted, removing NonAdminRestore finalizer") controllerutil.RemoveFinalizer(nar, constant.NarFinalizerName) - if err := r.Update(ctx, nar); err != nil { logger.Error(err, "Failed to remove finalizer from NonAdminRestore") return false, err @@ -277,14 +240,6 @@ func (r *NonAdminRestoreReconciler) validateSpec(ctx context.Context, logger log } func (r *NonAdminRestoreReconciler) setUUID(ctx context.Context, logger logr.Logger, nar *nacv1alpha1.NonAdminRestore) (bool, error) { - // Get the latest version of the NAR object just before checking if the NACUUID is set - // to ensure we do not miss any updates to the NAR object - narOriginal := nar.DeepCopy() - if err := r.Get(ctx, types.NamespacedName{Name: narOriginal.Name, Namespace: narOriginal.Namespace}, nar); err != nil { - logger.Error(err, "Failed to re-fetch NonAdminRestore") - return false, err - } - if nar.Status.VeleroRestore == nil || nar.Status.VeleroRestore.NACUUID == constant.EmptyString { veleroRestoreNACUUID := function.GenerateNacObjectUUID(nar.Namespace, nar.Name) nar.Status.VeleroRestore = &nacv1alpha1.VeleroRestore{ @@ -318,14 +273,9 @@ func (r *NonAdminRestoreReconciler) setFinalizer(ctx context.Context, logger log } func (r *NonAdminRestoreReconciler) createVeleroRestore(ctx context.Context, logger logr.Logger, nar *nacv1alpha1.NonAdminRestore) (bool, error) { - if nar.Status.VeleroRestore == nil || nar.Status.VeleroRestore.NACUUID == constant.EmptyString { - return false, errors.New("unable to get Velero Restore UUID from NonAdminRestore Status") - } - veleroRestoreNACUUID := nar.Status.VeleroRestore.NACUUID veleroRestore, err := function.GetVeleroRestoreByLabel(ctx, r.Client, r.OADPNamespace, veleroRestoreNACUUID) - if err != nil { // Case in which more then one VeleroBackup is found with the same label UUID logger.Error(err, findSingleVRError, constant.UUIDString, veleroRestoreNACUUID) @@ -334,6 +284,23 @@ func (r *NonAdminRestoreReconciler) createVeleroRestore(ctx context.Context, log if veleroRestore == nil { logger.Info("VeleroRestore with label not found, creating one", constant.UUIDString, veleroRestoreNACUUID) + if nar.Status.Phase == nacv1alpha1.NonAdminPhaseCreated { + updatedCondition := meta.SetStatusCondition(&nar.Status.Conditions, + metav1.Condition{ + Type: "Restorable", + Status: metav1.ConditionFalse, + Reason: "RestoreDeleted", + Message: "Deleted Velero Restore object", + }, + ) + if updatedCondition { + if updateErr := r.Status().Update(ctx, nar); updateErr != nil { + logger.Error(updateErr, nonAdminRestoreStatusUpdateFailureMessage) + return false, updateErr + } + } + return false, reconcile.TerminalError(fmt.Errorf("VeleroRestore was deleted")) + } nab := &nacv1alpha1.NonAdminBackup{} err = r.Get(ctx, types.NamespacedName{Name: nar.Spec.RestoreSpec.BackupName, Namespace: nar.Namespace}, nab) if err != nil { diff --git a/internal/controller/nonadminrestore_controller_test.go b/internal/controller/nonadminrestore_controller_test.go index 4b2efbd1..52879a63 100644 --- a/internal/controller/nonadminrestore_controller_test.go +++ b/internal/controller/nonadminrestore_controller_test.go @@ -30,9 +30,9 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/wait" "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/cache" nacv1alpha1 "github.com/migtools/oadp-non-admin/api/v1alpha1" "github.com/migtools/oadp-non-admin/internal/common/constant" @@ -47,6 +47,7 @@ type nonAdminRestoreFullReconcileScenario struct { spec nacv1alpha1.NonAdminRestoreSpec status nacv1alpha1.NonAdminRestoreStatus backupStatus nacv1alpha1.NonAdminBackupStatus + errorLogs int } func buildTestNonAdminRestore(nonAdminNamespace string, nonAdminName string, spec nacv1alpha1.NonAdminRestoreSpec) *nacv1alpha1.NonAdminRestore { @@ -168,8 +169,12 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminRestore Controller" gomega.Expect(deleteTestNamespaces(ctx, nonAdminRestoreNamespace, oadpNamespace)).To(gomega.Succeed()) cancel() - // wait cancel - time.Sleep(1 * time.Second) + // wait manager shutdown + gomega.Eventually(func() (bool, error) { + logOutput := ginkgo.CurrentSpecReport().CapturedGinkgoWriterOutput + shutdownlog := "INFO Wait completed, proceeding to shutdown the manager" + return strings.Contains(logOutput, shutdownlog) && strings.Count(logOutput, shutdownlog) == 1, nil + }, 5*time.Second, 1*time.Second).Should(gomega.BeTrue()) }) ginkgo.DescribeTable("Reconcile triggered by NonAdminRestore Create event", @@ -185,6 +190,12 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminRestore Controller" k8sManager, err := ctrl.NewManager(cfg, ctrl.Options{ Scheme: k8sClient.Scheme(), + Cache: cache.Options{ + DefaultNamespaces: map[string]cache.Config{ + nonAdminRestoreNamespace: {}, + oadpNamespace: {}, + }, + }, }) gomega.Expect(err).ToNot(gomega.HaveOccurred()) @@ -206,53 +217,44 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminRestore Controller" gomega.Expect(err).ToNot(gomega.HaveOccurred(), "failed to run manager") }() // wait manager start - managerStartTimeout := 10 * time.Second - pollInterval := 100 * time.Millisecond - ctxTimeout, cancel := context.WithTimeout(ctx, managerStartTimeout) - defer cancel() - - err = wait.PollUntilContextTimeout(ctxTimeout, pollInterval, managerStartTimeout, true, func(ctx context.Context) (done bool, err error) { - select { - case <-ctx.Done(): - return false, ctx.Err() - default: - // Check if the manager has started by verifying if the client is initialized - return k8sManager.GetClient() != nil, nil - } - }) - // Check if the context timeout or another error occurred - gomega.Expect(err).ToNot(gomega.HaveOccurred(), "Manager failed to start within the timeout period") + gomega.Eventually(func() (bool, error) { + logOutput := ginkgo.CurrentSpecReport().CapturedGinkgoWriterOutput + startUplog := `INFO Starting workers {"controller": "nonadminrestore", "controllerGroup": "oadp.openshift.io", "controllerKind": "NonAdminRestore", "worker count": 1}` + return strings.Contains(logOutput, startUplog) && + strings.Count(logOutput, startUplog) == 1 && + !strings.Contains(logOutput, "DEBUG Accepted Create event"), nil + }, 5*time.Second, 1*time.Second).Should(gomega.BeTrue()) ginkgo.By("Waiting Reconcile of create event") nonAdminRestore := buildTestNonAdminRestore(nonAdminRestoreNamespace, nonAdminRestoreName, scenario.spec) - gomega.Expect(k8sClient.Create(ctxTimeout, nonAdminRestore)).To(gomega.Succeed()) + gomega.Expect(k8sClient.Create(ctx, nonAdminRestore)).To(gomega.Succeed()) // wait NonAdminRestore reconcile - time.Sleep(2 * time.Second) - - ginkgo.By("Fetching NonAdminRestore after Reconcile") - gomega.Expect(k8sClient.Get( - ctxTimeout, - types.NamespacedName{ - Name: nonAdminRestoreName, - Namespace: nonAdminRestoreNamespace, - }, - nonAdminRestore, - )).To(gomega.Succeed()) - - ginkgo.By("Validating NonAdminRestore Status") + gomega.Eventually(func() (bool, error) { + err := k8sClient.Get( + ctx, + types.NamespacedName{ + Name: nonAdminRestoreName, + Namespace: nonAdminRestoreNamespace, + }, + nonAdminRestore, + ) + if err != nil { + return false, err + } + err = checkTestNonAdminRestoreStatus(nonAdminRestore, scenario.status) + return err == nil, err + }, 5*time.Second, 1*time.Second).Should(gomega.BeTrue()) - gomega.Expect(checkTestNonAdminRestoreStatus(nonAdminRestore, scenario.status)).To(gomega.Succeed()) + ginkgo.By("Checking if NonAdminRestore Spec was not changed") + gomega.Expect(reflect.DeepEqual( + nonAdminRestore.Spec, + scenario.spec, + )).To(gomega.BeTrue()) veleroRestore := &velerov1.Restore{} if scenario.status.VeleroRestore != nil && len(nonAdminRestore.Status.VeleroRestore.NACUUID) > 0 { - ginkgo.By("Checking if NonAdminRestore Spec was not changed") - gomega.Expect(reflect.DeepEqual( - nonAdminRestore.Spec, - scenario.spec, - )).To(gomega.BeTrue()) - gomega.Expect(k8sClient.Get( - ctxTimeout, + ctx, types.NamespacedName{ Name: nonAdminRestore.Status.VeleroRestore.Name, Namespace: oadpNamespace, @@ -268,19 +270,16 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminRestore Controller" } ginkgo.By("Simulating Velero Restore update to finished state") - veleroRestore.Status = velerov1.RestoreStatus{ Phase: velerov1.RestorePhaseCompleted, } // can not call .Status().Update() for veleroRestore object https://github.com/vmware-tanzu/velero/issues/8285 - gomega.Expect(k8sClient.Update(ctxTimeout, veleroRestore)).To(gomega.Succeed()) - - ginkgo.By("Velero Restore updated") + gomega.Expect(k8sClient.Update(ctx, veleroRestore)).To(gomega.Succeed()) // wait NonAdminRestore reconcile gomega.Eventually(func() (bool, error) { err := k8sClient.Get( - ctxTimeout, + ctx, types.NamespacedName{ Name: nonAdminRestoreName, Namespace: nonAdminRestoreNamespace, @@ -298,25 +297,11 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminRestore Controller" } ginkgo.By("Waiting NonAdminRestore deletion") - gomega.Expect(k8sClient.Delete(ctxTimeout, nonAdminRestore)).To(gomega.Succeed()) - gomega.Eventually(func() (bool, error) { - err := k8sClient.Get( - ctxTimeout, - types.NamespacedName{ - Name: nonAdminRestoreName, - Namespace: nonAdminRestoreNamespace, - }, - nonAdminRestore, - ) - if apierrors.IsNotFound(err) { - return true, nil - } - return false, err - }, 10*time.Second, 1*time.Second).Should(gomega.BeTrue()) + gomega.Expect(k8sClient.Delete(ctx, nonAdminRestore)).To(gomega.Succeed()) if scenario.status.VeleroRestore != nil && len(nonAdminRestore.Status.VeleroRestore.NACUUID) > 0 { gomega.Eventually(func() (bool, error) { err := k8sClient.Get( - ctxTimeout, + ctx, types.NamespacedName{ Name: nonAdminRestore.Status.VeleroRestore.Name, Namespace: oadpNamespace, @@ -329,6 +314,26 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminRestore Controller" return false, err }, 10*time.Second, 1*time.Second).Should(gomega.BeTrue()) } + gomega.Eventually(func() (bool, error) { + err := k8sClient.Get( + ctx, + types.NamespacedName{ + Name: nonAdminRestoreName, + Namespace: nonAdminRestoreNamespace, + }, + nonAdminRestore, + ) + if apierrors.IsNotFound(err) { + return true, nil + } + return false, err + }, 10*time.Second, 1*time.Second).Should(gomega.BeTrue()) + gomega.Eventually(func() (bool, error) { + logOutput := ginkgo.CurrentSpecReport().CapturedGinkgoWriterOutput + deletelog := `DEBUG Accepted Delete event {"NonAdminRestorePredicate"` + return strings.Contains(logOutput, deletelog) && strings.Count(logOutput, deletelog) == 1, nil + }, 5*time.Second, 1*time.Second).Should(gomega.BeTrue()) + gomega.Expect(strings.Count(ginkgo.CurrentSpecReport().CapturedGinkgoWriterOutput, "ERROR")).Should(gomega.Equal(scenario.errorLogs)) }, ginkgo.Entry("Should update NonAdminRestore until Velero Restore completes and then delete it", nonAdminRestoreFullReconcileScenario{ spec: nacv1alpha1.NonAdminRestoreSpec{ @@ -417,6 +422,7 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminRestore Controller" }, }, }, + errorLogs: 1, }), ) })