diff --git a/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller.go b/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller.go index c2cb05411fd9..ff7b242db78a 100644 --- a/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller.go +++ b/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller.go @@ -279,12 +279,7 @@ func (r *KubeadmConfigReconciler) Reconcile(ctx context.Context, req ctrl.Reques return ctrl.Result{}, nil } - res, err := r.reconcile(ctx, scope, cluster, config, configOwner) - if err != nil && errors.Is(err, clustercache.ErrClusterNotConnected) { - log.V(5).Info("Requeuing because connection to the workload cluster is down") - return ctrl.Result{RequeueAfter: time.Minute}, nil - } - return res, err + return r.reconcile(ctx, scope, cluster, config, configOwner) } func (r *KubeadmConfigReconciler) reconcile(ctx context.Context, scope *Scope, cluster *clusterv1.Cluster, config *bootstrapv1.KubeadmConfig, configOwner *bsutil.ConfigOwner) (ctrl.Result, error) { diff --git a/controlplane/kubeadm/internal/controllers/controller.go b/controlplane/kubeadm/internal/controllers/controller.go index 0f04983fa61a..00d2d111a3d7 100644 --- a/controlplane/kubeadm/internal/controllers/controller.go +++ b/controlplane/kubeadm/internal/controllers/controller.go @@ -255,21 +255,11 @@ func (r *KubeadmControlPlaneReconciler) Reconcile(ctx context.Context, req ctrl. if !kcp.ObjectMeta.DeletionTimestamp.IsZero() { // Handle deletion reconciliation loop. - res, err = r.reconcileDelete(ctx, controlPlane) - if errors.Is(err, clustercache.ErrClusterNotConnected) { - log.V(5).Info("Requeuing because connection to the workload cluster is down") - return ctrl.Result{RequeueAfter: time.Minute}, nil - } - return res, err + return r.reconcileDelete(ctx, controlPlane) } // Handle normal reconciliation loop. - res, err = r.reconcile(ctx, controlPlane) - if errors.Is(err, clustercache.ErrClusterNotConnected) { - log.V(5).Info("Requeuing because connection to the workload cluster is down") - return ctrl.Result{RequeueAfter: time.Minute}, nil - } - return res, err + return r.reconcile(ctx, controlPlane) } // initControlPlaneScope initializes the control plane scope; this includes also checking for orphan machines and diff --git a/exp/addons/internal/controllers/clusterresourceset_controller.go b/exp/addons/internal/controllers/clusterresourceset_controller.go index 6ca5bf3c7eaa..394cd7249355 100644 --- a/exp/addons/internal/controllers/clusterresourceset_controller.go +++ b/exp/addons/internal/controllers/clusterresourceset_controller.go @@ -183,17 +183,9 @@ func (r *ClusterResourceSetReconciler) Reconcile(ctx context.Context, req ctrl.R } errs := []error{} - errClusterNotConnectedOccurred := false for _, cluster := range clusters { if err := r.ApplyClusterResourceSet(ctx, cluster, clusterResourceSet); err != nil { - // Requeue if the reconcile failed because connection to workload cluster was down. - if errors.Is(err, clustercache.ErrClusterNotConnected) { - log.V(5).Info("Requeuing because connection to the workload cluster is down") - errClusterNotConnectedOccurred = true - } else { - // Append the error if the error is not ErrClusterLocked. - errs = append(errs, err) - } + errs = append(errs, err) } } @@ -218,13 +210,6 @@ func (r *ClusterResourceSetReconciler) Reconcile(ctx context.Context, req ctrl.R return ctrl.Result{}, kerrors.NewAggregate(errs) } - // Requeue if ErrClusterNotConnected was returned for one of the clusters. 
- if errClusterNotConnectedOccurred { - // Requeue after a minute to not end up in exponential delayed requeue which - // could take up to 16m40s. - return ctrl.Result{RequeueAfter: time.Minute}, nil - } - return ctrl.Result{}, nil } diff --git a/exp/internal/controllers/machinepool_controller.go b/exp/internal/controllers/machinepool_controller.go index b3fdf3031b82..c85528d22328 100644 --- a/exp/internal/controllers/machinepool_controller.go +++ b/exp/internal/controllers/machinepool_controller.go @@ -228,14 +228,7 @@ func (r *MachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) // Handle deletion reconciliation loop. if !mp.ObjectMeta.DeletionTimestamp.IsZero() { - err := r.reconcileDelete(ctx, cluster, mp) - // Requeue if the reconcile failed because connection to workload cluster was down. - if errors.Is(err, clustercache.ErrClusterNotConnected) { - log.V(5).Info("Requeuing because connection to the workload cluster is down") - return ctrl.Result{RequeueAfter: time.Minute}, nil - } - - return ctrl.Result{}, err + return ctrl.Result{}, r.reconcileDelete(ctx, cluster, mp) } // Handle normal reconciliation loop. @@ -243,13 +236,7 @@ func (r *MachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) cluster: cluster, machinePool: mp, } - res, err := r.reconcile(ctx, scope) - // Requeue if the reconcile failed because connection to workload cluster was down. - if errors.Is(err, clustercache.ErrClusterNotConnected) { - log.V(5).Info("Requeuing because connection to the workload cluster is down") - return ctrl.Result{RequeueAfter: time.Minute}, nil - } - return res, err + return r.reconcile(ctx, scope) } func (r *MachinePoolReconciler) reconcile(ctx context.Context, s *scope) (ctrl.Result, error) { diff --git a/internal/controllers/machine/machine_controller.go b/internal/controllers/machine/machine_controller.go index 739400ef8005..a63977a25d79 100644 --- a/internal/controllers/machine/machine_controller.go +++ b/internal/controllers/machine/machine_controller.go @@ -201,8 +201,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re return ctrl.Result{}, err } - log := ctrl.LoggerFrom(ctx).WithValues("Cluster", klog.KRef(m.Namespace, m.Spec.ClusterName)) - ctx = ctrl.LoggerInto(ctx, log) + ctx = ctrl.LoggerInto(ctx, ctrl.LoggerFrom(ctx).WithValues("Cluster", klog.KRef(m.Namespace, m.Spec.ClusterName))) // Add finalizer first if not set to avoid the race condition between init and delete. if finalizerAdded, err := finalizers.EnsureFinalizer(ctx, r.Client, m, clusterv1.MachineFinalizer); err != nil || finalizerAdded { @@ -211,7 +210,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re // AddOwners adds the owners of Machine as k/v pairs to the logger. // Specifically, it will add KubeadmControlPlane, MachineSet and MachineDeployment. - ctx, log, err := clog.AddOwners(ctx, r.Client, m) + ctx, _, err := clog.AddOwners(ctx, r.Client, m) if err != nil { return ctrl.Result{}, err } @@ -276,23 +275,11 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re r.reconcileDelete, ) - res, err := doReconcile(ctx, reconcileDelete, s) - // Requeue if the reconcile failed because connection to workload cluster was down. 
- if errors.Is(err, clustercache.ErrClusterNotConnected) { - log.V(5).Info("Requeuing because connection to the workload cluster is down") - return ctrl.Result{RequeueAfter: time.Minute}, nil - } - return res, err + return doReconcile(ctx, reconcileDelete, s) } // Handle normal reconciliation loop. - res, err := doReconcile(ctx, alwaysReconcile, s) - // Requeue if the reconcile failed because connection to workload cluster was down. - if errors.Is(err, clustercache.ErrClusterNotConnected) { - log.V(5).Info("Requeuing because connection to the workload cluster is down") - return ctrl.Result{RequeueAfter: time.Minute}, nil - } - return res, err + return doReconcile(ctx, alwaysReconcile, s) } func patchMachine(ctx context.Context, patchHelper *patch.Helper, machine *clusterv1.Machine, options ...patch.Option) error { @@ -819,14 +806,7 @@ func (r *Reconciler) drainNode(ctx context.Context, s *scope) (ctrl.Result, erro remoteClient, err := r.ClusterCache.GetClient(ctx, util.ObjectKey(cluster)) if err != nil { - if errors.Is(err, clustercache.ErrClusterNotConnected) { - log.V(5).Info("Requeuing drain Node because connection to the workload cluster is down") - s.deletingReason = clusterv1.MachineDeletingDrainingNodeV1Beta2Reason - s.deletingMessage = "Requeuing drain Node because connection to the workload cluster is down" - return ctrl.Result{RequeueAfter: time.Minute}, nil - } - log.Error(err, "Error creating a remote client for cluster while draining Node, won't retry") - return ctrl.Result{}, nil + return ctrl.Result{}, errors.Wrapf(err, "failed to drain Node %s", nodeName) } node := &corev1.Node{} @@ -992,15 +972,9 @@ func (r *Reconciler) shouldWaitForNodeVolumes(ctx context.Context, s *scope) (ct } func (r *Reconciler) deleteNode(ctx context.Context, cluster *clusterv1.Cluster, name string) error { - log := ctrl.LoggerFrom(ctx) - remoteClient, err := r.ClusterCache.GetClient(ctx, util.ObjectKey(cluster)) if err != nil { - if errors.Is(err, clustercache.ErrClusterNotConnected) { - return errors.Wrapf(err, "failed deleting Node because connection to the workload cluster is down") - } - log.Error(err, "Error creating a remote client for cluster while deleting Node, won't retry") - return nil + return errors.Wrapf(err, "failed deleting Node because connection to the workload cluster is down") } node := &corev1.Node{ diff --git a/internal/controllers/machinehealthcheck/machinehealthcheck_controller.go b/internal/controllers/machinehealthcheck/machinehealthcheck_controller.go index dfe35f1c2754..6e87319f2a96 100644 --- a/internal/controllers/machinehealthcheck/machinehealthcheck_controller.go +++ b/internal/controllers/machinehealthcheck/machinehealthcheck_controller.go @@ -188,19 +188,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re } m.Labels[clusterv1.ClusterNameLabel] = m.Spec.ClusterName - result, err := r.reconcile(ctx, log, cluster, m) - if err != nil { - // Requeue if the reconcile failed because connection to workload cluster was down. 
- if errors.Is(err, clustercache.ErrClusterNotConnected) { - log.V(5).Info("Requeuing because connection to the workload cluster is down") - return ctrl.Result{RequeueAfter: time.Minute}, nil - } - - // Requeue immediately if any errors occurred - return ctrl.Result{}, err - } - - return result, nil + return r.reconcile(ctx, log, cluster, m) } func (r *Reconciler) reconcile(ctx context.Context, logger logr.Logger, cluster *clusterv1.Cluster, m *clusterv1.MachineHealthCheck) (ctrl.Result, error) { diff --git a/internal/controllers/machineset/machineset_controller.go b/internal/controllers/machineset/machineset_controller.go index a3eff8ad1a23..2e63405ab69d 100644 --- a/internal/controllers/machineset/machineset_controller.go +++ b/internal/controllers/machineset/machineset_controller.go @@ -168,8 +168,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (retres ct return ctrl.Result{}, err } - log := ctrl.LoggerFrom(ctx).WithValues("Cluster", klog.KRef(machineSet.Namespace, machineSet.Spec.ClusterName)) - ctx = ctrl.LoggerInto(ctx, log) + ctx = ctrl.LoggerInto(ctx, ctrl.LoggerFrom(ctx).WithValues("Cluster", klog.KRef(machineSet.Namespace, machineSet.Spec.ClusterName))) // Add finalizer first if not set to avoid the race condition between init and delete. if finalizerAdded, err := finalizers.EnsureFinalizer(ctx, r.Client, machineSet, clusterv1.MachineSetFinalizer); err != nil || finalizerAdded { @@ -178,7 +177,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (retres ct // AddOwners adds the owners of MachineSet as k/v pairs to the logger. // Specifically, it will add MachineDeployment. - ctx, log, err := clog.AddOwners(ctx, r.Client, machineSet) + ctx, _, err := clog.AddOwners(ctx, r.Client, machineSet) if err != nil { return ctrl.Result{}, err } @@ -257,20 +256,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (retres ct wrapErrMachineSetReconcileFunc(r.syncReplicas, "failed to sync replicas"), ) - result, kerr := doReconcile(ctx, s, reconcileNormal) - if kerr != nil { - // Requeue if the reconcile failed because connection to workload cluster was down. - if errors.Is(kerr, clustercache.ErrClusterNotConnected) { - if len(kerr.Errors()) > 1 { - log.Error(kerr, "Requeuing because connection to the workload cluster is down") - } else { - log.V(5).Info("Requeuing because connection to the workload cluster is down") - } - return ctrl.Result{RequeueAfter: time.Minute}, nil - } - err = kerr - } - return result, err + return doReconcile(ctx, s, reconcileNormal) } type scope struct { diff --git a/internal/controllers/topology/cluster/cluster_controller.go b/internal/controllers/topology/cluster/cluster_controller.go index cf7ae76d3344..6100a4f3c7f2 100644 --- a/internal/controllers/topology/cluster/cluster_controller.go +++ b/internal/controllers/topology/cluster/cluster_controller.go @@ -278,8 +278,6 @@ func (r *Reconciler) SetupForDryRun(recorder record.EventRecorder) { } func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) { - log := ctrl.LoggerFrom(ctx) - // Fetch the Cluster instance. cluster := &clusterv1.Cluster{} if err := r.Client.Get(ctx, req.NamespacedName, cluster); err != nil { @@ -340,15 +338,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re } // Handle normal reconciliation loop. - result, err := r.reconcile(ctx, s) - if err != nil { - // Requeue if the reconcile failed because connection to workload cluster was down. 
- if errors.Is(err, clustercache.ErrClusterNotConnected) { - log.V(5).Info("Requeuing because connection to the workload cluster is down") - return ctrl.Result{RequeueAfter: time.Minute}, nil - } - } - return result, err + return r.reconcile(ctx, s) } // reconcile handles cluster reconciliation. diff --git a/test/infrastructure/docker/internal/controllers/dockermachine_controller.go b/test/infrastructure/docker/internal/controllers/dockermachine_controller.go index 03de44628a2a..ee39c9955593 100644 --- a/test/infrastructure/docker/internal/controllers/dockermachine_controller.go +++ b/test/infrastructure/docker/internal/controllers/dockermachine_controller.go @@ -27,6 +27,7 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + kerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/klog/v2" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" @@ -70,8 +71,7 @@ type DockerMachineReconciler struct { // +kubebuilder:rbac:groups="",resources=configmaps,verbs=get;list;watch // Reconcile handles DockerMachine events. -func (r *DockerMachineReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, rerr error) { - log := ctrl.LoggerFrom(ctx) +func (r *DockerMachineReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) { ctx = container.RuntimeInto(ctx, r.ContainerRuntime) // Fetch the DockerMachine instance. @@ -150,10 +150,7 @@ func (r *DockerMachineReconciler) Reconcile(ctx context.Context, req ctrl.Reques // Always attempt to Patch the DockerMachine object and status after each reconciliation. defer func() { if err := patchDockerMachine(ctx, patchHelper, dockerMachine); err != nil { - log.Error(err, "Failed to patch DockerMachine") - if rerr == nil { - rerr = err - } + reterr = kerrors.NewAggregate([]error{reterr, err}) } }() @@ -188,14 +185,7 @@ func (r *DockerMachineReconciler) Reconcile(ctx context.Context, req ctrl.Reques } // Handle non-deleted machines - res, err := r.reconcileNormal(ctx, cluster, dockerCluster, machine, dockerMachine, externalMachine, externalLoadBalancer) - // Requeue if the reconcile failed because the ClusterCacheTracker was locked for - // the current cluster because of concurrent access. - if errors.Is(err, clustercache.ErrClusterNotConnected) { - log.V(5).Info("Requeuing because connection to the workload cluster is down") - return ctrl.Result{RequeueAfter: time.Minute}, nil - } - return res, err + return r.reconcileNormal(ctx, cluster, dockerCluster, machine, dockerMachine, externalMachine, externalLoadBalancer) } func patchDockerMachine(ctx context.Context, patchHelper *patch.Helper, dockerMachine *infrav1.DockerMachine) error {
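
Note on the pattern removed above: every Reconcile touched by this diff used to carry the same block, namely a check for errors.Is(err, clustercache.ErrClusterNotConnected), a V(5) log line, and a flat ctrl.Result{RequeueAfter: time.Minute} so the controller would not fall into the default exponential backoff (which, as one of the deleted comments notes, can delay a retry by up to 16m40s). With the block gone, ErrClusterNotConnected now propagates out of Reconcile like any other error. The sketch below is not part of this diff and the names (requeueWhenNotConnected, inner) are hypothetical; it only illustrates how the same flat one-minute requeue could be applied once, as a wrapper around a reconciler, instead of being repeated in every controller, assuming such handling is still wanted at all.

package sketch

import (
	"context"
	"errors"
	"time"

	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"

	"sigs.k8s.io/cluster-api/controllers/clustercache"
)

// requeueWhenNotConnected is a hypothetical wrapper (not part of this change)
// that converts clustercache.ErrClusterNotConnected returned by the wrapped
// reconciler into a flat one-minute requeue instead of letting it hit the
// rate limiter's exponential backoff.
type requeueWhenNotConnected struct {
	inner reconcile.Reconciler
}

func (r requeueWhenNotConnected) Reconcile(ctx context.Context, req reconcile.Request) (ctrl.Result, error) {
	res, err := r.inner.Reconcile(ctx, req)
	if err != nil && errors.Is(err, clustercache.ErrClusterNotConnected) {
		// Same behavior the per-controller blocks above used to implement.
		ctrl.LoggerFrom(ctx).V(5).Info("Requeuing because connection to the workload cluster is down")
		return ctrl.Result{RequeueAfter: time.Minute}, nil
	}
	return res, err
}

If used, such a wrapper would be installed where the controller is built, for example by passing requeueWhenNotConnected{inner: r} instead of r to Complete() in SetupWithManager; again, that is an illustration of the removed pattern, not something this diff introduces.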