From 758afa180715e2d36c9d6c72788e24322e126ca0 Mon Sep 17 00:00:00 2001 From: Ivo Petrov Date: Mon, 29 Jul 2024 15:05:37 +0300 Subject: [PATCH] OS upgrade implementation for multi node clusters (#25) * Generalise OS plan creation * Add OS worker plan * Add worker plan upgrade logic * Enable OS upgrades * Fix worker in progress condition message * Remove leftovers --- internal/controller/reconcile_os.go | 23 +++++++- internal/controller/upgradeplan_controller.go | 5 +- internal/upgrade/os.go | 53 +++++++++++++++---- 3 files changed, 65 insertions(+), 16 deletions(-) diff --git a/internal/controller/reconcile_os.go b/internal/controller/reconcile_os.go index 695ce8a..40fc254 100644 --- a/internal/controller/reconcile_os.go +++ b/internal/controller/reconcile_os.go @@ -15,7 +15,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) -//lint:ignore U1000 - Temporary ignore "unused" linter error. Will be removed when function is ready to be used. func (r *UpgradePlanReconciler) reconcileOS(ctx context.Context, upgradePlan *lifecyclev1alpha1.UpgradePlan, release *release.Release) (ctrl.Result, error) { secret, err := upgrade.OSUpgradeSecret(&release.Components.OperatingSystem) if err != nil { @@ -58,7 +57,27 @@ func (r *UpgradePlanReconciler) reconcileOS(ctx context.Context, upgradePlan *li return ctrl.Result{Requeue: true}, nil } - // TODO: worker upgrade + workerPlan := upgrade.OSWorkerPlan(release.ReleaseVersion, secret.Name, &release.Components.OperatingSystem) + if err = r.Get(ctx, client.ObjectKeyFromObject(workerPlan), workerPlan); err != nil { + if !errors.IsNotFound(err) { + return ctrl.Result{}, err + } + + setInProgressCondition(upgradePlan, lifecyclev1alpha1.OperatingSystemUpgradedCondition, "Worker nodes are being upgraded") + return ctrl.Result{}, r.createPlan(ctx, upgradePlan, workerPlan) + } + + selector, err = metav1.LabelSelectorAsSelector(workerPlan.Spec.NodeSelector) + if err != nil { + return ctrl.Result{}, fmt.Errorf("parsing node selector: %w", err) + } + + if !isOSUpgraded(nodeList, selector, release.Components.OperatingSystem.PrettyName) { + setInProgressCondition(upgradePlan, lifecyclev1alpha1.OperatingSystemUpgradedCondition, "Worker nodes are being upgraded") + return ctrl.Result{}, nil + } + + setSuccessfulCondition(upgradePlan, lifecyclev1alpha1.OperatingSystemUpgradedCondition, "All cluster nodes are upgraded") return ctrl.Result{Requeue: true}, nil } diff --git a/internal/controller/upgradeplan_controller.go b/internal/controller/upgradeplan_controller.go index 5ae605d..e463e6a 100644 --- a/internal/controller/upgradeplan_controller.go +++ b/internal/controller/upgradeplan_controller.go @@ -98,9 +98,8 @@ func (r *UpgradePlanReconciler) executePlan(ctx context.Context, upgradePlan *li } switch { - // TODO: uncomment once OS upgrades support multi node clusters - // case !meta.IsStatusConditionTrue(upgradePlan.Status.Conditions, lifecyclev1alpha1.OperatingSystemUpgradedCondition): - // return r.reconcileOS(ctx, upgradePlan, release) + case !meta.IsStatusConditionTrue(upgradePlan.Status.Conditions, lifecyclev1alpha1.OperatingSystemUpgradedCondition): + return r.reconcileOS(ctx, upgradePlan, release) case !meta.IsStatusConditionTrue(upgradePlan.Status.Conditions, lifecyclev1alpha1.KubernetesUpgradedCondition): return r.reconcileKubernetes(ctx, upgradePlan, &release.Components.Kubernetes) case !isHelmUpgradeFinished(upgradePlan, lifecyclev1alpha1.RancherUpgradedCondition): diff --git a/internal/upgrade/os.go b/internal/upgrade/os.go index d3e3522..2e22cb2 100644 --- a/internal/upgrade/os.go +++ b/internal/upgrade/os.go @@ -65,12 +65,9 @@ func OSUpgradeSecret(releaseOS *release.OperatingSystem) (*corev1.Secret, error) } func OSControlPlanePlan(releaseVersion, secretName string, releaseOS *release.OperatingSystem) *upgradecattlev1.Plan { - const ( - planImage = "registry.suse.com/bci/bci-base:15.5" - ) - controlPlanePlanName := osPlanName(controlPlaneKey, releaseOS.ZypperID, releaseOS.Version) - controlPlanePlan := baseUpgradePlan(controlPlanePlanName) + controlPlanePlan := baseOSPlan(controlPlanePlanName, releaseVersion, secretName) + controlPlanePlan.Labels = map[string]string{ "os-upgrade": "control-plane", } @@ -107,25 +104,59 @@ func OSControlPlanePlan(releaseVersion, secretName string, releaseOS *release.Op }, } + return controlPlanePlan +} + +func OSWorkerPlan(releaseVersion, secretName string, releaseOS *release.OperatingSystem) *upgradecattlev1.Plan { + workerPlanName := osPlanName(workersKey, releaseOS.ZypperID, releaseOS.Version) + workerPlan := baseOSPlan(workerPlanName, releaseVersion, secretName) + + workerPlan.Labels = map[string]string{ + "os-upgrade": "worker", + } + + workerPlan.Spec.Concurrency = 2 + workerPlan.Spec.NodeSelector = &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: ControlPlaneLabel, + Operator: "NotIn", + Values: []string{ + "true", + }, + }, + }, + } + + return workerPlan +} + +func baseOSPlan(planName, releaseVersion, secretName string) *upgradecattlev1.Plan { + const ( + planImage = "registry.suse.com/bci/bci-base:15.5" + ) + + baseOSplan := baseUpgradePlan(planName) + secretPathRelativeToHost := fmt.Sprintf("/run/system-upgrade/secrets/%s", secretName) mountPath := filepath.Join("/host", secretPathRelativeToHost) - controlPlanePlan.Spec.Secrets = []upgradecattlev1.SecretSpec{ + baseOSplan.Spec.Secrets = []upgradecattlev1.SecretSpec{ { Name: secretName, Path: mountPath, }, } - controlPlanePlan.Spec.Cordon = true - controlPlanePlan.Spec.Version = releaseVersion + baseOSplan.Spec.Cordon = true + baseOSplan.Spec.Version = releaseVersion - controlPlanePlan.Spec.JobActiveDeadlineSecs = 3600 + baseOSplan.Spec.JobActiveDeadlineSecs = 3600 - controlPlanePlan.Spec.Upgrade = &upgradecattlev1.ContainerSpec{ + baseOSplan.Spec.Upgrade = &upgradecattlev1.ContainerSpec{ Image: planImage, Command: []string{"chroot", "/host"}, Args: []string{"sh", filepath.Join(secretPathRelativeToHost, scriptName)}, } - return controlPlanePlan + return baseOSplan } func osPlanName(typeKey, osName, osVersion string) string {