From 6e9f5e4e4dc2ef7c1e85b715514ff465f8c5b931 Mon Sep 17 00:00:00 2001 From: Ivo Petrov Date: Tue, 23 Jul 2024 17:46:49 +0300 Subject: [PATCH 01/16] Add watch for secrets --- config/rbac/role.yaml | 1 + internal/controller/upgradeplan_controller.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 71093fa..3238517 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -20,6 +20,7 @@ rules: - delete - get - list + - watch - apiGroups: - batch resources: diff --git a/internal/controller/upgradeplan_controller.go b/internal/controller/upgradeplan_controller.go index 08e457e..ef3bd95 100644 --- a/internal/controller/upgradeplan_controller.go +++ b/internal/controller/upgradeplan_controller.go @@ -58,7 +58,7 @@ type UpgradePlanReconciler struct { // +kubebuilder:rbac:groups=lifecycle.suse.com,resources=upgradeplans/finalizers,verbs=update // +kubebuilder:rbac:groups=upgrade.cattle.io,resources=plans,verbs=create;list;get;watch // +kubebuilder:rbac:groups="",resources=nodes,verbs=watch;list -// +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;delete;create +// +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;delete;create;watch // +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch // +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;watch // +kubebuilder:rbac:groups=batch,resources=jobs/status,verbs=get From c68ffd5fea5919c5bfbffa4f4a9d9fbbf0af237a Mon Sep 17 00:00:00 2001 From: Ivo Petrov Date: Tue, 23 Jul 2024 17:47:57 +0300 Subject: [PATCH 02/16] Move control-plane/worker keys to base --- internal/upgrade/base.go | 6 ++++-- internal/upgrade/kubernetes.go | 3 --- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/internal/upgrade/base.go b/internal/upgrade/base.go index 66cee69..5a7e0b7 100644 --- a/internal/upgrade/base.go +++ b/internal/upgrade/base.go @@ -7,8 +7,10 @@ import ( ) const ( - planNamespace = "cattle-system" - PlanAnnotation = "lifecycle.suse.com/upgrade-plan" + planNamespace = "cattle-system" + PlanAnnotation = "lifecycle.suse.com/upgrade-plan" + controlPlaneKey = "control-plane" + workersKey = "workers" ControlPlaneLabel = "node-role.kubernetes.io/control-plane" ) diff --git a/internal/upgrade/kubernetes.go b/internal/upgrade/kubernetes.go index 4bcde12..1b9c91f 100644 --- a/internal/upgrade/kubernetes.go +++ b/internal/upgrade/kubernetes.go @@ -12,9 +12,6 @@ import ( const ( rke2UpgradeImage = "rancher/rke2-upgrade" k3sUpgradeImage = "rancher/k3s-upgrade" - - controlPlaneKey = "control-plane" - workersKey = "workers" ) func kubernetesPlanName(typeKey, version string) string { From c4496cfd63c95f50fab80b59da00cbe07f4a618c Mon Sep 17 00:00:00 2001 From: Ivo Petrov Date: Tue, 23 Jul 2024 17:48:40 +0300 Subject: [PATCH 03/16] Introduce OS control-plane plan object setup --- internal/upgrade/os.go | 84 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 77 insertions(+), 7 deletions(-) diff --git a/internal/upgrade/os.go b/internal/upgrade/os.go index 36f5878..db2e207 100644 --- a/internal/upgrade/os.go +++ b/internal/upgrade/os.go @@ -4,22 +4,25 @@ import ( "bytes" _ "embed" "fmt" + "path/filepath" + "strings" "text/template" + upgradecattlev1 "github.com/rancher/system-upgrade-controller/pkg/apis/upgrade.cattle.io/v1" "github.com/suse-edge/upgrade-controller/pkg/release" corev1 "k8s.io/api/core/v1" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +const ( + scriptName = "os-upgrade.sh" + secretName = "os-upgrade-secret" ) //go:embed templates/os-upgrade.sh.tpl var osUpgradeScript string func OSUpgradeSecret(releaseOS *release.OperatingSystem) (*corev1.Secret, error) { - const ( - scriptName = "os-upgrade.sh" - secretName = "os-upgrade-secret" - ) - tmpl, err := template.New(scriptName).Parse(osUpgradeScript) if err != nil { return nil, fmt.Errorf("parsing contents: %w", err) @@ -45,7 +48,7 @@ func OSUpgradeSecret(releaseOS *release.OperatingSystem) (*corev1.Secret, error) } secret := &corev1.Secret{ - ObjectMeta: v1.ObjectMeta{ + ObjectMeta: metav1.ObjectMeta{ Name: secretName, Namespace: planNamespace, }, @@ -57,3 +60,70 @@ func OSUpgradeSecret(releaseOS *release.OperatingSystem) (*corev1.Secret, error) return secret, nil } + +func OSControlPlanePlan(release *release.Release) *upgradecattlev1.Plan { + const ( + planImage = "registry.suse.com/bci/bci-base:15.5" + ) + + controlPlanePlanName := osPlanName(controlPlaneKey, release.Components.OperatingSystem.ZypperID, release.Components.OperatingSystem.Version) + + controlPlanePlan := baseUpgradePlan(controlPlanePlanName) + controlPlanePlan.Labels = map[string]string{ + "os-upgrade": "control-plane", + } + controlPlanePlan.Spec.Concurrency = 1 + controlPlanePlan.Spec.NodeSelector = &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: ControlPlaneLabel, + Operator: "In", + Values: []string{ + "true", + }, + }, + }, + } + controlPlanePlan.Spec.Tolerations = []corev1.Toleration{ + { + Key: "CriticalAddonsOnly", + Operator: "Equal", + Value: "true", + Effect: "NoExecute", + }, + { + Key: ControlPlaneLabel, + Operator: "Equal", + Value: "", + Effect: "NoSchedule", + }, + { + Key: "node-role.kubernetes.io/etcd", + Operator: "Equal", + Value: "", + Effect: "NoExecute", + }, + } + + secretPathRelativeToHost := fmt.Sprintf("/run/system-upgrade/secrets/%s", secretName) + mountPath := filepath.Join("/host", secretPathRelativeToHost) + controlPlanePlan.Spec.Secrets = []upgradecattlev1.SecretSpec{ + { + Name: secretName, + Path: mountPath, + }, + } + controlPlanePlan.Spec.Cordon = true + controlPlanePlan.Spec.Version = release.ReleaseVersion + + controlPlanePlan.Spec.Upgrade = &upgradecattlev1.ContainerSpec{ + Image: planImage, + Command: []string{"chroot", "/host"}, + Args: []string{"sh", filepath.Join(secretPathRelativeToHost, scriptName)}, + } + return controlPlanePlan +} + +func osPlanName(typeKey, osName, osVersion string) string { + return fmt.Sprintf("%s-%s-%s", typeKey, strings.ToLower(osName), strings.ReplaceAll(osVersion, ".", "-")) +} From aead1cc90f67c9f42d2188e4010e3549efcfb73e Mon Sep 17 00:00:00 2001 From: Ivo Petrov Date: Thu, 25 Jul 2024 16:07:10 +0300 Subject: [PATCH 04/16] Add basic control-plane Plan deployment logic --- internal/controller/reconcile_os.go | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/internal/controller/reconcile_os.go b/internal/controller/reconcile_os.go index 5155397..46ab6d3 100644 --- a/internal/controller/reconcile_os.go +++ b/internal/controller/reconcile_os.go @@ -4,23 +4,38 @@ import ( "context" "fmt" + lifecyclev1alpha1 "github.com/suse-edge/upgrade-controller/api/v1alpha1" "github.com/suse-edge/upgrade-controller/internal/upgrade" "github.com/suse-edge/upgrade-controller/pkg/release" + "k8s.io/apimachinery/pkg/api/errors" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" ) //lint:ignore U1000 - Temporary ignore "unused" linter error. Will be removed when function is ready to be used. -func (r *UpgradePlanReconciler) reconcileOS(ctx context.Context, releaseOS *release.OperatingSystem) (ctrl.Result, error) { - secret, err := upgrade.OSUpgradeSecret(releaseOS) +func (r *UpgradePlanReconciler) reconcileOS(ctx context.Context, upgradePlan *lifecyclev1alpha1.UpgradePlan, release *release.Release) (ctrl.Result, error) { + secret, err := upgrade.OSUpgradeSecret(&release.Components.OperatingSystem) if err != nil { return ctrl.Result{}, fmt.Errorf("generating OS upgrade secret: %w", err) } - if err = r.Create(ctx, secret); err != nil { - return ctrl.Result{}, fmt.Errorf("creating OS upgrade secret: %w", err) + if err = r.Get(ctx, client.ObjectKeyFromObject(secret), secret); err != nil { + if !errors.IsNotFound(err) { + return ctrl.Result{}, err + } + + return ctrl.Result{}, r.createSecret(ctx, upgradePlan, secret) } - // TODO: OS upgrade logic + controlPlanePlan := upgrade.OSControlPlanePlan(release.ReleaseVersion, &release.Components.OperatingSystem) + if err = r.Get(ctx, client.ObjectKeyFromObject(controlPlanePlan), controlPlanePlan); err != nil { + if !errors.IsNotFound(err) { + return ctrl.Result{}, err + } + + setInProgressCondition(upgradePlan, lifecyclev1alpha1.OperatingSystemUpgradedCondition, "Control plane nodes are being upgraded") + return ctrl.Result{}, r.createPlan(ctx, upgradePlan, controlPlanePlan) + } return ctrl.Result{Requeue: true}, nil } From e94407920763c5686e6a49523bb316727f3f81b5 Mon Sep 17 00:00:00 2001 From: Ivo Petrov Date: Thu, 25 Jul 2024 16:09:29 +0300 Subject: [PATCH 05/16] Add OS upgrade validation for single clusters --- internal/controller/reconcile_os.go | 47 +++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/internal/controller/reconcile_os.go b/internal/controller/reconcile_os.go index 46ab6d3..d0cfc15 100644 --- a/internal/controller/reconcile_os.go +++ b/internal/controller/reconcile_os.go @@ -7,7 +7,10 @@ import ( lifecyclev1alpha1 "github.com/suse-edge/upgrade-controller/api/v1alpha1" "github.com/suse-edge/upgrade-controller/internal/upgrade" "github.com/suse-edge/upgrade-controller/pkg/release" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -37,5 +40,49 @@ func (r *UpgradePlanReconciler) reconcileOS(ctx context.Context, upgradePlan *li return ctrl.Result{}, r.createPlan(ctx, upgradePlan, controlPlanePlan) } + selector, err := metav1.LabelSelectorAsSelector(controlPlanePlan.Spec.NodeSelector) + if err != nil { + return ctrl.Result{}, fmt.Errorf("parsing node selector: %w", err) + } + + nodeList := &corev1.NodeList{} + if err := r.List(ctx, nodeList); err != nil { + return ctrl.Result{}, fmt.Errorf("listing nodes: %w", err) + } + + if !isOSUpgraded(nodeList, selector, release.Components.OperatingSystem.PrettyName) { + return ctrl.Result{}, nil + } else if controlPlaneOnlyCluster(nodeList) { + setSuccessfulCondition(upgradePlan, lifecyclev1alpha1.OperatingSystemUpgradedCondition, "All cluster nodes are upgraded") + return ctrl.Result{Requeue: true}, nil + } + + // TODO: worker upgrade return ctrl.Result{Requeue: true}, nil } + +func isOSUpgraded(nodeList *corev1.NodeList, selector labels.Selector, osPrettyName string) bool { + for _, node := range nodeList.Items { + if !selector.Matches(labels.Set(node.Labels)) { + continue + } + + var nodeReadyStatus corev1.ConditionStatus + + for _, condition := range node.Status.Conditions { + if condition.Type == corev1.NodeReady { + nodeReadyStatus = condition.Status + break + } + } + + if nodeReadyStatus != corev1.ConditionTrue || node.Spec.Unschedulable || node.Status.NodeInfo.OSImage != osPrettyName { + // Upgrade is still in progress. + // TODO: Adjust to looking at the `Complete` condition of the + // `plans.upgrade.cattle.io` resources once system-upgrade-controller v0.13.4 is released. + return false + } + } + + return true +} From cc35034ea737109b3ab29eb708616119d9b1e38c Mon Sep 17 00:00:00 2001 From: Ivo Petrov Date: Thu, 25 Jul 2024 16:12:44 +0300 Subject: [PATCH 06/16] Generalise create and record event logic for plans and secrets --- internal/controller/upgradeplan_controller.go | 30 ++++++++++++++----- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/internal/controller/upgradeplan_controller.go b/internal/controller/upgradeplan_controller.go index ef3bd95..ea39d6b 100644 --- a/internal/controller/upgradeplan_controller.go +++ b/internal/controller/upgradeplan_controller.go @@ -109,24 +109,40 @@ func (r *UpgradePlanReconciler) executePlan(ctx context.Context, upgradePlan *li return ctrl.Result{}, nil } -func (r *UpgradePlanReconciler) recordCreatedPlan(upgradePlan *lifecyclev1alpha1.UpgradePlan, name, namespace string) { - r.Recorder.Eventf(upgradePlan, corev1.EventTypeNormal, "PlanCreated", "Upgrade plan created: %s/%s", namespace, name) +func (r *UpgradePlanReconciler) createSecret(ctx context.Context, upgradePlan *lifecyclev1alpha1.UpgradePlan, secret *corev1.Secret) error { + if err := r.createObject(ctx, upgradePlan, secret); err != nil { + return fmt.Errorf("creating secret: %w", err) + } + + r.recordCreatedObject(upgradePlan, "SecretCreated", fmt.Sprintf("Secret created: %s/%s", secret.Namespace, secret.Name)) + return nil } func (r *UpgradePlanReconciler) createPlan(ctx context.Context, upgradePlan *lifecyclev1alpha1.UpgradePlan, plan *upgradecattlev1.Plan) error { - if err := ctrl.SetControllerReference(upgradePlan, plan, r.Scheme); err != nil { - return fmt.Errorf("setting controller reference: %w", err) + if err := r.createObject(ctx, upgradePlan, plan); err != nil { + return fmt.Errorf("creating upgrade plan: %w", err) } - if err := r.Create(ctx, plan); err != nil { - return fmt.Errorf("creating upgrade plan: %w", err) + r.recordCreatedObject(upgradePlan, "PlanCreated", fmt.Sprintf("Upgrade plan created: %s/%s", plan.Namespace, plan.Name)) + return nil +} + +func (r *UpgradePlanReconciler) createObject(ctx context.Context, upgradePlan *lifecyclev1alpha1.UpgradePlan, obj client.Object) error { + if err := ctrl.SetControllerReference(upgradePlan, obj, r.Scheme); err != nil { + return fmt.Errorf("setting controller reference: %w", err) } - r.recordCreatedPlan(upgradePlan, plan.Name, plan.Namespace) + if err := r.Create(ctx, obj); err != nil { + return fmt.Errorf("creating object: %w", err) + } return nil } +func (r *UpgradePlanReconciler) recordCreatedObject(upgradePlan *lifecyclev1alpha1.UpgradePlan, reason, msg string) { + r.Recorder.Eventf(upgradePlan, corev1.EventTypeNormal, reason, msg) +} + func isHelmUpgradeFinished(plan *lifecyclev1alpha1.UpgradePlan, conditionType string) bool { condition := meta.FindStatusCondition(plan.Status.Conditions, conditionType) From 696b08a4cc7ed5878667a1aadb3ac95bed0905a4 Mon Sep 17 00:00:00 2001 From: Ivo Petrov Date: Thu, 25 Jul 2024 16:14:07 +0300 Subject: [PATCH 07/16] Make sure initial plan creation does not trigger reconciliation --- internal/controller/upgradeplan_controller.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/internal/controller/upgradeplan_controller.go b/internal/controller/upgradeplan_controller.go index ea39d6b..7f8f141 100644 --- a/internal/controller/upgradeplan_controller.go +++ b/internal/controller/upgradeplan_controller.go @@ -217,6 +217,9 @@ func (r *UpgradePlanReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&lifecyclev1alpha1.UpgradePlan{}, builder.WithPredicates(predicate.GenerationChangedPredicate{})). Owns(&upgradecattlev1.Plan{}, builder.WithPredicates(predicate.Funcs{ + CreateFunc: func(e event.CreateEvent) bool { + return false + }, UpdateFunc: func(e event.UpdateEvent) bool { // Upgrade plans are being constantly updated on every node change. // Ensure that the reconciliation only covers the scenarios From 23dd6f92632bfaf316a3390fffd47e3ef8567212 Mon Sep 17 00:00:00 2001 From: Ivo Petrov Date: Thu, 25 Jul 2024 16:15:03 +0300 Subject: [PATCH 08/16] Keep track of secrets related to the UpgradePlan --- internal/controller/upgradeplan_controller.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/controller/upgradeplan_controller.go b/internal/controller/upgradeplan_controller.go index 7f8f141..c95be17 100644 --- a/internal/controller/upgradeplan_controller.go +++ b/internal/controller/upgradeplan_controller.go @@ -251,5 +251,6 @@ func (r *UpgradePlanReconciler) SetupWithManager(mgr ctrl.Manager) error { return false }, })). + Owns(&corev1.Secret{}). Complete(r) } From ac585c389485678aac57bb959edb7c69a10419ac Mon Sep 17 00:00:00 2001 From: Ivo Petrov Date: Thu, 25 Jul 2024 16:16:31 +0300 Subject: [PATCH 09/16] Add OS upgrade initial condition and condition verification --- internal/controller/upgradeplan_controller.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/internal/controller/upgradeplan_controller.go b/internal/controller/upgradeplan_controller.go index c95be17..fd9233b 100644 --- a/internal/controller/upgradeplan_controller.go +++ b/internal/controller/upgradeplan_controller.go @@ -90,6 +90,7 @@ func (r *UpgradePlanReconciler) executePlan(ctx context.Context, upgradePlan *li } if len(upgradePlan.Status.Conditions) == 0 { + setPendingCondition(upgradePlan, lifecyclev1alpha1.OperatingSystemUpgradedCondition, "OS upgrade is not yet started") setPendingCondition(upgradePlan, lifecyclev1alpha1.KubernetesUpgradedCondition, "Kubernetes upgrade is not yet started") setPendingCondition(upgradePlan, lifecyclev1alpha1.RancherUpgradedCondition, "Rancher upgrade is not yet started") @@ -97,6 +98,9 @@ func (r *UpgradePlanReconciler) executePlan(ctx context.Context, upgradePlan *li } switch { + // TODO: uncomment once OS upgrades support multi node clusters + // case !meta.IsStatusConditionTrue(upgradePlan.Status.Conditions, lifecyclev1alpha1.OperatingSystemUpgradedCondition): + // return r.reconcileOS(ctx, upgradePlan, release) case !meta.IsStatusConditionTrue(upgradePlan.Status.Conditions, lifecyclev1alpha1.KubernetesUpgradedCondition): return r.reconcileKubernetes(ctx, upgradePlan, &release.Components.Kubernetes) case !isHelmUpgradeFinished(upgradePlan, lifecyclev1alpha1.RancherUpgradedCondition): From f201d2ef7efd65cb0568ce53db861732cf4faf8c Mon Sep 17 00:00:00 2001 From: Ivo Petrov Date: Thu, 25 Jul 2024 16:17:24 +0300 Subject: [PATCH 10/16] Add 'prettyName' for release manifest OS --- manifests/release-3.0.1.yaml | 1 + pkg/release/release.go | 1 + 2 files changed, 2 insertions(+) diff --git a/manifests/release-3.0.1.yaml b/manifests/release-3.0.1.yaml index 056b799..8346fd0 100644 --- a/manifests/release-3.0.1.yaml +++ b/manifests/release-3.0.1.yaml @@ -15,6 +15,7 @@ components: zypperID: SL-Micro cpeScheme: cpe:/o:suse:sl-micro:6.0 repoGPGPath: /usr/lib/rpm/gnupg/keys/gpg-pubkey-09d9ea69-645b99ce.asc + prettyName: "SUSE Linux Micro 6.0" supportedArchs: - x86_64 # - aarch64 TODO: add when we start supporting it diff --git a/pkg/release/release.go b/pkg/release/release.go index 7a0b519..e269e3a 100644 --- a/pkg/release/release.go +++ b/pkg/release/release.go @@ -27,6 +27,7 @@ type OperatingSystem struct { CPEScheme string `yaml:"cpeScheme"` RepoGPGPath string `yaml:"repoGPGPath"` SupportedArchs []string `yaml:"supportedArchs"` + PrettyName string `yaml:"prettyName"` } type HelmChart struct { From 7e1e987246d73838dc4f9d3656ba87dea2d611ac Mon Sep 17 00:00:00 2001 From: Ivo Petrov Date: Thu, 25 Jul 2024 16:19:32 +0300 Subject: [PATCH 11/16] Move to a more specific set of parameters for plan creation --- internal/upgrade/os.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/internal/upgrade/os.go b/internal/upgrade/os.go index db2e207..aa06686 100644 --- a/internal/upgrade/os.go +++ b/internal/upgrade/os.go @@ -61,13 +61,12 @@ func OSUpgradeSecret(releaseOS *release.OperatingSystem) (*corev1.Secret, error) return secret, nil } -func OSControlPlanePlan(release *release.Release) *upgradecattlev1.Plan { +func OSControlPlanePlan(releaseVersion string, releaseOS *release.OperatingSystem) *upgradecattlev1.Plan { const ( planImage = "registry.suse.com/bci/bci-base:15.5" ) - controlPlanePlanName := osPlanName(controlPlaneKey, release.Components.OperatingSystem.ZypperID, release.Components.OperatingSystem.Version) - + controlPlanePlanName := osPlanName(controlPlaneKey, releaseOS.ZypperID, releaseOS.Version) controlPlanePlan := baseUpgradePlan(controlPlanePlanName) controlPlanePlan.Labels = map[string]string{ "os-upgrade": "control-plane", @@ -114,7 +113,7 @@ func OSControlPlanePlan(release *release.Release) *upgradecattlev1.Plan { }, } controlPlanePlan.Spec.Cordon = true - controlPlanePlan.Spec.Version = release.ReleaseVersion + controlPlanePlan.Spec.Version = releaseVersion controlPlanePlan.Spec.Upgrade = &upgradecattlev1.ContainerSpec{ Image: planImage, From f82727be4b0e748292b6c6daa48ef3ec32fe5da8 Mon Sep 17 00:00:00 2001 From: Ivo Petrov Date: Thu, 25 Jul 2024 16:20:54 +0300 Subject: [PATCH 12/16] Increase deadline seconds for Jobs related to control-plane plans --- internal/upgrade/os.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal/upgrade/os.go b/internal/upgrade/os.go index aa06686..103a44a 100644 --- a/internal/upgrade/os.go +++ b/internal/upgrade/os.go @@ -115,6 +115,8 @@ func OSControlPlanePlan(releaseVersion string, releaseOS *release.OperatingSyste controlPlanePlan.Spec.Cordon = true controlPlanePlan.Spec.Version = releaseVersion + controlPlanePlan.Spec.JobActiveDeadlineSecs = 3600 + controlPlanePlan.Spec.Upgrade = &upgradecattlev1.ContainerSpec{ Image: planImage, Command: []string{"chroot", "/host"}, From 1e63caf952287ac72c79d252670bd5d481de675d Mon Sep 17 00:00:00 2001 From: Ivo Petrov Date: Thu, 25 Jul 2024 16:21:43 +0300 Subject: [PATCH 13/16] Add missing OS condition --- api/v1alpha1/upgradeplan_types.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/api/v1alpha1/upgradeplan_types.go b/api/v1alpha1/upgradeplan_types.go index 9202a4c..ff9acf3 100644 --- a/api/v1alpha1/upgradeplan_types.go +++ b/api/v1alpha1/upgradeplan_types.go @@ -21,6 +21,8 @@ import ( ) const ( + OperatingSystemUpgradedCondition = "OSUpgraded" + KubernetesUpgradedCondition = "KubernetesUpgraded" RancherUpgradedCondition = "RancherUpgraded" From fc288b41ed0facb01f1725470686e19abdd8ba61 Mon Sep 17 00:00:00 2001 From: Ivo Petrov Date: Thu, 25 Jul 2024 16:23:05 +0300 Subject: [PATCH 14/16] Update os-upgrade.sh to support error handling and suc specific use-cases --- internal/upgrade/templates/os-upgrade.sh.tpl | 126 ++++++++++++------- 1 file changed, 84 insertions(+), 42 deletions(-) diff --git a/internal/upgrade/templates/os-upgrade.sh.tpl b/internal/upgrade/templates/os-upgrade.sh.tpl index 14351da..f8f3214 100644 --- a/internal/upgrade/templates/os-upgrade.sh.tpl +++ b/internal/upgrade/templates/os-upgrade.sh.tpl @@ -1,44 +1,68 @@ #!/bin/sh -# Common Platform Enumeration (CPE) comming from the release manifest -RELEASE_CPE={{.CPEScheme}} -# Common Platform Enumeration (CPE) that the system is currently running with -CURRENT_CPE=`cat /etc/os-release | grep -w CPE_NAME | cut -d "=" -f 2 | tr -d '"'` - -# Determine whether architecture is supported -SYSTEM_ARCH=`arch` -IFS=' ' read -r -a SUPPORTED_ARCH_ARRAY <<< $(echo "{{.SupportedArchs}}" | tr -d '[]') - -found=false -for arch in "${SUPPORTED_ARCH_ARRAY[@]}"; do - if [ "${SYSTEM_ARCH}" == ${arch} ]; then - found=true - break - fi -done +OS_UPGRADED_PLACEHOLDER_PATH="/etc/os-upgrade-successful" -if [ ${found} == false ]; then - echo "Operating system is running an unsupported architecture. System arch: ${SYSTEM_ARCH}. Supported archs: ${SUPPORTED_ARCH_ARRAY[*]}" - exit 1 +if [ -f ${OS_UPGRADED_PLACEHOLDER_PATH} ]; then + # Due to the nature of how SUC handles OS upgrades, + # the OS upgrade pod will be restarted after an OS reboot. + # Whitin the new Pod we only need to check whether the upgrade + # has been done. This is done by checking for the '/run/os-upgrade-successful' + # file which will only be present on the system if a successful upgrade + # of the OS has taken place. + echo "Upgrade has already been done. Exiting.." + rm ${OS_UPGRADED_PLACEHOLDER_PATH} + exit 0 fi -# Determine whether this is a package update or a migration -if [ "${RELEASE_CPE}" == "${CURRENT_CPE}" ]; then - # Package update if both CPEs are the same - EXEC_START_PRE="" - EXEC_START="/usr/sbin/transactional-update cleanup up" - SERVICE_NAME="os-pkg-update.service" -else - # Migration if the CPEs are different - EXEC_START_PRE="/usr/sbin/transactional-update run rpm --import {{.RepoGPGKey}}" - EXEC_START="/usr/sbin/transactional-update --continue run zypper migration --non-interactive --product {{.ZypperID}}/{{.Version}}/${SYSTEM_ARCH} --root /" - SERVICE_NAME="os-migration.service" -fi +cleanupService(){ + rm ${1} + systemctl daemon-reload +} + +executeUpgrade(){ + # Common Platform Enumeration (CPE) comming from the release manifest + RELEASE_CPE={{.CPEScheme}} + # Common Platform Enumeration (CPE) that the system is currently running with + CURRENT_CPE=`cat /etc/os-release | grep -w CPE_NAME | cut -d "=" -f 2 | tr -d '"'` + + # Determine whether architecture is supported + SYSTEM_ARCH=`arch` + IFS=' ' read -r -a SUPPORTED_ARCH_ARRAY <<< $(echo "{{.SupportedArchs}}" | tr -d '[]') + + found=false + for arch in "${SUPPORTED_ARCH_ARRAY[@]}"; do + if [ "${SYSTEM_ARCH}" == ${arch} ]; then + found=true + break + fi + done + + if [ ${found} == false ]; then + echo "Operating system is running an unsupported architecture. System arch: ${SYSTEM_ARCH}. Supported archs: ${SUPPORTED_ARCH_ARRAY[*]}" + exit 1 + fi + + # Determine whether this is a package update or a migration + if [ "${RELEASE_CPE}" == "${CURRENT_CPE}" ]; then + # Package update if both CPEs are the same + EXEC_START_PRE="" + EXEC_START="/usr/sbin/transactional-update cleanup up" + SERVICE_NAME="os-pkg-update.service" + else + # Migration if the CPEs are different + EXEC_START_PRE="/usr/sbin/transactional-update cleanup run rpm --import {{.RepoGPGKey}}" + EXEC_START="/usr/sbin/transactional-update --continue run zypper migration --non-interactive --product {{.ZypperID}}/{{.Version}}/${SYSTEM_ARCH} --root /" + SERVICE_NAME="os-migration.service" + fi + + UPDATE_SERVICE_PATH=/etc/systemd/system/${SERVICE_NAME} -UPDATE_SERVICE_PATH=/etc/systemd/system/${SERVICE_NAME} + # Make sure that even after a non-zero exit of the script + # we will do a cleanup of the service + trap "cleanupService ${UPDATE_SERVICE_PATH}" EXIT -echo "Creating ${SERVICE_NAME}..." -cat < ${UPDATE_SERVICE_PATH} + echo "Creating ${SERVICE_NAME}..." + cat < ${UPDATE_SERVICE_PATH} [Unit] Description=SUSE Edge Upgrade Service ConditionACPower=true @@ -49,16 +73,34 @@ After=network.target Type=oneshot ExecStartPre=${EXEC_START_PRE} ExecStart=${EXEC_START} -ExecStartPost=-/bin/bash -c '[ -f /run/reboot-needed ] && shutdown -r +1' IOSchedulingClass=best-effort IOSchedulingPriority=7 EOF -echo "Starting ${SERVICE_NAME}..." -systemctl start ${SERVICE_NAME} & -tail --pid $! -f cat /var/log/transactional-update.log + echo "Starting ${SERVICE_NAME}..." + systemctl start ${SERVICE_NAME} & + + BACKGROUND_PROC_PID=$! + tail --pid ${BACKGROUND_PROC_PID} -f /var/log/transactional-update.log + + # Waits for the backgroud process with pid to finish and propagates its exit code to '$?' + wait ${BACKGROUND_PROC_PID} + + # Get exit code of backgroup process + BACKGROUND_PROC_EXIT=$? + if [ ${BACKGROUND_PROC_EXIT} -ne 0 ]; then + exit ${BACKGROUND_PROC_EXIT} + fi + + # Check if reboot is needed. + # Will only be needed when transactional-update has successfully + # done any package upgrades/updates. + if [ -f /run/reboot-needed ]; then + # Create a placeholder indicating that the os upgrade + # has finished succesfully + touch ${OS_UPGRADED_PLACEHOLDER_PATH} + /usr/sbin/reboot + fi +} -echo "Cleaning up..." -# Remove service after it has finished its work -rm ${UPDATE_SERVICE_PATH} -systemctl daemon-reload +executeUpgrade From c236ccfbc72bb99c6d9ff86897951c01120e1d6a Mon Sep 17 00:00:00 2001 From: Ivo Petrov Date: Fri, 26 Jul 2024 10:31:36 +0300 Subject: [PATCH 15/16] Fix typos in upgrade script --- internal/upgrade/templates/os-upgrade.sh.tpl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/upgrade/templates/os-upgrade.sh.tpl b/internal/upgrade/templates/os-upgrade.sh.tpl index f8f3214..b82cf99 100644 --- a/internal/upgrade/templates/os-upgrade.sh.tpl +++ b/internal/upgrade/templates/os-upgrade.sh.tpl @@ -5,7 +5,7 @@ OS_UPGRADED_PLACEHOLDER_PATH="/etc/os-upgrade-successful" if [ -f ${OS_UPGRADED_PLACEHOLDER_PATH} ]; then # Due to the nature of how SUC handles OS upgrades, # the OS upgrade pod will be restarted after an OS reboot. - # Whitin the new Pod we only need to check whether the upgrade + # Within the new Pod we only need to check whether the upgrade # has been done. This is done by checking for the '/run/os-upgrade-successful' # file which will only be present on the system if a successful upgrade # of the OS has taken place. @@ -20,7 +20,7 @@ cleanupService(){ } executeUpgrade(){ - # Common Platform Enumeration (CPE) comming from the release manifest + # Common Platform Enumeration (CPE) coming from the release manifest RELEASE_CPE={{.CPEScheme}} # Common Platform Enumeration (CPE) that the system is currently running with CURRENT_CPE=`cat /etc/os-release | grep -w CPE_NAME | cut -d "=" -f 2 | tr -d '"'` @@ -83,7 +83,7 @@ EOF BACKGROUND_PROC_PID=$! tail --pid ${BACKGROUND_PROC_PID} -f /var/log/transactional-update.log - # Waits for the backgroud process with pid to finish and propagates its exit code to '$?' + # Waits for the background process with pid to finish and propagates its exit code to '$?' wait ${BACKGROUND_PROC_PID} # Get exit code of backgroup process From 58074e9906be21b81ea6397951dddc3333eb8ae8 Mon Sep 17 00:00:00 2001 From: Ivo Petrov Date: Fri, 26 Jul 2024 11:09:03 +0300 Subject: [PATCH 16/16] Remove secret name from global constants --- internal/controller/reconcile_os.go | 2 +- internal/upgrade/os.go | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/internal/controller/reconcile_os.go b/internal/controller/reconcile_os.go index d0cfc15..245d054 100644 --- a/internal/controller/reconcile_os.go +++ b/internal/controller/reconcile_os.go @@ -30,7 +30,7 @@ func (r *UpgradePlanReconciler) reconcileOS(ctx context.Context, upgradePlan *li return ctrl.Result{}, r.createSecret(ctx, upgradePlan, secret) } - controlPlanePlan := upgrade.OSControlPlanePlan(release.ReleaseVersion, &release.Components.OperatingSystem) + controlPlanePlan := upgrade.OSControlPlanePlan(release.ReleaseVersion, secret.Name, &release.Components.OperatingSystem) if err = r.Get(ctx, client.ObjectKeyFromObject(controlPlanePlan), controlPlanePlan); err != nil { if !errors.IsNotFound(err) { return ctrl.Result{}, err diff --git a/internal/upgrade/os.go b/internal/upgrade/os.go index 103a44a..d3e3522 100644 --- a/internal/upgrade/os.go +++ b/internal/upgrade/os.go @@ -16,13 +16,16 @@ import ( const ( scriptName = "os-upgrade.sh" - secretName = "os-upgrade-secret" ) //go:embed templates/os-upgrade.sh.tpl var osUpgradeScript string func OSUpgradeSecret(releaseOS *release.OperatingSystem) (*corev1.Secret, error) { + const ( + secretName = "os-upgrade-secret" + ) + tmpl, err := template.New(scriptName).Parse(osUpgradeScript) if err != nil { return nil, fmt.Errorf("parsing contents: %w", err) @@ -61,7 +64,7 @@ func OSUpgradeSecret(releaseOS *release.OperatingSystem) (*corev1.Secret, error) return secret, nil } -func OSControlPlanePlan(releaseVersion string, releaseOS *release.OperatingSystem) *upgradecattlev1.Plan { +func OSControlPlanePlan(releaseVersion, secretName string, releaseOS *release.OperatingSystem) *upgradecattlev1.Plan { const ( planImage = "registry.suse.com/bci/bci-base:15.5" )