Skip to content

Commit

Permalink
OS upgrade implementation for single clusters (#20)
Browse files Browse the repository at this point in the history
* Add watch for secrets

* Move control-plane/worker keys to base

* Introduce OS control-plane plan object setup

* Add basic control-plane Plan deployment logic

* Add OS upgrade validation for single clusters

* Generalise create and record event logic for plans and secrets

* Make sure initial plan creation does not trigger reconciliation

* Keep track of secrets related to the UpgradePlan

* Add OS upgrade initial condition and condition verification

* Add 'prettyName' for release manifest OS

* Move to a more specific set of parameters for plan creation

* Increase deadline seconds for Jobs related to control-plane plans

* Add missing OS condition

* Update os-upgrade.sh to support error handling and SUC-specific use cases

* Fix typos in upgrade script

* Remove secret name from global constants
  • Loading branch information
ipetrov117 committed Jul 26, 2024
1 parent 406fc86 commit 88fbcd2
Show file tree
Hide file tree
Showing 10 changed files with 269 additions and 63 deletions.
2 changes: 2 additions & 0 deletions api/v1alpha1/upgradeplan_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ import (
)

const (
OperatingSystemUpgradedCondition = "OSUpgraded"

KubernetesUpgradedCondition = "KubernetesUpgraded"
RancherUpgradedCondition = "RancherUpgraded"

Expand Down
1 change: 1 addition & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ rules:
- delete
- get
- list
- watch
- apiGroups:
- batch
resources:
Expand Down
72 changes: 67 additions & 5 deletions internal/controller/reconcile_os.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,85 @@ import (
"context"
"fmt"

lifecyclev1alpha1 "github.com/suse-edge/upgrade-controller/api/v1alpha1"
"github.com/suse-edge/upgrade-controller/internal/upgrade"
"github.com/suse-edge/upgrade-controller/pkg/release"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
)

//lint:ignore U1000 - Temporary ignore "unused" linter error. Will be removed when function is ready to be used.
func (r *UpgradePlanReconciler) reconcileOS(ctx context.Context, releaseOS *release.OperatingSystem) (ctrl.Result, error) {
secret, err := upgrade.OSUpgradeSecret(releaseOS)
// reconcileOS performs one reconciliation step of the operating system upgrade
// described by release.Components.OperatingSystem on behalf of upgradePlan.
//
// Each call advances at most one stage and then returns:
//  1. the OS upgrade secret is created if it does not exist yet;
//  2. the control-plane Plan is created if it does not exist yet, and the
//     OperatingSystemUpgradedCondition is marked as in progress;
//  3. the nodes selected by the control-plane Plan are checked with isOSUpgraded.
//
// It returns a non-nil error when the secret cannot be generated, when a Get
// fails for a reason other than "not found", when a create helper fails, when
// the plan's node selector cannot be parsed or when the nodes cannot be listed.
func (r *UpgradePlanReconciler) reconcileOS(ctx context.Context, upgradePlan *lifecyclev1alpha1.UpgradePlan, release *release.Release) (ctrl.Result, error) {
secret, err := upgrade.OSUpgradeSecret(&release.Components.OperatingSystem)
if err != nil {
return ctrl.Result{}, fmt.Errorf("generating OS upgrade secret: %w", err)
}

if err = r.Create(ctx, secret); err != nil {
return ctrl.Result{}, fmt.Errorf("creating OS upgrade secret: %w", err)
// Fetch the secret by its generated key; only a "not found" result leads to creation.
if err = r.Get(ctx, client.ObjectKeyFromObject(secret), secret); err != nil {
if !errors.IsNotFound(err) {
return ctrl.Result{}, err
}

return ctrl.Result{}, r.createSecret(ctx, upgradePlan, secret)
}

// TODO: OS upgrade logic
// The control-plane Plan references the secret by name.
controlPlanePlan := upgrade.OSControlPlanePlan(release.ReleaseVersion, secret.Name, &release.Components.OperatingSystem)
if err = r.Get(ctx, client.ObjectKeyFromObject(controlPlanePlan), controlPlanePlan); err != nil {
if !errors.IsNotFound(err) {
return ctrl.Result{}, err
}

// The Plan does not exist yet: flag the upgrade as started before creating it.
setInProgressCondition(upgradePlan, lifecyclev1alpha1.OperatingSystemUpgradedCondition, "Control plane nodes are being upgraded")
return ctrl.Result{}, r.createPlan(ctx, upgradePlan, controlPlanePlan)
}

// Reuse the Plan's own node selector to decide which nodes must be checked.
selector, err := metav1.LabelSelectorAsSelector(controlPlanePlan.Spec.NodeSelector)
if err != nil {
return ctrl.Result{}, fmt.Errorf("parsing node selector: %w", err)
}

// All nodes are listed; filtering by selector happens inside isOSUpgraded.
nodeList := &corev1.NodeList{}
if err := r.List(ctx, nodeList); err != nil {
return ctrl.Result{}, fmt.Errorf("listing nodes: %w", err)
}

// Upgrade still running: return without requeueing.
// NOTE(review): presumably a later node or plan event triggers the next reconciliation - confirm.
if !isOSUpgraded(nodeList, selector, release.Components.OperatingSystem.PrettyName) {
return ctrl.Result{}, nil
} else if controlPlaneOnlyCluster(nodeList) {
setSuccessfulCondition(upgradePlan, lifecyclev1alpha1.OperatingSystemUpgradedCondition, "All cluster nodes are upgraded")
return ctrl.Result{Requeue: true}, nil
}

// TODO: worker upgrade
return ctrl.Result{Requeue: true}, nil
}

// isOSUpgraded reports whether every node in nodeList that matches selector
// has completed the OS upgrade.
//
// A matching node counts as upgraded only when its first NodeReady condition
// has status True, it is not marked unschedulable, and the OS image it reports
// equals osPrettyName. Nodes that do not match selector are ignored, so the
// function returns true when no node matches.
func isOSUpgraded(nodeList *corev1.NodeList, selector labels.Selector, osPrettyName string) bool {
	for i := range nodeList.Items {
		current := &nodeList.Items[i]
		if !selector.Matches(labels.Set(current.Labels)) {
			continue
		}

		// Only the first NodeReady condition is consulted; a node without one
		// is treated as not ready.
		ready := false
		for _, cond := range current.Status.Conditions {
			if cond.Type == corev1.NodeReady {
				ready = cond.Status == corev1.ConditionTrue
				break
			}
		}

		upgraded := ready &&
			!current.Spec.Unschedulable &&
			current.Status.NodeInfo.OSImage == osPrettyName
		if !upgraded {
			// Upgrade is still in progress.
			// TODO: Adjust to looking at the `Complete` condition of the
			// `plans.upgrade.cattle.io` resources once system-upgrade-controller v0.13.4 is released.
			return false
		}
	}

	return true
}
40 changes: 32 additions & 8 deletions internal/controller/upgradeplan_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ type UpgradePlanReconciler struct {
// +kubebuilder:rbac:groups=lifecycle.suse.com,resources=upgradeplans/finalizers,verbs=update
// +kubebuilder:rbac:groups=upgrade.cattle.io,resources=plans,verbs=create;list;get;watch
// +kubebuilder:rbac:groups="",resources=nodes,verbs=watch;list
// +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;delete;create
// +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;delete;create;watch
// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch
// +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;watch
// +kubebuilder:rbac:groups=batch,resources=jobs/status,verbs=get
Expand Down Expand Up @@ -90,13 +90,17 @@ func (r *UpgradePlanReconciler) executePlan(ctx context.Context, upgradePlan *li
}

if len(upgradePlan.Status.Conditions) == 0 {
setPendingCondition(upgradePlan, lifecyclev1alpha1.OperatingSystemUpgradedCondition, "OS upgrade is not yet started")
setPendingCondition(upgradePlan, lifecyclev1alpha1.KubernetesUpgradedCondition, "Kubernetes upgrade is not yet started")
setPendingCondition(upgradePlan, lifecyclev1alpha1.RancherUpgradedCondition, "Rancher upgrade is not yet started")

return ctrl.Result{Requeue: true}, nil
}

switch {
// TODO: uncomment once OS upgrades support multi node clusters
// case !meta.IsStatusConditionTrue(upgradePlan.Status.Conditions, lifecyclev1alpha1.OperatingSystemUpgradedCondition):
// return r.reconcileOS(ctx, upgradePlan, release)
case !meta.IsStatusConditionTrue(upgradePlan.Status.Conditions, lifecyclev1alpha1.KubernetesUpgradedCondition):
return r.reconcileKubernetes(ctx, upgradePlan, &release.Components.Kubernetes)
case !isHelmUpgradeFinished(upgradePlan, lifecyclev1alpha1.RancherUpgradedCondition):
Expand All @@ -109,24 +113,40 @@ func (r *UpgradePlanReconciler) executePlan(ctx context.Context, upgradePlan *li
return ctrl.Result{}, nil
}

func (r *UpgradePlanReconciler) recordCreatedPlan(upgradePlan *lifecyclev1alpha1.UpgradePlan, name, namespace string) {
r.Recorder.Eventf(upgradePlan, corev1.EventTypeNormal, "PlanCreated", "Upgrade plan created: %s/%s", namespace, name)
// createSecret creates secret through createObject on behalf of upgradePlan
// and, on success, records a "SecretCreated" event on upgradePlan naming the
// secret as <namespace>/<name>.
//
// Any failure from createObject is returned wrapped with "creating secret";
// no event is recorded in that case.
func (r *UpgradePlanReconciler) createSecret(ctx context.Context, upgradePlan *lifecyclev1alpha1.UpgradePlan, secret *corev1.Secret) error {
	createErr := r.createObject(ctx, upgradePlan, secret)
	if createErr != nil {
		return fmt.Errorf("creating secret: %w", createErr)
	}

	message := fmt.Sprintf("Secret created: %s/%s", secret.Namespace, secret.Name)
	r.recordCreatedObject(upgradePlan, "SecretCreated", message)

	return nil
}

// createPlan creates the given upgrade Plan on behalf of upgradePlan and, on
// success, records a "PlanCreated" event on upgradePlan that names the plan as
// <namespace>/<name>. Creation failures are returned wrapped with
// "creating upgrade plan".
func (r *UpgradePlanReconciler) createPlan(ctx context.Context, upgradePlan *lifecyclev1alpha1.UpgradePlan, plan *upgradecattlev1.Plan) error {
if err := ctrl.SetControllerReference(upgradePlan, plan, r.Scheme); err != nil {
return fmt.Errorf("setting controller reference: %w", err)
if err := r.createObject(ctx, upgradePlan, plan); err != nil {
return fmt.Errorf("creating upgrade plan: %w", err)
}

if err := r.Create(ctx, plan); err != nil {
return fmt.Errorf("creating upgrade plan: %w", err)
r.recordCreatedObject(upgradePlan, "PlanCreated", fmt.Sprintf("Upgrade plan created: %s/%s", plan.Namespace, plan.Name))
return nil
}

// createObject sets upgradePlan as the controller reference of obj (using
// r.Scheme) and then creates obj via r.Create.
//
// It returns an error wrapped with "setting controller reference" or
// "creating object" depending on which step failed, and nil on success.
func (r *UpgradePlanReconciler) createObject(ctx context.Context, upgradePlan *lifecyclev1alpha1.UpgradePlan, obj client.Object) error {
if err := ctrl.SetControllerReference(upgradePlan, obj, r.Scheme); err != nil {
return fmt.Errorf("setting controller reference: %w", err)
}

r.recordCreatedPlan(upgradePlan, plan.Name, plan.Namespace)
if err := r.Create(ctx, obj); err != nil {
return fmt.Errorf("creating object: %w", err)
}

return nil
}

// recordCreatedObject records a Normal event on upgradePlan with the given
// reason and message.
//
// msg is an already formatted string, so it is emitted with Event instead of
// Eventf: passing it as a format string would misrender any '%' it contains
// and is reported by go vet as a non-constant format string.
func (r *UpgradePlanReconciler) recordCreatedObject(upgradePlan *lifecyclev1alpha1.UpgradePlan, reason, msg string) {
	r.Recorder.Event(upgradePlan, corev1.EventTypeNormal, reason, msg)
}

func isHelmUpgradeFinished(plan *lifecyclev1alpha1.UpgradePlan, conditionType string) bool {
condition := meta.FindStatusCondition(plan.Status.Conditions, conditionType)

Expand Down Expand Up @@ -201,6 +221,9 @@ func (r *UpgradePlanReconciler) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
For(&lifecyclev1alpha1.UpgradePlan{}, builder.WithPredicates(predicate.GenerationChangedPredicate{})).
Owns(&upgradecattlev1.Plan{}, builder.WithPredicates(predicate.Funcs{
CreateFunc: func(e event.CreateEvent) bool {
return false
},
UpdateFunc: func(e event.UpdateEvent) bool {
// Upgrade plans are being constantly updated on every node change.
// Ensure that the reconciliation only covers the scenarios
Expand Down Expand Up @@ -232,5 +255,6 @@ func (r *UpgradePlanReconciler) SetupWithManager(mgr ctrl.Manager) error {
return false
},
})).
Owns(&corev1.Secret{}).
Complete(r)
}
6 changes: 4 additions & 2 deletions internal/upgrade/base.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@ import (
)

const (
planNamespace = "cattle-system"
PlanAnnotation = "lifecycle.suse.com/upgrade-plan"
planNamespace = "cattle-system"
PlanAnnotation = "lifecycle.suse.com/upgrade-plan"
controlPlaneKey = "control-plane"
workersKey = "workers"

ControlPlaneLabel = "node-role.kubernetes.io/control-plane"
)
Expand Down
3 changes: 0 additions & 3 deletions internal/upgrade/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@ import (
const (
rke2UpgradeImage = "rancher/rke2-upgrade"
k3sUpgradeImage = "rancher/k3s-upgrade"

controlPlaneKey = "control-plane"
workersKey = "workers"
)

func kubernetesPlanName(typeKey, version string) string {
Expand Down
80 changes: 77 additions & 3 deletions internal/upgrade/os.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,25 @@ import (
"bytes"
_ "embed"
"fmt"
"path/filepath"
"strings"
"text/template"

upgradecattlev1 "github.com/rancher/system-upgrade-controller/pkg/apis/upgrade.cattle.io/v1"
"github.com/suse-edge/upgrade-controller/pkg/release"
corev1 "k8s.io/api/core/v1"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

const (
scriptName = "os-upgrade.sh"
)

//go:embed templates/os-upgrade.sh.tpl
var osUpgradeScript string

func OSUpgradeSecret(releaseOS *release.OperatingSystem) (*corev1.Secret, error) {
const (
scriptName = "os-upgrade.sh"
secretName = "os-upgrade-secret"
)

Expand Down Expand Up @@ -45,7 +51,7 @@ func OSUpgradeSecret(releaseOS *release.OperatingSystem) (*corev1.Secret, error)
}

secret := &corev1.Secret{
ObjectMeta: v1.ObjectMeta{
ObjectMeta: metav1.ObjectMeta{
Name: secretName,
Namespace: planNamespace,
},
Expand All @@ -57,3 +63,71 @@ func OSUpgradeSecret(releaseOS *release.OperatingSystem) (*corev1.Secret, error)

return secret, nil
}

// OSControlPlanePlan builds the upgrade Plan that runs the OS upgrade script on
// control-plane nodes.
//
// Parameters:
//   - releaseVersion is stored as the Plan's Spec.Version.
//   - secretName is the name of the secret that the Plan mounts; the upgrade
//     script (scriptName) is expected to be one of its entries.
//   - releaseOS supplies the ZypperID and Version used to derive the Plan name.
//
// The returned Plan selects nodes whose ControlPlaneLabel is "true", has a
// concurrency of 1, cordons nodes, and tolerates the CriticalAddonsOnly,
// control-plane and etcd taints. The secret is mounted under
// /host/run/system-upgrade/secrets/<secretName> and the script is executed
// through "chroot /host", so it is addressed by its host-relative path.
func OSControlPlanePlan(releaseVersion, secretName string, releaseOS *release.OperatingSystem) *upgradecattlev1.Plan {
	const (
		planImage = "registry.suse.com/bci/bci-base:15.5"
	)

	controlPlanePlanName := osPlanName(controlPlaneKey, releaseOS.ZypperID, releaseOS.Version)
	controlPlanePlan := baseUpgradePlan(controlPlanePlanName)
	controlPlanePlan.Labels = map[string]string{
		"os-upgrade": "control-plane",
	}
	controlPlanePlan.Spec.Concurrency = 1
	controlPlanePlan.Spec.NodeSelector = &metav1.LabelSelector{
		MatchExpressions: []metav1.LabelSelectorRequirement{
			{
				Key:      ControlPlaneLabel,
				Operator: metav1.LabelSelectorOpIn,
				Values: []string{
					"true",
				},
			},
		},
	}
	// Typed API constants are used instead of raw strings so that a typo in an
	// operator or effect is caught at compile time.
	controlPlanePlan.Spec.Tolerations = []corev1.Toleration{
		{
			Key:      "CriticalAddonsOnly",
			Operator: corev1.TolerationOpEqual,
			Value:    "true",
			Effect:   corev1.TaintEffectNoExecute,
		},
		{
			Key:      ControlPlaneLabel,
			Operator: corev1.TolerationOpEqual,
			Value:    "",
			Effect:   corev1.TaintEffectNoSchedule,
		},
		{
			Key:      "node-role.kubernetes.io/etcd",
			Operator: corev1.TolerationOpEqual,
			Value:    "",
			Effect:   corev1.TaintEffectNoExecute,
		},
	}

	// The same location is expressed twice: once as seen from the container
	// (prefixed with /host) for the mount, and once as seen after chroot.
	secretPathRelativeToHost := fmt.Sprintf("/run/system-upgrade/secrets/%s", secretName)
	mountPath := filepath.Join("/host", secretPathRelativeToHost)
	controlPlanePlan.Spec.Secrets = []upgradecattlev1.SecretSpec{
		{
			Name: secretName,
			Path: mountPath,
		},
	}
	controlPlanePlan.Spec.Cordon = true
	controlPlanePlan.Spec.Version = releaseVersion

	// Active deadline, in seconds, for the jobs spawned by this Plan.
	controlPlanePlan.Spec.JobActiveDeadlineSecs = 3600

	controlPlanePlan.Spec.Upgrade = &upgradecattlev1.ContainerSpec{
		Image:   planImage,
		Command: []string{"chroot", "/host"},
		Args:    []string{"sh", filepath.Join(secretPathRelativeToHost, scriptName)},
	}
	return controlPlanePlan
}

// osPlanName derives the name of an OS upgrade Plan in the form
// "<typeKey>-<osName>-<osVersion>", where osName is lower-cased and every dot
// in osVersion is replaced by a dash.
func osPlanName(typeKey, osName, osVersion string) string {
	lowerName := strings.ToLower(osName)
	dashedVersion := strings.ReplaceAll(osVersion, ".", "-")

	return fmt.Sprintf("%s-%s-%s", typeKey, lowerName, dashedVersion)
}
Loading

0 comments on commit 88fbcd2

Please sign in to comment.