Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OS upgrade implementation for single clusters #20

Merged
merged 16 commits into from
Jul 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions api/v1alpha1/upgradeplan_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ import (
)

const (
OperatingSystemUpgradedCondition = "OSUpgraded"

KubernetesUpgradedCondition = "KubernetesUpgraded"
RancherUpgradedCondition = "RancherUpgraded"

Expand Down
1 change: 1 addition & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ rules:
- delete
- get
- list
- watch
- apiGroups:
- batch
resources:
Expand Down
72 changes: 67 additions & 5 deletions internal/controller/reconcile_os.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,85 @@ import (
"context"
"fmt"

lifecyclev1alpha1 "github.com/suse-edge/upgrade-controller/api/v1alpha1"
"github.com/suse-edge/upgrade-controller/internal/upgrade"
"github.com/suse-edge/upgrade-controller/pkg/release"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
)

//lint:ignore U1000 - Temporary ignore "unused" linter error. Will be removed when function is ready to be used.
func (r *UpgradePlanReconciler) reconcileOS(ctx context.Context, releaseOS *release.OperatingSystem) (ctrl.Result, error) {
secret, err := upgrade.OSUpgradeSecret(releaseOS)
// reconcileOS advances the operating system upgrade described by release by one step.
//
// The steps, in order, are:
//  1. Generate the OS upgrade secret and look it up; if it does not exist,
//     create it and return.
//  2. Build the control plane upgrade plan and look it up; if it does not exist,
//     mark the OSUpgraded condition as in progress, create the plan and return.
//  3. List the nodes and check, via isOSUpgraded, whether the nodes matched by
//     the control plane plan's node selector are done.
//
// It returns a non-nil error when generating the secret, parsing the node
// selector, listing nodes, or any lookup/creation fails (a "not found" lookup
// result is not an error; it triggers creation instead).
func (r *UpgradePlanReconciler) reconcileOS(ctx context.Context, upgradePlan *lifecyclev1alpha1.UpgradePlan, release *release.Release) (ctrl.Result, error) {
secret, err := upgrade.OSUpgradeSecret(&release.Components.OperatingSystem)
if err != nil {
return ctrl.Result{}, fmt.Errorf("generating OS upgrade secret: %w", err)
}

// NOTE(review): the next two lines have no closing brace of their own and
// look like the removed side of the diff (unconditional create) - confirm.
if err = r.Create(ctx, secret); err != nil {
return ctrl.Result{}, fmt.Errorf("creating OS upgrade secret: %w", err)
// Fetch the secret by the key of the generated object; only a "not found"
// result leads to creation, any other lookup error is returned as-is.
if err = r.Get(ctx, client.ObjectKeyFromObject(secret), secret); err != nil {
if !errors.IsNotFound(err) {
return ctrl.Result{}, err
}

return ctrl.Result{}, r.createSecret(ctx, upgradePlan, secret)
}

// TODO: OS upgrade logic
// Same get-or-create pattern for the control plane plan, which references
// the secret by name.
controlPlanePlan := upgrade.OSControlPlanePlan(release.ReleaseVersion, secret.Name, &release.Components.OperatingSystem)
if err = r.Get(ctx, client.ObjectKeyFromObject(controlPlanePlan), controlPlanePlan); err != nil {
if !errors.IsNotFound(err) {
return ctrl.Result{}, err
}

// The condition is flipped to "in progress" before the plan is created.
setInProgressCondition(upgradePlan, lifecyclev1alpha1.OperatingSystemUpgradedCondition, "Control plane nodes are being upgraded")
return ctrl.Result{}, r.createPlan(ctx, upgradePlan, controlPlanePlan)
}

// Reuse the plan's own node selector to decide which nodes to inspect.
selector, err := metav1.LabelSelectorAsSelector(controlPlanePlan.Spec.NodeSelector)
if err != nil {
return ctrl.Result{}, fmt.Errorf("parsing node selector: %w", err)
}

// All nodes are listed without options; filtering by selector happens in isOSUpgraded.
nodeList := &corev1.NodeList{}
if err := r.List(ctx, nodeList); err != nil {
return ctrl.Result{}, fmt.Errorf("listing nodes: %w", err)
}

if !isOSUpgraded(nodeList, selector, release.Components.OperatingSystem.PrettyName) {
// Selected nodes are not finished yet: return without an explicit requeue.
return ctrl.Result{}, nil
} else if controlPlaneOnlyCluster(nodeList) {
setSuccessfulCondition(upgradePlan, lifecyclev1alpha1.OperatingSystemUpgradedCondition, "All cluster nodes are upgraded")
return ctrl.Result{Requeue: true}, nil
}

// TODO: worker upgrade
return ctrl.Result{Requeue: true}, nil
}

// isOSUpgraded reports whether every node in nodeList that matches selector
// has completed the OS upgrade.
//
// A matching node counts as upgraded only when all of the following hold:
//   - its NodeReady condition has status True,
//   - it is schedulable (Spec.Unschedulable is false),
//   - its reported OS image equals osPrettyName.
//
// Nodes that do not match selector are ignored. If no node matches, the
// result is true.
func isOSUpgraded(nodeList *corev1.NodeList, selector labels.Selector, osPrettyName string) bool {
	for i := range nodeList.Items {
		current := &nodeList.Items[i]
		if !selector.Matches(labels.Set(current.Labels)) {
			continue
		}

		// Only the first NodeReady condition is consulted; a node without
		// one is treated as not ready.
		ready := false
		for j := range current.Status.Conditions {
			if cond := &current.Status.Conditions[j]; cond.Type == corev1.NodeReady {
				ready = cond.Status == corev1.ConditionTrue
				break
			}
		}

		schedulable := !current.Spec.Unschedulable
		onTargetImage := current.Status.NodeInfo.OSImage == osPrettyName

		if !(ready && schedulable && onTargetImage) {
			// Upgrade is still in progress.
			// TODO: Adjust to looking at the `Complete` condition of the
			// `plans.upgrade.cattle.io` resources once system-upgrade-controller v0.13.4 is released.
			return false
		}
	}

	return true
}
40 changes: 32 additions & 8 deletions internal/controller/upgradeplan_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ type UpgradePlanReconciler struct {
// +kubebuilder:rbac:groups=lifecycle.suse.com,resources=upgradeplans/finalizers,verbs=update
// +kubebuilder:rbac:groups=upgrade.cattle.io,resources=plans,verbs=create;list;get;watch
// +kubebuilder:rbac:groups="",resources=nodes,verbs=watch;list
// +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;delete;create
// +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;delete;create;watch
// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch
// +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;watch
// +kubebuilder:rbac:groups=batch,resources=jobs/status,verbs=get
Expand Down Expand Up @@ -90,13 +90,17 @@ func (r *UpgradePlanReconciler) executePlan(ctx context.Context, upgradePlan *li
}

if len(upgradePlan.Status.Conditions) == 0 {
setPendingCondition(upgradePlan, lifecyclev1alpha1.OperatingSystemUpgradedCondition, "OS upgrade is not yet started")
setPendingCondition(upgradePlan, lifecyclev1alpha1.KubernetesUpgradedCondition, "Kubernetes upgrade is not yet started")
setPendingCondition(upgradePlan, lifecyclev1alpha1.RancherUpgradedCondition, "Rancher upgrade is not yet started")

return ctrl.Result{Requeue: true}, nil
}

switch {
// TODO: uncomment once OS upgrades support multi node clusters
// case !meta.IsStatusConditionTrue(upgradePlan.Status.Conditions, lifecyclev1alpha1.OperatingSystemUpgradedCondition):
// return r.reconcileOS(ctx, upgradePlan, release)
case !meta.IsStatusConditionTrue(upgradePlan.Status.Conditions, lifecyclev1alpha1.KubernetesUpgradedCondition):
return r.reconcileKubernetes(ctx, upgradePlan, &release.Components.Kubernetes)
case !isHelmUpgradeFinished(upgradePlan, lifecyclev1alpha1.RancherUpgradedCondition):
Expand All @@ -109,24 +113,40 @@ func (r *UpgradePlanReconciler) executePlan(ctx context.Context, upgradePlan *li
return ctrl.Result{}, nil
}

func (r *UpgradePlanReconciler) recordCreatedPlan(upgradePlan *lifecyclev1alpha1.UpgradePlan, name, namespace string) {
r.Recorder.Eventf(upgradePlan, corev1.EventTypeNormal, "PlanCreated", "Upgrade plan created: %s/%s", namespace, name)
// createSecret creates secret on behalf of upgradePlan by delegating to
// createObject and, when that succeeds, records a "SecretCreated" event on
// upgradePlan naming the secret as <namespace>/<name>.
// A creation failure is returned wrapped with "creating secret".
func (r *UpgradePlanReconciler) createSecret(ctx context.Context, upgradePlan *lifecyclev1alpha1.UpgradePlan, secret *corev1.Secret) error {
if err := r.createObject(ctx, upgradePlan, secret); err != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This approach is fine and will work but I might look into simplifying it when I have some time.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good, I did it like this mainly because it was the least invasive method of introducing this in the existing code.

return fmt.Errorf("creating secret: %w", err)
}

r.recordCreatedObject(upgradePlan, "SecretCreated", fmt.Sprintf("Secret created: %s/%s", secret.Namespace, secret.Name))
return nil
}

// createPlan creates the given system-upgrade-controller Plan on behalf of
// upgradePlan and, on success, records a "PlanCreated" event on upgradePlan
// naming the plan as <namespace>/<name>.
// A creation failure is returned wrapped with "creating upgrade plan".
//
// NOTE(review): the body below shows both direct SetControllerReference/Create
// calls and the createObject call; this looks like both sides of a diff
// rendered together - confirm which lines are current.
func (r *UpgradePlanReconciler) createPlan(ctx context.Context, upgradePlan *lifecyclev1alpha1.UpgradePlan, plan *upgradecattlev1.Plan) error {
if err := ctrl.SetControllerReference(upgradePlan, plan, r.Scheme); err != nil {
return fmt.Errorf("setting controller reference: %w", err)
if err := r.createObject(ctx, upgradePlan, plan); err != nil {
return fmt.Errorf("creating upgrade plan: %w", err)
}

if err := r.Create(ctx, plan); err != nil {
return fmt.Errorf("creating upgrade plan: %w", err)
r.recordCreatedObject(upgradePlan, "PlanCreated", fmt.Sprintf("Upgrade plan created: %s/%s", plan.Namespace, plan.Name))
return nil
}

// createObject sets upgradePlan as the controller reference of obj (using the
// reconciler's scheme) and then creates obj via r.Create.
// Errors from either step are returned wrapped with the step that failed.
func (r *UpgradePlanReconciler) createObject(ctx context.Context, upgradePlan *lifecyclev1alpha1.UpgradePlan, obj client.Object) error {
if err := ctrl.SetControllerReference(upgradePlan, obj, r.Scheme); err != nil {
return fmt.Errorf("setting controller reference: %w", err)
}

// NOTE(review): the next line references `plan`, which is not a parameter
// of this function; it looks like a removed-side diff line - confirm.
r.recordCreatedPlan(upgradePlan, plan.Name, plan.Namespace)
if err := r.Create(ctx, obj); err != nil {
return fmt.Errorf("creating object: %w", err)
}

return nil
}

// recordCreatedObject emits a Normal event on upgradePlan announcing that a
// dependent object was created.
//
// reason is the short, machine-readable event reason (e.g. "PlanCreated") and
// msg is the final, human-readable message. msg is already fully formatted by
// the caller, so it is recorded verbatim with Event instead of being passed
// as a format string to Eventf; a '%' inside msg is therefore never
// interpreted as a formatting verb.
func (r *UpgradePlanReconciler) recordCreatedObject(upgradePlan *lifecyclev1alpha1.UpgradePlan, reason, msg string) {
	r.Recorder.Event(upgradePlan, corev1.EventTypeNormal, reason, msg)
}

func isHelmUpgradeFinished(plan *lifecyclev1alpha1.UpgradePlan, conditionType string) bool {
condition := meta.FindStatusCondition(plan.Status.Conditions, conditionType)

Expand Down Expand Up @@ -201,6 +221,9 @@ func (r *UpgradePlanReconciler) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
For(&lifecyclev1alpha1.UpgradePlan{}, builder.WithPredicates(predicate.GenerationChangedPredicate{})).
Owns(&upgradecattlev1.Plan{}, builder.WithPredicates(predicate.Funcs{
CreateFunc: func(e event.CreateEvent) bool {
return false
},
UpdateFunc: func(e event.UpdateEvent) bool {
// Upgrade plans are being constantly updated on every node change.
// Ensure that the reconciliation only covers the scenarios
Expand Down Expand Up @@ -232,5 +255,6 @@ func (r *UpgradePlanReconciler) SetupWithManager(mgr ctrl.Manager) error {
return false
},
})).
Owns(&corev1.Secret{}).
Complete(r)
}
6 changes: 4 additions & 2 deletions internal/upgrade/base.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@ import (
)

const (
planNamespace = "cattle-system"
PlanAnnotation = "lifecycle.suse.com/upgrade-plan"
planNamespace = "cattle-system"
PlanAnnotation = "lifecycle.suse.com/upgrade-plan"
controlPlaneKey = "control-plane"
workersKey = "workers"

ControlPlaneLabel = "node-role.kubernetes.io/control-plane"
)
Expand Down
3 changes: 0 additions & 3 deletions internal/upgrade/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@ import (
const (
rke2UpgradeImage = "rancher/rke2-upgrade"
k3sUpgradeImage = "rancher/k3s-upgrade"

controlPlaneKey = "control-plane"
workersKey = "workers"
)

func kubernetesPlanName(typeKey, version string) string {
Expand Down
80 changes: 77 additions & 3 deletions internal/upgrade/os.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,25 @@ import (
"bytes"
_ "embed"
"fmt"
"path/filepath"
"strings"
"text/template"

upgradecattlev1 "github.com/rancher/system-upgrade-controller/pkg/apis/upgrade.cattle.io/v1"
"github.com/suse-edge/upgrade-controller/pkg/release"
corev1 "k8s.io/api/core/v1"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

const (
// scriptName is the file name of the OS upgrade script. OSControlPlanePlan
// joins it onto the secret's path on the host to form the script path that
// the upgrade container passes to `sh`.
scriptName = "os-upgrade.sh"
)

//go:embed templates/os-upgrade.sh.tpl
var osUpgradeScript string

func OSUpgradeSecret(releaseOS *release.OperatingSystem) (*corev1.Secret, error) {
const (
scriptName = "os-upgrade.sh"
secretName = "os-upgrade-secret"
)

Expand Down Expand Up @@ -45,7 +51,7 @@ func OSUpgradeSecret(releaseOS *release.OperatingSystem) (*corev1.Secret, error)
}

secret := &corev1.Secret{
ObjectMeta: v1.ObjectMeta{
ObjectMeta: metav1.ObjectMeta{
Name: secretName,
Namespace: planNamespace,
},
Expand All @@ -57,3 +63,71 @@ func OSUpgradeSecret(releaseOS *release.OperatingSystem) (*corev1.Secret, error)

return secret, nil
}

// OSControlPlanePlan builds the system-upgrade-controller Plan used to upgrade
// the operating system on control plane nodes.
//
// releaseVersion is stored as the plan's Spec.Version, secretName is the name
// of the secret mounted into the upgrade job, and releaseOS supplies the
// ZypperID and Version that make up the plan name.
// The plan is only constructed and returned; this function does not create it
// in the cluster.
func OSControlPlanePlan(releaseVersion, secretName string, releaseOS *release.OperatingSystem) *upgradecattlev1.Plan {
const (
planImage = "registry.suse.com/bci/bci-base:15.5"
)

controlPlanePlanName := osPlanName(controlPlaneKey, releaseOS.ZypperID, releaseOS.Version)
controlPlanePlan := baseUpgradePlan(controlPlanePlanName)
controlPlanePlan.Labels = map[string]string{
"os-upgrade": "control-plane",
}
controlPlanePlan.Spec.Concurrency = 1
// Target only nodes whose control-plane label has the value "true".
controlPlanePlan.Spec.NodeSelector = &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: ControlPlaneLabel,
Operator: "In",
Values: []string{
"true",
},
},
},
}
// Tolerations for the CriticalAddonsOnly, control-plane and etcd taints.
controlPlanePlan.Spec.Tolerations = []corev1.Toleration{
{
Key: "CriticalAddonsOnly",
Operator: "Equal",
Value: "true",
Effect: "NoExecute",
},
{
Key: ControlPlaneLabel,
Operator: "Equal",
Value: "",
Effect: "NoSchedule",
},
{
Key: "node-role.kubernetes.io/etcd",
Operator: "Equal",
Value: "",
Effect: "NoExecute",
},
}

// Two views of the same location: mountPath carries the "/host" prefix and is
// used for the secret mount, while secretPathRelativeToHost (no prefix) is
// used for the script argument because the upgrade command runs `chroot /host`.
secretPathRelativeToHost := fmt.Sprintf("/run/system-upgrade/secrets/%s", secretName)
mountPath := filepath.Join("/host", secretPathRelativeToHost)
controlPlanePlan.Spec.Secrets = []upgradecattlev1.SecretSpec{
{
Name: secretName,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know it is tempting to just use the constants here but wouldn't it be better if the secret name and script are passed to the function from the outside? Both can be extracted from the secret that we query.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved the secretName out of the global constants and added it as a parameter to the plan creation function. I cannot do the same for the scriptName mainly because it is a key of the StringData map of the secret and I do not have a good way of extracting the key name from the secret.StringData map. So I decided to leave the scriptName as a global constant.

Path: mountPath,
},
}
controlPlanePlan.Spec.Cordon = true
controlPlanePlan.Spec.Version = releaseVersion

// 3600 seconds, i.e. one hour.
controlPlanePlan.Spec.JobActiveDeadlineSecs = 3600

// Run the upgrade script with `sh` after chroot-ing into /host.
controlPlanePlan.Spec.Upgrade = &upgradecattlev1.ContainerSpec{
Image: planImage,
Command: []string{"chroot", "/host"},
Args: []string{"sh", filepath.Join(secretPathRelativeToHost, scriptName)},
}
return controlPlanePlan
}

// osPlanName derives the name of an OS upgrade plan in the form
// "<typeKey>-<osName>-<osVersion>", where osName is lower-cased and every
// "." in osVersion is replaced by "-".
func osPlanName(typeKey, osName, osVersion string) string {
	loweredName := strings.ToLower(osName)
	dashedVersion := strings.ReplaceAll(osVersion, ".", "-")

	return fmt.Sprintf("%s-%s-%s", typeKey, loweredName, dashedVersion)
}
Loading