From 11394625d288542782cd23fee50a3d9ff6957ebe Mon Sep 17 00:00:00 2001 From: Tomas Nozicka Date: Thu, 20 Jun 2024 15:47:29 +0200 Subject: [PATCH] Set up dedicates service account for perftune jobs --- .../templates/clusterrole_def.yaml | 1 + pkg/cmd/operator/operator.go | 2 + pkg/controller/nodeconfig/controller.go | 66 ++++++++++++++++ pkg/controller/nodeconfig/resource.go | 49 ++++++++++++ pkg/controller/nodeconfig/sync.go | 52 ++++++++++++ .../nodeconfig/sync_rolebindings.go | 79 +++++++++++++++++++ pkg/controller/nodeconfig/sync_roles.go | 77 ++++++++++++++++++ .../nodeconfig/sync_serviceaccounts.go | 1 + pkg/controller/nodetune/resource.go | 22 +++--- pkg/naming/constants.go | 3 +- 10 files changed, 341 insertions(+), 11 deletions(-) create mode 100644 pkg/controller/nodeconfig/sync_rolebindings.go create mode 100644 pkg/controller/nodeconfig/sync_roles.go diff --git a/helm/scylla-operator/templates/clusterrole_def.yaml b/helm/scylla-operator/templates/clusterrole_def.yaml index d4a0c2c0ed9..a819a2f397a 100644 --- a/helm/scylla-operator/templates/clusterrole_def.yaml +++ b/helm/scylla-operator/templates/clusterrole_def.yaml @@ -190,6 +190,7 @@ rules: resources: - clusterroles - clusterrolebindings + - roles - rolebindings verbs: - create diff --git a/pkg/cmd/operator/operator.go b/pkg/cmd/operator/operator.go index f991db4b9d7..ecdc45675d3 100644 --- a/pkg/cmd/operator/operator.go +++ b/pkg/cmd/operator/operator.go @@ -294,6 +294,8 @@ func (o *OperatorOptions) run(ctx context.Context, streams genericclioptions.IOS scyllaInformers.Scylla().V1alpha1().ScyllaOperatorConfigs(), kubeInformers.Rbac().V1().ClusterRoles(), kubeInformers.Rbac().V1().ClusterRoleBindings(), + kubeInformers.Rbac().V1().Roles(), + kubeInformers.Rbac().V1().RoleBindings(), kubeInformers.Apps().V1().DaemonSets(), kubeInformers.Core().V1().Namespaces(), kubeInformers.Core().V1().Nodes(), diff --git a/pkg/controller/nodeconfig/controller.go b/pkg/controller/nodeconfig/controller.go index 0ced0b8b7eb..c44b8e45385 100644 --- a/pkg/controller/nodeconfig/controller.go +++ b/pkg/controller/nodeconfig/controller.go @@ -58,6 +58,8 @@ type Controller struct { scyllaOperatorConfigLister scyllav1alpha1listers.ScyllaOperatorConfigLister clusterRoleLister rbacv1listers.ClusterRoleLister clusterRoleBindingLister rbacv1listers.ClusterRoleBindingLister + roleLister rbacv1listers.RoleLister + roleBindingLister rbacv1listers.RoleBindingLister daemonSetLister appsv1listers.DaemonSetLister namespaceLister corev1listers.NamespaceLister nodeLister corev1listers.NodeLister @@ -80,6 +82,8 @@ func NewController( scyllaOperatorConfigInformer scyllav1alpha1informers.ScyllaOperatorConfigInformer, clusterRoleInformer rbacv1informers.ClusterRoleInformer, clusterRoleBindingInformer rbacv1informers.ClusterRoleBindingInformer, + roleInformer rbacv1informers.RoleInformer, + roleBindingInformer rbacv1informers.RoleBindingInformer, daemonSetInformer appsv1informers.DaemonSetInformer, namespaceInformer corev1informers.NamespaceInformer, nodeInformer corev1informers.NodeInformer, @@ -98,6 +102,8 @@ func NewController( scyllaOperatorConfigLister: scyllaOperatorConfigInformer.Lister(), clusterRoleLister: clusterRoleInformer.Lister(), clusterRoleBindingLister: clusterRoleBindingInformer.Lister(), + roleLister: roleInformer.Lister(), + roleBindingLister: roleBindingInformer.Lister(), daemonSetLister: daemonSetInformer.Lister(), namespaceLister: namespaceInformer.Lister(), nodeLister: nodeInformer.Lister(), @@ -108,6 +114,8 @@ func NewController( scyllaOperatorConfigInformer.Informer().HasSynced, clusterRoleInformer.Informer().HasSynced, clusterRoleBindingInformer.Informer().HasSynced, + roleInformer.Informer().HasSynced, + roleBindingInformer.Informer().HasSynced, daemonSetInformer.Informer().HasSynced, namespaceInformer.Informer().HasSynced, nodeInformer.Informer().HasSynced, @@ -170,6 +178,18 @@ func NewController( DeleteFunc: ncc.deleteClusterRoleBinding, }) + roleInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: ncc.addRole, + UpdateFunc: ncc.updateRole, + DeleteFunc: ncc.deleteRole, + }) + + roleBindingInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: ncc.addRoleBinding, + UpdateFunc: ncc.updateRoleBinding, + DeleteFunc: ncc.deleteRoleBinding, + }) + serviceAccountInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: ncc.addServiceAccount, UpdateFunc: ncc.updateServiceAccount, @@ -277,6 +297,52 @@ func (ncc *Controller) deleteClusterRole(obj interface{}) { ) } +func (ncc *Controller) addRoleBinding(obj interface{}) { + ncc.handlers.HandleAdd( + obj.(*rbacv1.RoleBinding), + ncc.handlers.EnqueueOwner, + ) +} + +func (ncc *Controller) updateRoleBinding(old, cur interface{}) { + ncc.handlers.HandleUpdate( + old.(*rbacv1.RoleBinding), + cur.(*rbacv1.RoleBinding), + ncc.handlers.EnqueueOwner, + ncc.deleteRoleBinding, + ) +} + +func (ncc *Controller) deleteRoleBinding(obj interface{}) { + ncc.handlers.HandleDelete( + obj, + ncc.handlers.EnqueueOwner, + ) +} + +func (ncc *Controller) addRole(obj interface{}) { + ncc.handlers.HandleAdd( + obj.(*rbacv1.Role), + ncc.handlers.EnqueueOwner, + ) +} + +func (ncc *Controller) updateRole(old, cur interface{}) { + ncc.handlers.HandleUpdate( + old.(*rbacv1.Role), + cur.(*rbacv1.Role), + ncc.handlers.EnqueueOwner, + ncc.deleteRole, + ) +} + +func (ncc *Controller) deleteRole(obj interface{}) { + ncc.handlers.HandleDelete( + obj, + ncc.handlers.EnqueueOwner, + ) +} + func (ncc *Controller) addNodeConfig(obj interface{}) { ncc.handlers.HandleAdd( obj.(*scyllav1alpha1.NodeConfig), diff --git a/pkg/controller/nodeconfig/resource.go b/pkg/controller/nodeconfig/resource.go index 7d45da00b6f..4d0f9d7175e 100644 --- a/pkg/controller/nodeconfig/resource.go +++ b/pkg/controller/nodeconfig/resource.go @@ -38,6 +38,18 @@ func makeNodeConfigServiceAccount() *corev1.ServiceAccount { } } +func makePerftuneServiceAccount() *corev1.ServiceAccount { + return &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: naming.ScyllaOperatorNodeTuningNamespace, + Name: naming.PerftuneServiceAccountName, + Labels: map[string]string{ + naming.NodeConfigNameLabel: naming.NodeConfigAppName, + }, + }, + } +} + func NodeConfigClusterRole() *rbacv1.ClusterRole { return &rbacv1.ClusterRole{ ObjectMeta: metav1.ObjectMeta{ @@ -92,6 +104,19 @@ func NodeConfigClusterRole() *rbacv1.ClusterRole { } } +func makePerftuneRole() *rbacv1.Role { + return &rbacv1.Role{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: naming.ScyllaOperatorNodeTuningNamespace, + Name: naming.PerftuneServiceAccountName, + Labels: map[string]string{ + naming.NodeConfigNameLabel: naming.NodeConfigAppName, + }, + }, + Rules: []rbacv1.PolicyRule{}, + } +} + func makeNodeConfigClusterRoleBinding() *rbacv1.ClusterRoleBinding { return &rbacv1.ClusterRoleBinding{ ObjectMeta: metav1.ObjectMeta{ @@ -116,6 +141,30 @@ func makeNodeConfigClusterRoleBinding() *rbacv1.ClusterRoleBinding { } } +func makePerftuneRoleBinding() *rbacv1.RoleBinding { + return &rbacv1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: naming.ScyllaOperatorNodeTuningNamespace, + Name: naming.PerftuneServiceAccountName, + Labels: map[string]string{ + naming.NodeConfigNameLabel: naming.NodeConfigAppName, + }, + }, + RoleRef: rbacv1.RoleRef{ + APIGroup: "rbac.authorization.k8s.io", + Kind: "Role", + Name: naming.PerftuneServiceAccountName, + }, + Subjects: []rbacv1.Subject{ + { + Kind: "ServiceAccount", + Namespace: naming.ScyllaOperatorNodeTuningNamespace, + Name: naming.PerftuneServiceAccountName, + }, + }, + } +} + func makeNodeSetupDaemonSet(nc *scyllav1alpha1.NodeConfig, operatorImage, scyllaImage string) *appsv1.DaemonSet { if nc.Spec.LocalDiskSetup == nil && nc.Spec.DisableOptimizations { return nil diff --git a/pkg/controller/nodeconfig/sync.go b/pkg/controller/nodeconfig/sync.go index 1abbee372d0..e5df5b7b9cc 100644 --- a/pkg/controller/nodeconfig/sync.go +++ b/pkg/controller/nodeconfig/sync.go @@ -63,6 +63,11 @@ func (ncc *Controller) sync(ctx context.Context, key string) error { objectErrs = append(objectErrs, err) } + roles, err := ncc.getRoles() + if err != nil { + objectErrs = append(objectErrs, err) + } + serviceAccounts, err := ncc.getServiceAccounts() if err != nil { objectErrs = append(objectErrs, err) @@ -73,6 +78,11 @@ func (ncc *Controller) sync(ctx context.Context, key string) error { objectErrs = append(objectErrs, err) } + roleBindings, err := ncc.getRoleBindings() + if err != nil { + objectErrs = append(objectErrs, err) + } + daemonSets, err := controllerhelpers.GetObjects[CT, *appsv1.DaemonSet]( ctx, nc, @@ -114,6 +124,11 @@ func (ncc *Controller) sync(ctx context.Context, key string) error { errs = append(errs, fmt.Errorf("sync ClusterRole(s): %w", err)) } + err = ncc.syncRoles(ctx, roles) + if err != nil { + errs = append(errs, fmt.Errorf("sync Role(s): %w", err)) + } + err = ncc.syncServiceAccounts(ctx, serviceAccounts) if err != nil { errs = append(errs, fmt.Errorf("sync ServiceAccount(s): %w", err)) @@ -124,6 +139,11 @@ func (ncc *Controller) sync(ctx context.Context, key string) error { errs = append(errs, fmt.Errorf("sync ClusterRoleBinding(s): %w", err)) } + err = ncc.syncRoleBindings(ctx, roleBindings) + if err != nil { + errs = append(errs, fmt.Errorf("sync RoleBinding(s): %w", err)) + } + err = ncc.syncDaemonSet(ctx, nc, soc, daemonSets) if err != nil { errs = append(errs, fmt.Errorf("sync DaemonSet(s): %w", err)) @@ -183,6 +203,38 @@ func (ncc *Controller) getClusterRoleBindings() (map[string]*rbacv1.ClusterRoleB return crbMap, nil } +func (ncc *Controller) getRoles() (map[string]*rbacv1.Role, error) { + roles, err := ncc.roleLister.List(labels.SelectorFromSet(map[string]string{ + naming.NodeConfigNameLabel: naming.NodeConfigAppName, + })) + if err != nil { + return nil, fmt.Errorf("list roles: %w", err) + } + + res := map[string]*rbacv1.Role{} + for i := range roles { + res[roles[i].Name] = roles[i] + } + + return res, nil +} + +func (ncc *Controller) getRoleBindings() (map[string]*rbacv1.RoleBinding, error) { + roleBindings, err := ncc.roleBindingLister.List(labels.SelectorFromSet(map[string]string{ + naming.NodeConfigNameLabel: naming.NodeConfigAppName, + })) + if err != nil { + return nil, fmt.Errorf("list rolebindings: %w", err) + } + + res := map[string]*rbacv1.RoleBinding{} + for i := range roleBindings { + res[roleBindings[i].Name] = roleBindings[i] + } + + return res, nil +} + func (ncc *Controller) getServiceAccounts() (map[string]*corev1.ServiceAccount, error) { sas, err := ncc.serviceAccountLister.List(labels.SelectorFromSet(map[string]string{ naming.NodeConfigNameLabel: naming.NodeConfigAppName, diff --git a/pkg/controller/nodeconfig/sync_rolebindings.go b/pkg/controller/nodeconfig/sync_rolebindings.go new file mode 100644 index 00000000000..4c5ab9b15f0 --- /dev/null +++ b/pkg/controller/nodeconfig/sync_rolebindings.go @@ -0,0 +1,79 @@ +// Copyright (C) 2024 ScyllaDB + +package nodeconfig + +import ( + "context" + "fmt" + + "github.com/scylladb/scylla-operator/pkg/resourceapply" + rbacv1 "k8s.io/api/rbac/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + utilerrors "k8s.io/apimachinery/pkg/util/errors" +) + +func (ncc *Controller) makeRoleBindings() []*rbacv1.RoleBinding { + roleBindings := []*rbacv1.RoleBinding{ + makePerftuneRoleBinding(), + } + + return roleBindings +} + +func (ncc *Controller) pruneRoleBindings(ctx context.Context, requiredRoleBindings []*rbacv1.RoleBinding, roleBindings map[string]*rbacv1.RoleBinding) error { + var errs []error + for _, rb := range roleBindings { + if rb.DeletionTimestamp != nil { + continue + } + + isRequired := false + for _, req := range requiredRoleBindings { + if rb.Name == req.Name { + isRequired = true + break + } + } + if isRequired { + continue + } + + propagationPolicy := metav1.DeletePropagationBackground + err := ncc.kubeClient.RbacV1().RoleBindings(rb.Namespace).Delete(ctx, rb.Name, metav1.DeleteOptions{ + Preconditions: &metav1.Preconditions{ + UID: &rb.UID, + }, + PropagationPolicy: &propagationPolicy, + }) + if err != nil { + errs = append(errs, err) + continue + } + } + return utilerrors.NewAggregate(errs) +} + +func (ncc *Controller) syncRoleBindings( + ctx context.Context, + roleBindings map[string]*rbacv1.RoleBinding, +) error { + requiredRoleBindings := ncc.makeRoleBindings() + + // Delete any excessive RoleBindings. + // Delete has to be the first action to avoid getting stuck on quota. + if err := ncc.pruneRoleBindings(ctx, requiredRoleBindings, roleBindings); err != nil { + return fmt.Errorf("can't delete RoleBinding(s): %w", err) + } + + var errs []error + for _, crb := range requiredRoleBindings { + _, _, err := resourceapply.ApplyRoleBinding(ctx, ncc.kubeClient.RbacV1(), ncc.roleBindingLister, ncc.eventRecorder, crb, resourceapply.ApplyOptions{ + AllowMissingControllerRef: true, + }) + if err != nil { + errs = append(errs, fmt.Errorf("can't create missing rolebinding: %w", err)) + continue + } + } + return utilerrors.NewAggregate(errs) +} diff --git a/pkg/controller/nodeconfig/sync_roles.go b/pkg/controller/nodeconfig/sync_roles.go new file mode 100644 index 00000000000..1471f46c9ce --- /dev/null +++ b/pkg/controller/nodeconfig/sync_roles.go @@ -0,0 +1,77 @@ +// Copyright (C) 2024 ScyllaDB + +package nodeconfig + +import ( + "context" + "fmt" + + "github.com/scylladb/scylla-operator/pkg/resourceapply" + rbacv1 "k8s.io/api/rbac/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + utilerrors "k8s.io/apimachinery/pkg/util/errors" +) + +func (ncc *Controller) makeRoles() []*rbacv1.Role { + Roles := []*rbacv1.Role{ + makePerftuneRole(), + } + + return Roles +} + +func (ncc *Controller) pruneRoles(ctx context.Context, requiredRoles []*rbacv1.Role, Roles map[string]*rbacv1.Role) error { + var errs []error + for _, r := range Roles { + if r.DeletionTimestamp != nil { + continue + } + + isRequired := false + for _, req := range requiredRoles { + if r.Name == req.Name { + isRequired = true + break + } + } + if isRequired { + continue + } + + propagationPolicy := metav1.DeletePropagationBackground + err := ncc.kubeClient.RbacV1().Roles(r.Namespace).Delete(ctx, r.Name, metav1.DeleteOptions{ + Preconditions: &metav1.Preconditions{ + UID: &r.UID, + }, + PropagationPolicy: &propagationPolicy, + }) + if err != nil { + errs = append(errs, err) + continue + } + } + return utilerrors.NewAggregate(errs) +} + +func (ncc *Controller) syncRoles(ctx context.Context, Roles map[string]*rbacv1.Role) error { + requiredRoles := ncc.makeRoles() + + // Delete any excessive Roles. + // Delete has to be the first action to avoid getting stuck on quota. + if err := ncc.pruneRoles(ctx, requiredRoles, Roles); err != nil { + return fmt.Errorf("can't delete Role(s): %w", err) + } + + var errs []error + for _, cr := range requiredRoles { + _, _, err := resourceapply.ApplyRole(ctx, ncc.kubeClient.RbacV1(), ncc.roleLister, ncc.eventRecorder, cr, resourceapply.ApplyOptions{ + AllowMissingControllerRef: true, + }) + if err != nil { + errs = append(errs, fmt.Errorf("can't create missing Role: %w", err)) + continue + } + } + + return utilerrors.NewAggregate(errs) +} diff --git a/pkg/controller/nodeconfig/sync_serviceaccounts.go b/pkg/controller/nodeconfig/sync_serviceaccounts.go index cf88464f213..33611f35a8a 100644 --- a/pkg/controller/nodeconfig/sync_serviceaccounts.go +++ b/pkg/controller/nodeconfig/sync_serviceaccounts.go @@ -15,6 +15,7 @@ import ( func (ncc *Controller) makeServiceAccounts() []*corev1.ServiceAccount { serviceAccounts := []*corev1.ServiceAccount{ makeNodeConfigServiceAccount(), + makePerftuneServiceAccount(), } return serviceAccounts diff --git a/pkg/controller/nodetune/resource.go b/pkg/controller/nodetune/resource.go index b8f65642581..6ddfd206181 100644 --- a/pkg/controller/nodetune/resource.go +++ b/pkg/controller/nodetune/resource.go @@ -54,11 +54,12 @@ func makePerftuneJobForNode(controllerRef *metav1.OwnerReference, namespace, nod Annotations: annotations, }, Spec: corev1.PodSpec{ - Tolerations: podSpec.Tolerations, - NodeName: nodeName, - RestartPolicy: corev1.RestartPolicyOnFailure, - HostPID: true, - HostNetwork: true, + Tolerations: podSpec.Tolerations, + NodeName: nodeName, + RestartPolicy: corev1.RestartPolicyOnFailure, + HostPID: true, + HostNetwork: true, + ServiceAccountName: naming.PerftuneServiceAccountName, Containers: []corev1.Container{ { Name: naming.PerftuneContainerName, @@ -164,11 +165,12 @@ func makePerftuneJobForContainers(controllerRef *metav1.OwnerReference, namespac Annotations: annotations, }, Spec: corev1.PodSpec{ - Tolerations: podSpec.Tolerations, - NodeName: nodeName, - RestartPolicy: corev1.RestartPolicyOnFailure, - HostPID: true, - HostNetwork: true, + Tolerations: podSpec.Tolerations, + NodeName: nodeName, + RestartPolicy: corev1.RestartPolicyOnFailure, + HostPID: true, + HostNetwork: true, + ServiceAccountName: naming.PerftuneServiceAccountName, Containers: []corev1.Container{ { Name: naming.PerftuneContainerName, diff --git a/pkg/naming/constants.go b/pkg/naming/constants.go index 38388f311b8..b142723dc0f 100644 --- a/pkg/naming/constants.go +++ b/pkg/naming/constants.go @@ -140,7 +140,8 @@ const ( SingletonName = "cluster" - PerftuneJobPrefixName = "perftune" + PerftuneJobPrefixName = "perftune" + PerftuneServiceAccountName = "perftune" // TODO: Make sure this doesn't get out of date. // TODO: Can't be bumped until scylladb/scylladb#17787 is fixed.