From e10bbe8f070aa0a72b4d3202221c5c3a95eda31b Mon Sep 17 00:00:00 2001 From: "hawjia.lim" Date: Mon, 24 Jul 2023 11:22:41 +0800 Subject: [PATCH 01/22] feat(federated-informer-manager): integrate pod informer --- cmd/controller-manager/app/util.go | 11 +- .../federatedinformermanager.go | 184 +++++++++++++---- .../federatedinformermanager_test.go | 191 +++++++++++++++--- pkg/util/informermanager/informermanager.go | 10 +- pkg/util/informermanager/interface.go | 20 +- pkg/util/informermanager/podinformer.go | 139 +++++++++++++ 6 files changed, 464 insertions(+), 91 deletions(-) create mode 100644 pkg/util/informermanager/podinformer.go diff --git a/cmd/controller-manager/app/util.go b/cmd/controller-manager/app/util.go index 41958dfc..a12d8f3e 100644 --- a/cmd/controller-manager/app/util.go +++ b/cmd/controller-manager/app/util.go @@ -27,6 +27,7 @@ import ( "k8s.io/client-go/dynamic/dynamicinformer" "k8s.io/client-go/informers" "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" "github.com/kubewharf/kubeadmiral/cmd/controller-manager/app/options" @@ -115,19 +116,15 @@ func createControllerContext(opts *options.Options) (*controllercontext.Context, nil, ) federatedInformerManager := informermanager.NewFederatedInformerManager( - informermanager.ClusterClientGetter{ + informermanager.ClusterClientHelper{ ConnectionHash: informermanager.DefaultClusterConnectionHash, - ClientGetter: func(cluster *fedcorev1a1.FederatedCluster) (dynamic.Interface, error) { - restConfig, err := clusterutil.BuildClusterConfig( + RestConfigGetter: func(cluster *fedcorev1a1.FederatedCluster) (*rest.Config, error) { + return clusterutil.BuildClusterConfig( cluster, kubeClientset, restConfig, common.DefaultFedSystemNamespace, ) - if err != nil { - return nil, err - } - return dynamic.NewForConfig(restConfig) }, }, fedInformerFactory.Core().V1alpha1().FederatedTypeConfigs(), diff --git a/pkg/util/informermanager/federatedinformermanager.go b/pkg/util/informermanager/federatedinformermanager.go index 7264b220..aa04def9 100644 --- a/pkg/util/informermanager/federatedinformermanager.go +++ b/pkg/util/informermanager/federatedinformermanager.go @@ -22,15 +22,22 @@ import ( "encoding/gob" "fmt" "sync" + "time" + "golang.org/x/sync/semaphore" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/dynamic" + "k8s.io/client-go/informers" + "k8s.io/client-go/kubernetes" + v1 "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/rest" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" "k8s.io/klog/v2" @@ -50,40 +57,51 @@ type federatedInformerManager struct { started bool shutdown bool - clientGetter ClusterClientGetter + clientHelper ClusterClientHelper + kubeClientGetter func(*fedcorev1a1.FederatedCluster, *rest.Config) (kubernetes.Interface, error) + dynamicClientGetter func(*fedcorev1a1.FederatedCluster, *rest.Config) (dynamic.Interface, error) + ftcInformer fedcorev1a1informers.FederatedTypeConfigInformer clusterInformer fedcorev1a1informers.FederatedClusterInformer eventHandlerGenerators []*EventHandlerGenerator clusterEventHandlers []*ClusterEventHandler - clients map[string]dynamic.Interface - connectionMap map[string][]byte - informerManagers map[string]InformerManager - 
informerManagersCancelFuncs map[string]context.CancelFunc + kubeClients map[string]kubernetes.Interface + dynamicClients map[string]dynamic.Interface + connectionMap map[string][]byte + clusterCancelFuncs map[string]context.CancelFunc + informerManagers map[string]InformerManager + informerFactories map[string]informers.SharedInformerFactory - queue workqueue.RateLimitingInterface + queue workqueue.RateLimitingInterface + podListerSemaphore *semaphore.Weighted + initialClusters sets.Set[string] } func NewFederatedInformerManager( - clientGetter ClusterClientGetter, + clientHelper ClusterClientHelper, ftcInformer fedcorev1a1informers.FederatedTypeConfigInformer, clusterInformer fedcorev1a1informers.FederatedClusterInformer, ) FederatedInformerManager { manager := &federatedInformerManager{ - lock: sync.RWMutex{}, - started: false, - shutdown: false, - clientGetter: clientGetter, - ftcInformer: ftcInformer, - clusterInformer: clusterInformer, - eventHandlerGenerators: []*EventHandlerGenerator{}, - clusterEventHandlers: []*ClusterEventHandler{}, - clients: map[string]dynamic.Interface{}, - connectionMap: map[string][]byte{}, - informerManagers: map[string]InformerManager{}, - informerManagersCancelFuncs: map[string]context.CancelFunc{}, - queue: workqueue.NewRateLimitingQueue(workqueue.DefaultItemBasedRateLimiter()), + lock: sync.RWMutex{}, + started: false, + shutdown: false, + clientHelper: clientHelper, + ftcInformer: ftcInformer, + clusterInformer: clusterInformer, + eventHandlerGenerators: []*EventHandlerGenerator{}, + clusterEventHandlers: []*ClusterEventHandler{}, + kubeClients: map[string]kubernetes.Interface{}, + dynamicClients: map[string]dynamic.Interface{}, + connectionMap: map[string][]byte{}, + clusterCancelFuncs: map[string]context.CancelFunc{}, + informerManagers: map[string]InformerManager{}, + informerFactories: map[string]informers.SharedInformerFactory{}, + queue: workqueue.NewRateLimitingQueue(workqueue.DefaultItemBasedRateLimiter()), + podListerSemaphore: semaphore.NewWeighted(3), // TODO: make this configurable + initialClusters: sets.New[string](), } clusterInformer.Informer().AddEventHandler(cache.FilteringResourceEventHandler{ @@ -100,6 +118,13 @@ func NewFederatedInformerManager( ftcInformer.Informer() + manager.dynamicClientGetter = func(_ *fedcorev1a1.FederatedCluster, config *rest.Config) (dynamic.Interface, error) { + return dynamic.NewForConfig(config) + } + manager.kubeClientGetter = func(_ *fedcorev1a1.FederatedCluster, config *rest.Config) (kubernetes.Interface, error) { + return kubernetes.NewForConfig(config) + } + return manager } @@ -145,7 +170,7 @@ func (m *federatedInformerManager) worker(ctx context.Context) { return } - err, needReenqueue := m.processCluster(ctx, cluster) + err, needReenqueue, delay := m.processCluster(ctx, cluster) if err != nil { if needReenqueue { logger.Error(err, "Failed to process FederatedCluster, will retry") @@ -159,22 +184,22 @@ func (m *federatedInformerManager) worker(ctx context.Context) { m.queue.Forget(key) if needReenqueue { - m.queue.Add(key) + m.queue.AddAfter(key, delay) } } func (m *federatedInformerManager) processCluster( ctx context.Context, cluster *fedcorev1a1.FederatedCluster, -) (err error, needReenqueue bool) { +) (err error, needReenqueue bool, delay time.Duration) { m.lock.Lock() defer m.lock.Unlock() clusterName := cluster.Name - connectionHash, err := m.clientGetter.ConnectionHash(cluster) + connectionHash, err := m.clientHelper.ConnectionHash(cluster) if err != nil { - return fmt.Errorf("failed to get 
connection hash for cluster %s: %w", clusterName, err), true + return fmt.Errorf("failed to get connection hash for cluster %s: %w", clusterName, err), true, 0 } if oldConnectionHash, exists := m.connectionMap[clusterName]; exists { if !bytes.Equal(oldConnectionHash, connectionHash) { @@ -183,16 +208,26 @@ func (m *federatedInformerManager) processCluster( // reenqueue. // Note: updating of cluster connection details, however, is still not a supported use case. err := m.processClusterDeletionUnlocked(ctx, clusterName) - return err, true + return err, true, 0 } } else { - clusterClient, err := m.clientGetter.ClientGetter(cluster) + clusterRestConfig, err := m.clientHelper.RestConfigGetter(cluster) + if err != nil { + return fmt.Errorf("failed to get rest config for cluster %s: %w", clusterName, err), true, 0 + } + + clusterDynamicClient, err := m.dynamicClientGetter(cluster, clusterRestConfig) + if err != nil { + return fmt.Errorf("failed to get dynamic client for cluster %s: %w", clusterName, err), true, 0 + } + + clusterKubeClient, err := m.kubeClientGetter(cluster, clusterRestConfig) if err != nil { - return fmt.Errorf("failed to get client for cluster %s: %w", clusterName, err), true + return fmt.Errorf("failed to get kubernetes client for cluster %s: %w", clusterName, err), true, 0 } manager := NewInformerManager( - clusterClient, + clusterDynamicClient, m.ftcInformer, func(opts *metav1.ListOptions) { selector := &metav1.LabelSelector{} @@ -209,21 +244,39 @@ func (m *federatedInformerManager) processCluster( for _, generator := range m.eventHandlerGenerators { if err := manager.AddEventHandlerGenerator(generator); err != nil { cancel() - return fmt.Errorf("failed to initialized InformerManager for cluster %s: %w", clusterName, err), true + return fmt.Errorf("failed to initialized InformerManager for cluster %s: %w", clusterName, err), true, 0 } } - klog.FromContext(ctx).V(2).Info("Starting new InformerManager for FederatedCluster") + factory := informers.NewSharedInformerFactory(clusterKubeClient, 0) + addPodInformer(ctx, factory, clusterKubeClient, m.podListerSemaphore, true) + factory.Core().V1().Nodes().Informer() + klog.FromContext(ctx).V(2).Info("Starting new InformerManager for FederatedCluster") manager.Start(ctx) + klog.FromContext(ctx).V(2).Info("Starting new SharedInformerFactory for FederatedCluster") + factory.Start(ctx.Done()) + m.connectionMap[clusterName] = connectionHash - m.clients[clusterName] = clusterClient + m.kubeClients[clusterName] = clusterKubeClient + m.dynamicClients[clusterName] = clusterDynamicClient + m.clusterCancelFuncs[clusterName] = cancel m.informerManagers[clusterName] = manager - m.informerManagersCancelFuncs[clusterName] = cancel + m.informerFactories[clusterName] = factory + } + + if m.initialClusters.Has(cluster.Name) { + manager := m.informerManagers[cluster.Name] + if manager != nil && manager.HasSynced() { + m.initialClusters.Delete(cluster.Name) + } else { + klog.FromContext(ctx).V(3).Info("Waiting for InformerManager sync") + return nil, true, 100 * time.Millisecond + } } - return nil, false + return nil, false, 0 } func (m *federatedInformerManager) processClusterDeletion(ctx context.Context, clusterName string) error { @@ -234,14 +287,16 @@ func (m *federatedInformerManager) processClusterDeletion(ctx context.Context, c func (m *federatedInformerManager) processClusterDeletionUnlocked(ctx context.Context, clusterName string) error { delete(m.connectionMap, clusterName) - delete(m.clients, clusterName) + delete(m.dynamicClients, 
clusterName) - if cancel, ok := m.informerManagersCancelFuncs[clusterName]; ok { + if cancel, ok := m.clusterCancelFuncs[clusterName]; ok { klog.FromContext(ctx).V(2).Info("Stopping InformerManager for FederatedCluster") cancel() } delete(m.informerManagers, clusterName) - delete(m.informerManagersCancelFuncs, clusterName) + delete(m.clusterCancelFuncs, clusterName) + + m.initialClusters.Delete(clusterName) return nil } @@ -270,10 +325,17 @@ func (m *federatedInformerManager) AddEventHandlerGenerator(generator *EventHand return nil } -func (m *federatedInformerManager) GetClusterClient(cluster string) (client dynamic.Interface, exists bool) { +func (m *federatedInformerManager) GetClusterDynamicClient(cluster string) (client dynamic.Interface, exists bool) { m.lock.RLock() defer m.lock.RUnlock() - client, ok := m.clients[cluster] + client, ok := m.dynamicClients[cluster] + return client, ok +} + +func (m *federatedInformerManager) GetClusterKubeClient(cluster string) (client kubernetes.Interface, exists bool) { + m.lock.RLock() + defer m.lock.RUnlock() + client, ok := m.kubeClients[cluster] return client, ok } @@ -301,6 +363,34 @@ func (m *federatedInformerManager) GetFederatedTypeConfigLister() fedcorev1a1lis return m.ftcInformer.Lister() } +func (m *federatedInformerManager) GetNodeLister( + cluster string, +) (lister v1.NodeLister, informerSynced cache.InformerSynced, exists bool) { + m.lock.RLock() + defer m.lock.RUnlock() + + factory, ok := m.informerFactories[cluster] + if !ok { + return nil, nil, false + } + + return factory.Core().V1().Nodes().Lister(), factory.Core().V1().Nodes().Informer().HasSynced, true +} + +func (m *federatedInformerManager) GetPodLister( + cluster string, +) (lister v1.PodLister, informerSynced cache.InformerSynced, exists bool) { + m.lock.RLock() + defer m.lock.RUnlock() + + factory, ok := m.informerFactories[cluster] + if !ok { + return nil, nil, false + } + + return factory.Core().V1().Pods().Lister(), factory.Core().V1().Pods().Informer().HasSynced, true +} + func (m *federatedInformerManager) GetResourceLister( gvk schema.GroupVersionKind, cluster string, @@ -317,7 +407,10 @@ func (m *federatedInformerManager) GetResourceLister( } func (m *federatedInformerManager) HasSynced() bool { - return m.ftcInformer.Informer().HasSynced() && m.clusterInformer.Informer().HasSynced() + m.lock.RLock() + defer m.lock.RUnlock() + return m.ftcInformer.Informer().HasSynced() && m.clusterInformer.Informer().HasSynced() && + len(m.initialClusters) == 0 } func (m *federatedInformerManager) Start(ctx context.Context) { @@ -333,11 +426,18 @@ func (m *federatedInformerManager) Start(ctx context.Context) { m.started = true - if !cache.WaitForCacheSync(ctx.Done(), m.HasSynced) { + if !cache.WaitForCacheSync(ctx.Done(), m.ftcInformer.Informer().HasSynced, m.clusterInformer.Informer().HasSynced) { logger.Error(nil, "Failed to wait for FederatedInformerManager cache sync") return } + // Populate the initial snapshot of clusters + + clusters := m.clusterInformer.Informer().GetStore().List() + for _, cluster := range clusters { + m.initialClusters.Insert(cluster.(*fedcorev1a1.FederatedCluster).GetName()) + } + for _, handler := range m.clusterEventHandlers { predicate := handler.Predicate callback := handler.Callback @@ -428,7 +528,7 @@ func GetClusterObject( return clusterObj.(*unstructured.Unstructured), true, nil } - client, exists := fedInformerManager.GetClusterClient(clusterName) + client, exists := fedInformerManager.GetClusterDynamicClient(clusterName) if !exists { return 
nil, false, fmt.Errorf("cluster client does not exist for cluster %q", clusterName) } diff --git a/pkg/util/informermanager/federatedinformermanager_test.go b/pkg/util/informermanager/federatedinformermanager_test.go index dc13a1a5..bbfd3acf 100644 --- a/pkg/util/informermanager/federatedinformermanager_test.go +++ b/pkg/util/informermanager/federatedinformermanager_test.go @@ -31,13 +31,16 @@ import ( "k8s.io/apimachinery/pkg/util/wait" dynamicclient "k8s.io/client-go/dynamic" dynamicfake "k8s.io/client-go/dynamic/fake" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/kubernetes/fake" + "k8s.io/client-go/rest" "k8s.io/client-go/tools/cache" "k8s.io/klog/v2" "k8s.io/klog/v2/ktesting" fedcorev1a1 "github.com/kubewharf/kubeadmiral/pkg/apis/core/v1alpha1" fedclient "github.com/kubewharf/kubeadmiral/pkg/client/clientset/versioned" - "github.com/kubewharf/kubeadmiral/pkg/client/clientset/versioned/fake" + fedfake "github.com/kubewharf/kubeadmiral/pkg/client/clientset/versioned/fake" fedinformers "github.com/kubewharf/kubeadmiral/pkg/client/informers/externalversions" "github.com/kubewharf/kubeadmiral/pkg/controllers/common" ) @@ -78,11 +81,17 @@ func TestFederatedInformerManager(t *testing.T) { _ = wait.PollInfinite(time.Millisecond, func() (done bool, err error) { return manager.IsShutdown(), nil }) }() + ctxWithTimeout, timeoutCancel := context.WithTimeout(ctx, time.Second) + defer timeoutCancel() + if !cache.WaitForCacheSync(ctxWithTimeout.Done(), manager.HasSynced) { + g.Fail("Timed out waiting for FederatedInformerManager cache sync") + } + // 2. Verify that the clients for each cluster is eventually available for _, cluster := range defaultClusters { g.Eventually(func(g gomega.Gomega) { - client, exists := manager.GetClusterClient(cluster.Name) + client, exists := manager.GetClusterDynamicClient(cluster.Name) g.Expect(exists).To(gomega.BeTrue()) g.Expect(client).ToNot(gomega.BeNil()) }).WithTimeout(time.Second * 2).Should(gomega.Succeed()) @@ -90,7 +99,7 @@ func TestFederatedInformerManager(t *testing.T) { // 3. Verify that the client for a non-existent cluster is not available - client, exists := manager.GetClusterClient("cluster-4") + client, exists := manager.GetClusterDynamicClient("cluster-4") g.Expect(exists).To(gomega.BeFalse()) g.Expect(client).To(gomega.BeNil()) }) @@ -122,10 +131,16 @@ func TestFederatedInformerManager(t *testing.T) { _ = wait.PollInfinite(time.Millisecond, func() (done bool, err error) { return manager.IsShutdown(), nil }) }() + ctxWithTimeout, timeoutCancel := context.WithTimeout(ctx, time.Second) + defer timeoutCancel() + if !cache.WaitForCacheSync(ctxWithTimeout.Done(), manager.HasSynced) { + g.Fail("Timed out waiting for FederatedInformerManager cache sync") + } + // 2. Verify that client for cluster-1 does is not available initially. g.Consistently(func(g gomega.Gomega) { - client, exists := manager.GetClusterClient("cluster-1") + client, exists := manager.GetClusterDynamicClient("cluster-1") g.Expect(exists).To(gomega.BeFalse()) g.Expect(client).To(gomega.BeNil()) }).WithTimeout(time.Second * 2).Should(gomega.Succeed()) @@ -142,7 +157,7 @@ func TestFederatedInformerManager(t *testing.T) { // 4. 
Verify that client for new cluster is eventually available g.Eventually(func(g gomega.Gomega) { - client, exists := manager.GetClusterClient(cluster.Name) + client, exists := manager.GetClusterDynamicClient(cluster.Name) g.Expect(exists).To(gomega.BeTrue()) g.Expect(client).ToNot(gomega.BeNil()) }).WithTimeout(time.Second * 2).Should(gomega.Succeed()) @@ -179,6 +194,12 @@ func TestFederatedInformerManager(t *testing.T) { _ = wait.PollInfinite(time.Millisecond, func() (done bool, err error) { return manager.IsShutdown(), nil }) }() + ctxWithTimeout, timeoutCancel := context.WithTimeout(ctx, time.Second) + defer timeoutCancel() + if !cache.WaitForCacheSync(ctxWithTimeout.Done(), manager.HasSynced) { + g.Fail("Timed out waiting for FederatedInformerManager cache sync") + } + // 2. Verify that listers for existing FTCs and clusters are eventually available for _, ftc := range defaultFTCs { @@ -195,6 +216,24 @@ func TestFederatedInformerManager(t *testing.T) { } } + for _, cluster := range defaultClusters { + g.Eventually(func(g gomega.Gomega) { + lister, informerSynced, exists := manager.GetPodLister(cluster.Name) + + g.Expect(exists).To(gomega.BeTrue()) + g.Expect(lister).ToNot(gomega.BeNil()) + g.Expect(informerSynced()).To(gomega.BeTrue()) + }).WithTimeout(time.Second * 2).Should(gomega.Succeed()) + + g.Eventually(func(g gomega.Gomega) { + lister, informerSynced, exists := manager.GetNodeLister(cluster.Name) + + g.Expect(exists).To(gomega.BeTrue()) + g.Expect(lister).ToNot(gomega.BeNil()) + g.Expect(informerSynced()).To(gomega.BeTrue()) + }).WithTimeout(time.Second * 2).Should(gomega.Succeed()) + } + // 3. Verify that the lister for non-existent FTCs or clusters are not available lister, informerSynced, exists := manager.GetResourceLister(daemonsetGVK, "cluster-1") @@ -244,6 +283,12 @@ func TestFederatedInformerManager(t *testing.T) { _ = wait.PollInfinite(time.Millisecond, func() (done bool, err error) { return manager.IsShutdown(), nil }) }() + ctxWithTimeout, timeoutCancel := context.WithTimeout(ctx, time.Second) + defer timeoutCancel() + if !cache.WaitForCacheSync(ctxWithTimeout.Done(), manager.HasSynced) { + g.Fail("Timed out waiting for FederatedInformerManager cache sync") + } + ftc := daemonsetFTC gvk := ftc.GetSourceTypeGVK() @@ -302,6 +347,12 @@ func TestFederatedInformerManager(t *testing.T) { _ = wait.PollInfinite(time.Millisecond, func() (done bool, err error) { return manager.IsShutdown(), nil }) }() + ctxWithTimeout, timeoutCancel := context.WithTimeout(ctx, time.Second) + defer timeoutCancel() + if !cache.WaitForCacheSync(ctxWithTimeout.Done(), manager.HasSynced) { + g.Fail("Timed out waiting for FederatedInformerManager cache sync") + } + cluster := getTestCluster("cluster-1") // 2. Verify that listers for cluster-1 is not available at the start @@ -315,6 +366,16 @@ func TestFederatedInformerManager(t *testing.T) { g.Expect(lister).To(gomega.BeNil()) g.Expect(informerSynced).To(gomega.BeNil()) } + + podLister, informerSynced, exists := manager.GetPodLister(cluster.Name) + g.Expect(exists).To(gomega.BeFalse()) + g.Expect(podLister).To(gomega.BeNil()) + g.Expect(informerSynced).To(gomega.BeNil()) + + nodeLister, informerSynced, exists := manager.GetNodeLister(cluster.Name) + g.Expect(exists).To(gomega.BeFalse()) + g.Expect(nodeLister).To(gomega.BeNil()) + g.Expect(informerSynced).To(gomega.BeNil()) }).WithTimeout(time.Second * 2).Should(gomega.Succeed()) // 3. 
Create cluster-1 @@ -333,6 +394,16 @@ func TestFederatedInformerManager(t *testing.T) { g.Expect(lister).ToNot(gomega.BeNil()) g.Expect(informerSynced()).To(gomega.BeTrue()) } + + podLister, informerSynced, exists := manager.GetPodLister(cluster.Name) + g.Expect(exists).To(gomega.BeTrue()) + g.Expect(podLister).ToNot(gomega.BeNil()) + g.Expect(informerSynced()).To(gomega.BeTrue()) + + nodeLister, informerSynced, exists := manager.GetNodeLister(cluster.Name) + g.Expect(exists).To(gomega.BeTrue()) + g.Expect(nodeLister).ToNot(gomega.BeNil()) + g.Expect(informerSynced()).To(gomega.BeTrue()) }).WithTimeout(time.Second * 2).Should(gomega.Succeed()) }) @@ -387,6 +458,12 @@ func TestFederatedInformerManager(t *testing.T) { _ = wait.PollInfinite(time.Millisecond, func() (done bool, err error) { return manager.IsShutdown(), nil }) }() + ctxWithTimeout, timeoutCancel := context.WithTimeout(ctx, time.Second) + defer timeoutCancel() + if !cache.WaitForCacheSync(ctxWithTimeout.Done(), manager.HasSynced) { + g.Fail("Timed out waiting for FederatedInformerManager cache sync") + } + // 2. Verify alwaysRegistered is eventually registered for all existing FTCs and clusters. for _, cluster := range defaultClusters { @@ -406,7 +483,7 @@ func TestFederatedInformerManager(t *testing.T) { dp1.SetAnnotations(map[string]string{"test": "test"}) for _, cluster := range defaultClusters { - dynamicClient, exists := manager.GetClusterClient(cluster.Name) + dynamicClient, exists := manager.GetClusterDynamicClient(cluster.Name) g.Expect(exists).To(gomega.BeTrue()) generateEvents( @@ -439,7 +516,7 @@ func TestFederatedInformerManager(t *testing.T) { // 5. Verify that events for non-existent FTCs are not received for _, cluster := range defaultClusters { - dynamicClient, exists := manager.GetClusterClient(cluster.Name) + dynamicClient, exists := manager.GetClusterDynamicClient(cluster.Name) g.Expect(exists).To(gomega.BeTrue()) generateEvents( @@ -504,12 +581,17 @@ func TestFederatedInformerManager(t *testing.T) { generators, clusterHandlers, ) - defer func() { cancel() _ = wait.PollInfinite(time.Millisecond, func() (done bool, err error) { return manager.IsShutdown(), nil }) }() + ctxWithTimeout, timeoutCancel := context.WithTimeout(ctx, time.Second) + defer timeoutCancel() + if !cache.WaitForCacheSync(ctxWithTimeout.Done(), manager.HasSynced) { + g.Fail("Timed out waiting for FederatedInformerManager cache sync") + } + // 2. Verify that alwaysRegistered is not registered initially for daemonset alwaysRegistered.AssertConsistently(g, time.Second*2) @@ -531,7 +613,7 @@ func TestFederatedInformerManager(t *testing.T) { // 5. Verify that newly generated events are also received by alwaysRegistered for _, cluster := range defaultClusters { - dynamicClient, exists := manager.GetClusterClient(cluster.Name) + dynamicClient, exists := manager.GetClusterDynamicClient(cluster.Name) g.Expect(exists).To(gomega.BeTrue()) generateEvents( @@ -548,7 +630,7 @@ func TestFederatedInformerManager(t *testing.T) { // 6. 
Verify that events for non-existent FTCs are not received by alwaysRegistered for _, cluster := range defaultClusters { - dynamicClient, exists := manager.GetClusterClient(cluster.Name) + dynamicClient, exists := manager.GetClusterDynamicClient(cluster.Name) g.Expect(exists).To(gomega.BeTrue()) generateEvents( @@ -614,6 +696,12 @@ func TestFederatedInformerManager(t *testing.T) { _ = wait.PollInfinite(time.Millisecond, func() (done bool, err error) { return manager.IsShutdown(), nil }) }() + ctxWithTimeout, timeoutCancel := context.WithTimeout(ctx, time.Second) + defer timeoutCancel() + if !cache.WaitForCacheSync(ctxWithTimeout.Done(), manager.HasSynced) { + g.Fail("Timed out waiting for FederatedInformerManager cache sync") + } + // 2. Verify that alwaysRegistered is not registered initially since there are no clusters alwaysRegistered.AssertConsistently(g, time.Second*2) @@ -653,7 +741,7 @@ func TestFederatedInformerManager(t *testing.T) { // 5. Verify that newly generated events are also received by alwaysRegistered for _, cluster := range defaultClusters { - dynamicClient, exists := manager.GetClusterClient(cluster.Name) + dynamicClient, exists := manager.GetClusterDynamicClient(cluster.Name) g.Expect(exists).To(gomega.BeTrue()) generateEvents( @@ -670,7 +758,7 @@ func TestFederatedInformerManager(t *testing.T) { // 6. Verify that events for non-existent FTCs are not received by alwaysRegistered for _, cluster := range defaultClusters { - dynamicClient, exists := manager.GetClusterClient(cluster.Name) + dynamicClient, exists := manager.GetClusterDynamicClient(cluster.Name) g.Expect(exists).To(gomega.BeTrue()) generateEvents( @@ -812,6 +900,12 @@ func TestFederatedInformerManager(t *testing.T) { _ = wait.PollInfinite(time.Millisecond, func() (done bool, err error) { return manager.IsShutdown(), nil }) }() + ctxWithTimeout, timeoutCancel := context.WithTimeout(ctx, time.Second) + defer timeoutCancel() + if !cache.WaitForCacheSync(ctxWithTimeout.Done(), manager.HasSynced) { + g.Fail("Timed out waiting for FederatedInformerManager cache sync") + } + // 2. Verify that handler is not registered initially. handler.AssertConsistently(g, time.Second*2) @@ -830,7 +924,7 @@ func TestFederatedInformerManager(t *testing.T) { handler.AssertEventually(g, time.Second*2) for _, cluster := range defaultClusters { - dynamicClient, exists := manager.GetClusterClient(cluster.Name) + dynamicClient, exists := manager.GetClusterDynamicClient(cluster.Name) g.Expect(exists).To(gomega.BeTrue()) generateEvents( @@ -888,6 +982,12 @@ func TestFederatedInformerManager(t *testing.T) { _ = wait.PollInfinite(time.Millisecond, func() (done bool, err error) { return manager.IsShutdown(), nil }) }() + ctxWithTimeout, timeoutCancel := context.WithTimeout(ctx, time.Second) + defer timeoutCancel() + if !cache.WaitForCacheSync(ctxWithTimeout.Done(), manager.HasSynced) { + g.Fail("Timed out waiting for FederatedInformerManager cache sync") + } + // 2. Verify that handler is registered initially. handler.ExpectGenerateEvents(ftc.Name, len(defaultClusters)) @@ -905,7 +1005,7 @@ func TestFederatedInformerManager(t *testing.T) { // 4. Verify that handler is unregistered and new events are no longer received by handler. 
for _, cluster := range defaultClusters { - dynamicClient, exists := manager.GetClusterClient(cluster.Name) + dynamicClient, exists := manager.GetClusterDynamicClient(cluster.Name) g.Expect(exists).To(gomega.BeTrue()) generateEvents( @@ -963,6 +1063,12 @@ func TestFederatedInformerManager(t *testing.T) { _ = wait.PollInfinite(time.Millisecond, func() (done bool, err error) { return manager.IsShutdown(), nil }) }() + ctxWithTimeout, timeoutCancel := context.WithTimeout(ctx, time.Second) + defer timeoutCancel() + if !cache.WaitForCacheSync(ctxWithTimeout.Done(), manager.HasSynced) { + g.Fail("Timed out waiting for FederatedInformerManager cache sync") + } + // 2. Verify that handler is registered initially handler.ExpectGenerateEvents(ftc.Name, len(defaultClusters)) @@ -982,7 +1088,7 @@ func TestFederatedInformerManager(t *testing.T) { dp1.SetAnnotations(map[string]string{"test": "test"}) for _, cluster := range defaultClusters { - dynamicClient, exists := manager.GetClusterClient(cluster.Name) + dynamicClient, exists := manager.GetClusterDynamicClient(cluster.Name) g.Expect(exists).To(gomega.BeTrue()) generateEvents( @@ -1041,6 +1147,12 @@ func TestFederatedInformerManager(t *testing.T) { _ = wait.PollInfinite(time.Millisecond, func() (done bool, err error) { return manager.IsShutdown(), nil }) }() + ctxWithTimeout, timeoutCancel := context.WithTimeout(ctx, time.Second) + defer timeoutCancel() + if !cache.WaitForCacheSync(ctxWithTimeout.Done(), manager.HasSynced) { + g.Fail("Timed out waiting for FederatedInformerManager cache sync") + } + // 2. Verify that handler is registered initially handler.ExpectGenerateEvents(ftc.Name, len(defaultClusters)) @@ -1060,7 +1172,7 @@ func TestFederatedInformerManager(t *testing.T) { dp1.SetAnnotations(map[string]string{"test": "test"}) for _, cluster := range defaultClusters { - dynamicClient, exists := manager.GetClusterClient(cluster.Name) + dynamicClient, exists := manager.GetClusterDynamicClient(cluster.Name) g.Expect(exists).To(gomega.BeTrue()) generateEvents( @@ -1125,6 +1237,12 @@ func TestFederatedInformerManager(t *testing.T) { _ = wait.PollInfinite(time.Millisecond, func() (done bool, err error) { return manager.IsShutdown(), nil }) }() + ctxWithTimeout, timeoutCancel := context.WithTimeout(ctx, time.Second) + defer timeoutCancel() + if !cache.WaitForCacheSync(ctxWithTimeout.Done(), manager.HasSynced) { + g.Fail("Timed out waiting for FederatedInformerManager cache sync") + } + // 2. Verify that handler1 and handler2 is registered initially for all FTCs and clusters for _, cluster := range defaultClusters { @@ -1157,7 +1275,7 @@ func TestFederatedInformerManager(t *testing.T) { // 4. Verify that handler1 and handler2 is unregistered for deployments and no additional events are received for _, cluster := range defaultClusters { - dynamicClient, exists := manager.GetClusterClient(cluster.Name) + dynamicClient, exists := manager.GetClusterDynamicClient(cluster.Name) g.Expect(exists).To(gomega.BeTrue()) generateEvents( @@ -1174,7 +1292,7 @@ func TestFederatedInformerManager(t *testing.T) { // 5. Verify that handler1 and handler2 is not unregistered for other FTCs. 
for _, cluster := range defaultClusters { - dynamicClient, exists := manager.GetClusterClient(cluster.Name) + dynamicClient, exists := manager.GetClusterDynamicClient(cluster.Name) g.Expect(exists).To(gomega.BeTrue()) generateEvents( @@ -1250,6 +1368,12 @@ func TestFederatedInformerManager(t *testing.T) { _ = wait.PollInfinite(time.Millisecond, func() (done bool, err error) { return manager.IsShutdown(), nil }) }() + ctxWithTimeout, timeoutCancel := context.WithTimeout(ctx, time.Second) + defer timeoutCancel() + if !cache.WaitForCacheSync(ctxWithTimeout.Done(), manager.HasSynced) { + g.Fail("Timed out waiting for FederatedInformerManager cache sync") + } + // 2. Verify that handler1 and handler2 is registered initially for all FTCs and clusters for _, cluster := range defaultClusters { @@ -1271,7 +1395,7 @@ func TestFederatedInformerManager(t *testing.T) { // 3. Delete cluster-1 // Get client before deletion - dynamicClient, exists := manager.GetClusterClient("cluster-1") + dynamicClient, exists := manager.GetClusterDynamicClient("cluster-1") err := fedClient.CoreV1alpha1().FederatedClusters().Delete(ctx, "cluster-1", metav1.DeleteOptions{}) g.Expect(err).ToNot(gomega.HaveOccurred()) @@ -1295,7 +1419,7 @@ func TestFederatedInformerManager(t *testing.T) { // 5. Verify that handler1 and handler2 is not unregistered for other clusters. for _, cluster := range []string{"cluster-2", "cluster-3"} { - dynamicClient, exists := manager.GetClusterClient(cluster) + dynamicClient, exists := manager.GetClusterDynamicClient(cluster) g.Expect(exists).To(gomega.BeTrue()) generateEvents( @@ -1437,7 +1561,7 @@ func bootstrapFederatedInformerManagerWithFakeClients( for _, ftc := range ftcs { fedObjects = append(fedObjects, runtime.Object(ftc.DeepCopy())) } - fedClient := fake.NewSimpleClientset(fedObjects...) + fedClient := fedfake.NewSimpleClientset(fedObjects...) 
factory := fedinformers.NewSharedInformerFactory(fedClient, 0) @@ -1450,15 +1574,27 @@ func bootstrapFederatedInformerManagerWithFakeClients( } informerManager := NewFederatedInformerManager( - ClusterClientGetter{ + ClusterClientHelper{ ConnectionHash: DefaultClusterConnectionHash, - ClientGetter: func(cluster *fedcorev1a1.FederatedCluster) (dynamicclient.Interface, error) { - return dynamicfake.NewSimpleDynamicClient(scheme, dynamicObjects[cluster.Name]...), nil + RestConfigGetter: func(cluster *fedcorev1a1.FederatedCluster) (*rest.Config, error) { + return nil, nil }, }, factory.Core().V1alpha1().FederatedTypeConfigs(), factory.Core().V1alpha1().FederatedClusters(), ) + informerManager.(*federatedInformerManager).dynamicClientGetter = func( + cluster *fedcorev1a1.FederatedCluster, + config *rest.Config, + ) (dynamicclient.Interface, error) { + return dynamicfake.NewSimpleDynamicClient(scheme, dynamicObjects[cluster.Name]...), nil + } + informerManager.(*federatedInformerManager).kubeClientGetter = func( + cluster *fedcorev1a1.FederatedCluster, + config *rest.Config, + ) (kubernetes.Interface, error) { + return fake.NewSimpleClientset(), nil + } for _, generator := range eventHandlerGenerators { err := informerManager.AddEventHandlerGenerator(generator) @@ -1472,12 +1608,5 @@ func bootstrapFederatedInformerManagerWithFakeClients( factory.Start(ctx.Done()) informerManager.Start(ctx) - ctxWithTimeout, cancel := context.WithTimeout(ctx, time.Second) - defer cancel() - - if !cache.WaitForCacheSync(ctxWithTimeout.Done(), informerManager.HasSynced) { - g.Fail("Timed out waiting for FederatedInformerManager cache sync") - } - return informerManager, fedClient } diff --git a/pkg/util/informermanager/informermanager.go b/pkg/util/informermanager/informermanager.go index 42e3c7b2..8124a5e6 100644 --- a/pkg/util/informermanager/informermanager.go +++ b/pkg/util/informermanager/informermanager.go @@ -54,8 +54,7 @@ type informerManager struct { eventHandlerGenerators []*EventHandlerGenerator ftcUpdateHandlers []FTCUpdateHandler - initialFTCs sets.Set[string] - gvkMapping *bijection.Bijection[string, schema.GroupVersionKind] + gvkMapping *bijection.Bijection[string, schema.GroupVersionKind] lastObservedFTCs map[string]*fedcorev1a1.FederatedTypeConfig informers map[string]informers.GenericInformer @@ -63,7 +62,8 @@ type informerManager struct { eventHandlerRegistrations map[string]map[*EventHandlerGenerator]cache.ResourceEventHandlerRegistration lastAppliedFTCsCache map[string]map[*EventHandlerGenerator]*fedcorev1a1.FederatedTypeConfig - queue workqueue.RateLimitingInterface + queue workqueue.RateLimitingInterface + initialFTCs sets.Set[string] } func NewInformerManager( @@ -80,7 +80,6 @@ func NewInformerManager( ftcInformer: ftcInformer, eventHandlerGenerators: []*EventHandlerGenerator{}, ftcUpdateHandlers: []FTCUpdateHandler{}, - initialFTCs: sets.New[string](), gvkMapping: bijection.NewBijection[string, schema.GroupVersionKind](), lastObservedFTCs: map[string]*fedcorev1a1.FederatedTypeConfig{}, informers: map[string]informers.GenericInformer{}, @@ -88,6 +87,7 @@ func NewInformerManager( eventHandlerRegistrations: map[string]map[*EventHandlerGenerator]cache.ResourceEventHandlerRegistration{}, lastAppliedFTCsCache: map[string]map[*EventHandlerGenerator]*fedcorev1a1.FederatedTypeConfig{}, queue: workqueue.NewRateLimitingQueue(workqueue.DefaultItemBasedRateLimiter()), + initialFTCs: sets.New[string](), } ftcInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ @@ -383,7 +383,7 @@ func (m 
*informerManager) Start(ctx context.Context) { return } - // Populate the intial snapshot of FTCs + // Populate the initial snapshot of FTCs ftcs := m.ftcInformer.Informer().GetStore().List() for _, ftc := range ftcs { diff --git a/pkg/util/informermanager/interface.go b/pkg/util/informermanager/interface.go index a696f288..5ea8b2a6 100644 --- a/pkg/util/informermanager/interface.go +++ b/pkg/util/informermanager/interface.go @@ -21,6 +21,9 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/client-go/dynamic" + "k8s.io/client-go/kubernetes" + corev1listers "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/rest" "k8s.io/client-go/tools/cache" fedcorev1a1 "github.com/kubewharf/kubeadmiral/pkg/apis/core/v1alpha1" @@ -117,8 +120,13 @@ type FederatedInformerManager interface { gvk schema.GroupVersionKind, cluster string, ) (lister cache.GenericLister, informerSynced cache.InformerSynced, exists bool) - // Returns a client for the given cluster if it exists. The client for each cluster will eventually exist. - GetClusterClient(cluster string) (client dynamic.Interface, exists bool) + // Returns a dynamic client for the given cluster if it exists. The client for each cluster will eventually exist. + GetClusterDynamicClient(cluster string) (client dynamic.Interface, exists bool) + // Returns a kubernetes client for the given cluster if it exists. The client for each cluster will eventually exist. + GetClusterKubeClient(cluster string) (client kubernetes.Interface, exists bool) + + GetPodLister(cluster string) (lister corev1listers.PodLister, informerSynced cache.InformerSynced, exists bool) + GetNodeLister(cluster string) (lister corev1listers.NodeLister, informerSynced cache.InformerSynced, exists bool) // Returns the FederatedTypeConfig lister used by the FederatedInformerManager. GetFederatedTypeConfigLister() fedcorev1a1listers.FederatedTypeConfigLister @@ -140,12 +148,12 @@ type FederatedInformerManager interface { IsShutdown() bool } -// ClusterClientGetter is used by the FederatedInformerManager to create clients for joined member clusters. -type ClusterClientGetter struct { +// ClusterClientHelper is used by the FederatedInformerManager to create clients for joined member clusters. +type ClusterClientHelper struct { // ConnectionHash should return a string that uniquely identifies the combination of parameters used to generate the // cluster client. A change in the connection hash indicates a need to create a new client for a given member // cluster. ConnectionHash func(cluster *fedcorev1a1.FederatedCluster) ([]byte, error) - // ClientGetter returns a dynamic client for the given member cluster. - ClientGetter func(cluster *fedcorev1a1.FederatedCluster) (dynamic.Interface, error) + // RestConfigGetter returns a *rest.Config for the given member cluster. + RestConfigGetter func(cluster *fedcorev1a1.FederatedCluster) (*rest.Config, error) } diff --git a/pkg/util/informermanager/podinformer.go b/pkg/util/informermanager/podinformer.go new file mode 100644 index 00000000..d5f6f539 --- /dev/null +++ b/pkg/util/informermanager/podinformer.go @@ -0,0 +1,139 @@ +/* +Copyright 2023 The KubeAdmiral Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package informermanager + +import ( + "context" + "time" + + "golang.org/x/sync/semaphore" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/watch" + "k8s.io/client-go/informers" + kubeclient "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/cache" +) + +func addPodInformer(ctx context.Context, + informer informers.SharedInformerFactory, + client kubeclient.Interface, + podListerSemaphore *semaphore.Weighted, + enablePodPruning bool, +) { + informer.InformerFor(&corev1.Pod{}, func(k kubeclient.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer { + return cache.NewSharedIndexInformer( + podListerWatcher(ctx, client, podListerSemaphore, enablePodPruning), + &corev1.Pod{}, + resyncPeriod, + cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, + ) + }) + +} + +func podListerWatcher( + ctx context.Context, + client kubeclient.Interface, + semaphore *semaphore.Weighted, + enablePodPruning bool, +) cache.ListerWatcher { + return &cache.ListWatch{ + ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { + if semaphore != nil { + if err := semaphore.Acquire(ctx, 1); err != nil { + return nil, err + } + defer semaphore.Release(1) + } + pods, err := client.CoreV1().Pods(corev1.NamespaceAll).List(ctx, options) + if err != nil { + return nil, err + } + if enablePodPruning { + for i := range pods.Items { + prunePod(&pods.Items[i]) + } + } + return pods, nil + }, + WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { + watcher, err := client.CoreV1().Pods(corev1.NamespaceAll).Watch(ctx, options) + if err != nil { + return nil, err + } + if !enablePodPruning { + return watcher, nil + } + + // It's easy for a consumer to add buffering via an extra + // goroutine/channel, but impossible for them to remove it, + // so nonbuffered is better. -- from watch.NewStreamWatcher + proxyCh := make(chan watch.Event) + proxyWatcher := watch.NewProxyWatcher(proxyCh) + go func() { + defer watcher.Stop() + // Closing proxyCh will notify the reflector to stop the current + // watching cycle and then restart the list and watch. 
+ defer close(proxyCh) + for { + select { + case <-proxyWatcher.StopChan(): + return + case event, ok := <-watcher.ResultChan(): + if !ok { + // the watcher has been closed, stop the proxy + return + } + if pod, ok := event.Object.(*corev1.Pod); ok { + prunePod(pod) + } + proxyCh <- event + } + } + }() + return proxyWatcher, nil + }, + } +} + +func prunePod(pod *corev1.Pod) { + containers := make([]corev1.Container, len(pod.Spec.Containers)) + initContainers := make([]corev1.Container, len(pod.Spec.InitContainers)) + for i := range pod.Spec.Containers { + containers[i] = corev1.Container{Resources: pod.Spec.Containers[i].Resources} + } + for i := range pod.Spec.InitContainers { + initContainers[i] = corev1.Container{Resources: pod.Spec.InitContainers[i].Resources} + } + *pod = corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: pod.Name, + Namespace: pod.Namespace, + Generation: pod.Generation, + ResourceVersion: pod.ResourceVersion, + UID: pod.UID, + }, + Spec: corev1.PodSpec{ + NodeName: pod.Spec.NodeName, + Overhead: pod.Spec.Overhead, + Containers: containers, + InitContainers: initContainers, + }, + } +} From 513a40d79bc558b60dc29a8a7b6976a5197f7f4f Mon Sep 17 00:00:00 2001 From: "hawjia.lim" Date: Tue, 25 Jul 2023 13:09:40 +0800 Subject: [PATCH 02/22] fix(federated-informer-manager): fix cluster deletion --- .../federatedinformermanager.go | 24 ++++++++++--------- pkg/util/informermanager/informermanager.go | 16 ++++++------- pkg/util/informermanager/podinformer.go | 20 +++++++++------- 3 files changed, 32 insertions(+), 28 deletions(-) diff --git a/pkg/util/informermanager/federatedinformermanager.go b/pkg/util/informermanager/federatedinformermanager.go index aa04def9..0f433d23 100644 --- a/pkg/util/informermanager/federatedinformermanager.go +++ b/pkg/util/informermanager/federatedinformermanager.go @@ -170,7 +170,7 @@ func (m *federatedInformerManager) worker(ctx context.Context) { return } - err, needReenqueue, delay := m.processCluster(ctx, cluster) + needReenqueue, delay, err := m.processCluster(ctx, cluster) if err != nil { if needReenqueue { logger.Error(err, "Failed to process FederatedCluster, will retry") @@ -191,7 +191,7 @@ func (m *federatedInformerManager) worker(ctx context.Context) { func (m *federatedInformerManager) processCluster( ctx context.Context, cluster *fedcorev1a1.FederatedCluster, -) (err error, needReenqueue bool, delay time.Duration) { +) (needReenqueue bool, reenqueueDelay time.Duration, err error) { m.lock.Lock() defer m.lock.Unlock() @@ -199,7 +199,7 @@ func (m *federatedInformerManager) processCluster( connectionHash, err := m.clientHelper.ConnectionHash(cluster) if err != nil { - return fmt.Errorf("failed to get connection hash for cluster %s: %w", clusterName, err), true, 0 + return true, 0, fmt.Errorf("failed to get connection hash for cluster %s: %w", clusterName, err) } if oldConnectionHash, exists := m.connectionMap[clusterName]; exists { if !bytes.Equal(oldConnectionHash, connectionHash) { @@ -208,22 +208,22 @@ func (m *federatedInformerManager) processCluster( // reenqueue. // Note: updating of cluster connection details, however, is still not a supported use case. 
err := m.processClusterDeletionUnlocked(ctx, clusterName) - return err, true, 0 + return true, 0, err } } else { clusterRestConfig, err := m.clientHelper.RestConfigGetter(cluster) if err != nil { - return fmt.Errorf("failed to get rest config for cluster %s: %w", clusterName, err), true, 0 + return true, 0, fmt.Errorf("failed to get rest config for cluster %s: %w", clusterName, err) } clusterDynamicClient, err := m.dynamicClientGetter(cluster, clusterRestConfig) if err != nil { - return fmt.Errorf("failed to get dynamic client for cluster %s: %w", clusterName, err), true, 0 + return true, 0, fmt.Errorf("failed to get dynamic client for cluster %s: %w", clusterName, err) } clusterKubeClient, err := m.kubeClientGetter(cluster, clusterRestConfig) if err != nil { - return fmt.Errorf("failed to get kubernetes client for cluster %s: %w", clusterName, err), true, 0 + return true, 0, fmt.Errorf("failed to get kubernetes client for cluster %s: %w", clusterName, err) } manager := NewInformerManager( @@ -244,7 +244,7 @@ func (m *federatedInformerManager) processCluster( for _, generator := range m.eventHandlerGenerators { if err := manager.AddEventHandlerGenerator(generator); err != nil { cancel() - return fmt.Errorf("failed to initialized InformerManager for cluster %s: %w", clusterName, err), true, 0 + return true, 0, fmt.Errorf("failed to initialized InformerManager for cluster %s: %w", clusterName, err) } } @@ -272,11 +272,11 @@ func (m *federatedInformerManager) processCluster( m.initialClusters.Delete(cluster.Name) } else { klog.FromContext(ctx).V(3).Info("Waiting for InformerManager sync") - return nil, true, 100 * time.Millisecond + return true, 100 * time.Millisecond, nil } } - return nil, false, 0 + return false, 0, nil } func (m *federatedInformerManager) processClusterDeletion(ctx context.Context, clusterName string) error { @@ -287,13 +287,15 @@ func (m *federatedInformerManager) processClusterDeletion(ctx context.Context, c func (m *federatedInformerManager) processClusterDeletionUnlocked(ctx context.Context, clusterName string) error { delete(m.connectionMap, clusterName) + delete(m.kubeClients, clusterName) delete(m.dynamicClients, clusterName) if cancel, ok := m.clusterCancelFuncs[clusterName]; ok { - klog.FromContext(ctx).V(2).Info("Stopping InformerManager for FederatedCluster") + klog.FromContext(ctx).V(2).Info("Stopping InformerManager and SharedInformerFactory for FederatedCluster") cancel() } delete(m.informerManagers, clusterName) + delete(m.informerFactories, clusterName) delete(m.clusterCancelFuncs, clusterName) m.initialClusters.Delete(clusterName) diff --git a/pkg/util/informermanager/informermanager.go b/pkg/util/informermanager/informermanager.go index 8124a5e6..c23d2687 100644 --- a/pkg/util/informermanager/informermanager.go +++ b/pkg/util/informermanager/informermanager.go @@ -141,7 +141,7 @@ func (m *informerManager) worker(ctx context.Context) { return } - err, needReenqueue, delay := m.processFTC(ctx, ftc) + needReenqueue, delay, err := m.processFTC(ctx, ftc) if err != nil { if needReenqueue { logger.Error(err, "Failed to process FederatedTypeConfig, will retry") @@ -162,7 +162,7 @@ func (m *informerManager) worker(ctx context.Context) { func (m *informerManager) processFTC( ctx context.Context, ftc *fedcorev1a1.FederatedTypeConfig, -) (err error, needReenqueue bool, reenqueueDelay time.Duration) { +) (needReenqueue bool, reenqueueDelay time.Duration, err error) { m.lock.Lock() defer m.lock.Unlock() @@ -183,14 +183,14 @@ func (m *informerManager) processFTC( // 
time and we missed processing the deletion. We simply process the ftc deletion and reenqueue. Note: // updating of ftc source types, however, is still not a supported use case. err := m.processFTCDeletionUnlocked(ctx, ftcName) - return err, true, 0 + return true, 0, err } informer = m.informers[ftcName] } else { if err := m.gvkMapping.Add(ftcName, gvk); err != nil { // There must be another ftc with the same source type GVK. - return fmt.Errorf("source type is already referenced by another FederatedTypeConfig: %w", err), false, 0 + return false, 0, fmt.Errorf("source type is already referenced by another FederatedTypeConfig: %w", err) } logger.V(2).Info("Starting new informer for FederatedTypeConfig") @@ -220,7 +220,7 @@ func (m *informerManager) processFTC( if !informer.Informer().HasSynced() { logger.V(3).Info("Informer for FederatedTypeConfig not synced, will not register event handlers yet") - return nil, true, 100 * time.Millisecond + return true, 100 * time.Millisecond, nil } registrations := m.eventHandlerRegistrations[ftcName] @@ -237,7 +237,7 @@ func (m *informerManager) processFTC( if oldRegistration := registrations[generator]; oldRegistration != nil { if err := informer.Informer().RemoveEventHandler(oldRegistration); err != nil { - return fmt.Errorf("failed to unregister event handler: %w", err), true, 0 + return true, 0, fmt.Errorf("failed to unregister event handler: %w", err) } delete(registrations, generator) } @@ -246,7 +246,7 @@ func (m *informerManager) processFTC( if handler := generator.Generator(ftc); handler != nil { newRegistration, err := informer.Informer().AddEventHandler(handler) if err != nil { - return fmt.Errorf("failed to register event handler: %w", err), true, 0 + return true, 0, fmt.Errorf("failed to register event handler: %w", err) } registrations[generator] = newRegistration } @@ -254,7 +254,7 @@ func (m *informerManager) processFTC( lastAppliedFTCs[generator] = ftc } - return nil, false, 0 + return false, 0, nil } func (m *informerManager) processFTCDeletion(ctx context.Context, ftcName string) error { diff --git a/pkg/util/informermanager/podinformer.go b/pkg/util/informermanager/podinformer.go index d5f6f539..c49da4b5 100644 --- a/pkg/util/informermanager/podinformer.go +++ b/pkg/util/informermanager/podinformer.go @@ -36,15 +36,17 @@ func addPodInformer(ctx context.Context, podListerSemaphore *semaphore.Weighted, enablePodPruning bool, ) { - informer.InformerFor(&corev1.Pod{}, func(k kubeclient.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer { - return cache.NewSharedIndexInformer( - podListerWatcher(ctx, client, podListerSemaphore, enablePodPruning), - &corev1.Pod{}, - resyncPeriod, - cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, - ) - }) - + informer.InformerFor( + &corev1.Pod{}, + func(k kubeclient.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer { + return cache.NewSharedIndexInformer( + podListerWatcher(ctx, client, podListerSemaphore, enablePodPruning), + &corev1.Pod{}, + resyncPeriod, + cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, + ) + }, + ) } func podListerWatcher( From 18f2623d6a2569d36229ddff858a0e9117e342b6 Mon Sep 17 00:00:00 2001 From: "hawjia.lim" Date: Tue, 25 Jul 2023 11:50:26 +0800 Subject: [PATCH 03/22] refactor(cluster-controller): adopt unified types --- pkg/controllers/common/types.go | 4 +- pkg/controllers/federate/controller.go | 106 ++++--- .../federatedcluster/clusterjoin.go | 117 ++++--- .../federatedcluster/clusterstatus.go | 72 ++--- 
.../federatedcluster/controller.go | 297 +++++++++--------- pkg/controllers/federatedcluster/util.go | 1 - pkg/controllers/nsautoprop/controller.go | 47 +-- pkg/controllers/policyrc/controller.go | 57 ++-- pkg/controllers/status/controller.go | 95 ++++-- pkg/util/eventhandlers/eventhandler.go | 56 +++- 10 files changed, 490 insertions(+), 362 deletions(-) diff --git a/pkg/controllers/common/types.go b/pkg/controllers/common/types.go index 946d7324..210ca55b 100644 --- a/pkg/controllers/common/types.go +++ b/pkg/controllers/common/types.go @@ -25,7 +25,7 @@ import ( "strings" "k8s.io/apimachinery/pkg/api/meta" - pkgruntime "k8s.io/apimachinery/pkg/runtime" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) // QualifiedName comprises a resource name with an optional namespace. @@ -39,7 +39,7 @@ type QualifiedName struct { Name string } -func NewQualifiedName(obj pkgruntime.Object) QualifiedName { +func NewQualifiedName(obj metav1.Object) QualifiedName { accessor, err := meta.Accessor(obj) if err != nil { // This should never happen, but if it does, the diff --git a/pkg/controllers/federate/controller.go b/pkg/controllers/federate/controller.go index 7964f5bf..bf6ea4a0 100644 --- a/pkg/controllers/federate/controller.go +++ b/pkg/controllers/federate/controller.go @@ -25,7 +25,6 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/dynamic" @@ -130,14 +129,16 @@ func NewFederateController( uns := obj.(*unstructured.Unstructured) return uns.GetNamespace() != fedSystemNamespace }, - Handler: eventhandlers.NewTriggerOnAllChanges(func(obj runtime.Object) { - uns := obj.(*unstructured.Unstructured) - c.worker.Enqueue(workerKey{ - name: uns.GetName(), - namespace: uns.GetNamespace(), - gvk: ftc.GetSourceTypeGVK(), - }) - }), + Handler: eventhandlers.NewTriggerOnAllChanges( + func(uns *unstructured.Unstructured) workerKey { + return workerKey{ + name: uns.GetName(), + namespace: uns.GetNamespace(), + gvk: ftc.GetSourceTypeGVK(), + } + }, + c.worker.Enqueue, + ), } }, }); err != nil { @@ -152,47 +153,59 @@ func NewFederateController( fedObj := obj.(*fedcorev1a1.FederatedObject) return fedObj.Namespace != fedSystemNamespace }, - Handler: eventhandlers.NewTriggerOnAllChanges(func(o runtime.Object) { - fedObj := o.(*fedcorev1a1.FederatedObject) - logger := c.logger.WithValues("federated-object", common.NewQualifiedName(fedObj)) - - srcMeta, err := fedObj.Spec.GetTemplateAsUnstructured() - if err != nil { - logger.Error(err, "Failed to get source object's metadata from FederatedObject") - return - } - - gvk := srcMeta.GroupVersionKind() - - c.worker.Enqueue(workerKey{ - name: srcMeta.GetName(), - namespace: srcMeta.GetNamespace(), - gvk: gvk, - }) - }), + Handler: eventhandlers.NewTriggerOnAllChanges( + func(fedObj *fedcorev1a1.FederatedObject) *fedcorev1a1.FederatedObject { + return fedObj + }, + func(fedObj *fedcorev1a1.FederatedObject) { + srcMeta, err := fedObj.Spec.GetTemplateAsUnstructured() + if err != nil { + c.logger.Error( + err, + "Failed to get source object's metadata from FederatedObject", + "object", + common.NewQualifiedName(fedObj), + ) + return + } + + gvk := srcMeta.GroupVersionKind() + + c.worker.Enqueue(workerKey{ + name: srcMeta.GetName(), + namespace: srcMeta.GetNamespace(), + gvk: gvk, + }) + }), }); err != nil { return nil, err } if _, err := 
clusterFedObjectInformer.Informer().AddEventHandler( - eventhandlers.NewTriggerOnAllChanges(func(o runtime.Object) { - fedObj := o.(*fedcorev1a1.ClusterFederatedObject) - logger := c.logger.WithValues("cluster-federated-object", common.NewQualifiedName(fedObj)) - - srcMeta, err := fedObj.Spec.GetTemplateAsUnstructured() - if err != nil { - logger.Error(err, "Failed to get source object's metadata from ClusterFederatedObject") - return - } - - gvk := srcMeta.GroupVersionKind() - - c.worker.Enqueue(workerKey{ - name: srcMeta.GetName(), - namespace: srcMeta.GetNamespace(), - gvk: gvk, - }) - }), + eventhandlers.NewTriggerOnAllChanges( + func(fedObj *fedcorev1a1.ClusterFederatedObject) *fedcorev1a1.ClusterFederatedObject { + return fedObj + }, + func(fedObj *fedcorev1a1.ClusterFederatedObject) { + srcMeta, err := fedObj.Spec.GetTemplateAsUnstructured() + if err != nil { + logger.Error( + err, + "Failed to get source object's metadata from ClusterFederatedObject", + "object", + common.NewQualifiedName(fedObj), + ) + return + } + + gvk := srcMeta.GroupVersionKind() + + c.worker.Enqueue(workerKey{ + name: srcMeta.GetName(), + namespace: srcMeta.GetNamespace(), + gvk: gvk, + }) + }), ); err != nil { return nil, err } @@ -224,8 +237,7 @@ func (c *FederateController) HasSynced() bool { func (c *FederateController) reconcile(ctx context.Context, key workerKey) (status worker.Result) { _ = c.metrics.Rate("federate.throughput", 1) - ctx, logger := logging.InjectLogger(ctx, c.logger) - ctx, logger = logging.InjectLoggerValues(ctx, "source-object", key.QualifiedName().String(), "gvk", key.gvk) + ctx, logger := logging.InjectLoggerValues(ctx, "source-object", key.QualifiedName().String(), "gvk", key.gvk) startTime := time.Now() diff --git a/pkg/controllers/federatedcluster/clusterjoin.go b/pkg/controllers/federatedcluster/clusterjoin.go index 7b54790e..906396be 100644 --- a/pkg/controllers/federatedcluster/clusterjoin.go +++ b/pkg/controllers/federatedcluster/clusterjoin.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2018 The Kubernetes Authors. @@ -33,15 +32,14 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/wait" - kubeclient "k8s.io/client-go/kubernetes" - "k8s.io/client-go/tools/record" + "k8s.io/client-go/kubernetes" "k8s.io/client-go/util/retry" "k8s.io/klog/v2" "k8s.io/utils/pointer" fedcorev1a1 "github.com/kubewharf/kubeadmiral/pkg/apis/core/v1alpha1" - fedclient "github.com/kubewharf/kubeadmiral/pkg/client/clientset/versioned" "github.com/kubewharf/kubeadmiral/pkg/controllers/common" + "github.com/kubewharf/kubeadmiral/pkg/util/logging" ) const ( @@ -81,17 +79,11 @@ const ( // The returned condition (if not-nil) will have status, reason and message set. The other fields // should be added by the caller. // The returned err is for informational purpose only and the caller should not abort on non-nil error. 
-func handleNotJoinedCluster( +func (c *FederatedClusterController) handleNotJoinedCluster( ctx context.Context, cluster *fedcorev1a1.FederatedCluster, - client fedclient.Interface, - kubeClient kubeclient.Interface, - eventRecorder record.EventRecorder, - fedSystemNamespace string, - clusterJoinTimeout time.Duration, -) (c *fedcorev1a1.FederatedCluster, condition *fedcorev1a1.ClusterCondition, joinPerformed *bool, err error) { - logger := klog.FromContext(ctx).WithValues("process", "cluster-join") - ctx = klog.NewContext(ctx, logger) +) (updated *fedcorev1a1.FederatedCluster, condition *fedcorev1a1.ClusterCondition, joinPerformed *bool, err error) { + logger := klog.FromContext(ctx) joinedCondition := getClusterCondition(&cluster.Status, fedcorev1a1.ClusterJoined) @@ -99,10 +91,10 @@ func handleNotJoinedCluster( if joinedCondition != nil && joinedCondition.Status == corev1.ConditionFalse && - time.Since(joinedCondition.LastTransitionTime.Time) > clusterJoinTimeout { - // join timed out + time.Since(joinedCondition.LastTransitionTime.Time) > c.clusterJoinTimeout { + // Join timed out logger.Error(nil, "Cluster join timed out") - eventRecorder.Eventf( + c.eventRecorder.Eventf( cluster, corev1.EventTypeWarning, EventReasonJoinClusterTimeoutExceeded, @@ -117,11 +109,11 @@ func handleNotJoinedCluster( // 2. The remaining steps require a cluster kube client, attempt to create one - _, clusterKubeClient, err := getClusterClient(ctx, kubeClient, fedSystemNamespace, cluster) + _, clusterKubeClient, err := c.getClusterClient(ctx, cluster) if err != nil { logger.Error(err, "Failed to create cluster client") msg := fmt.Sprintf("Failed to create cluster client: %v", err.Error()) - eventRecorder.Eventf( + c.eventRecorder.Eventf( cluster, corev1.EventTypeWarning, EventReasonJoinClusterError, msg, ) @@ -134,13 +126,15 @@ func handleNotJoinedCluster( // 3. Get or create system namespace in the cluster, this will also tell us if the cluster is unjoinable - logger.V(2).WithValues("fed-system-namespace", fedSystemNamespace).Info("Get system namespace in cluster") - memberFedNamespace, err := clusterKubeClient.CoreV1().Namespaces().Get(ctx, fedSystemNamespace, metav1.GetOptions{ResourceVersion: "0"}) + ctx, logger = logging.InjectLoggerValues(ctx, "fed-system-namespace", c.fedSystemNamespace) + + logger.V(2).Info("Get system namespace in cluster") + memberFedNamespace, err := clusterKubeClient.CoreV1().Namespaces().Get(ctx, c.fedSystemNamespace, metav1.GetOptions{ResourceVersion: "0"}) if err != nil { if !apierrors.IsNotFound(err) { msg := fmt.Sprintf("Failed to get namespace: %v", err.Error()) logger.Error(err, "Failed to get namespace") - eventRecorder.Eventf( + c.eventRecorder.Eventf( cluster, corev1.EventTypeWarning, EventReasonJoinClusterError, msg, ) @@ -156,12 +150,14 @@ func handleNotJoinedCluster( } if memberFedNamespace != nil && memberFedNamespace.Annotations[FederatedClusterUID] != string(cluster.UID) { - // ns exists and is not created by us - the cluster is managed by another control plane + // Namespace exists and is not created by us - the cluster is managed by another control plane. 
msg := "Cluster is unjoinable (check if cluster is already joined to another federation)" logger.Error(nil, msg, "UID", memberFedNamespace.Annotations[FederatedClusterUID], "clusterUID", string(cluster.UID)) - eventRecorder.Eventf( + c.eventRecorder.Eventf( cluster, - corev1.EventTypeWarning, EventReasonClusterUnjoinable, msg, + corev1.EventTypeWarning, + EventReasonClusterUnjoinable, + msg, ) return cluster, &fedcorev1a1.ClusterCondition{ Status: corev1.ConditionFalse, @@ -180,19 +176,19 @@ func handleNotJoinedCluster( if memberFedNamespace == nil { memberFedNamespace = &corev1.Namespace{ ObjectMeta: metav1.ObjectMeta{ - Name: fedSystemNamespace, + Name: c.fedSystemNamespace, Annotations: map[string]string{ FederatedClusterUID: string(cluster.UID), }, }, } - logger.V(1).WithValues("fed-system-namespace", fedSystemNamespace).Info("Create system namespace in cluster") + logger.V(1).Info("Create system namespace in cluster") memberFedNamespace, err = clusterKubeClient.CoreV1().Namespaces().Create(ctx, memberFedNamespace, metav1.CreateOptions{}) if err != nil { msg := fmt.Sprintf("Failed to create system namespace: %v", err.Error()) logger.Error(err, "Failed to create system namespace") - eventRecorder.Eventf( + c.eventRecorder.Eventf( cluster, corev1.EventTypeWarning, EventReasonJoinClusterError, msg, ) @@ -208,12 +204,12 @@ func handleNotJoinedCluster( if cluster.Spec.UseServiceAccountToken { logger.V(2).Info("Get and save cluster token") - err = getAndSaveClusterToken(ctx, cluster, kubeClient, clusterKubeClient, fedSystemNamespace, memberFedNamespace) + err = c.getAndSaveClusterToken(ctx, cluster, clusterKubeClient, memberFedNamespace) if err != nil { msg := fmt.Sprintf("Failed to get and save cluster token: %v", err.Error()) logger.Error(err, "Failed to get and save cluster token") - eventRecorder.Eventf( + c.eventRecorder.Eventf( cluster, corev1.EventTypeWarning, EventReasonJoinClusterError, msg, ) @@ -228,9 +224,11 @@ func handleNotJoinedCluster( // 5. 
Cluster is joined, update condition logger.V(2).Info("Cluster joined successfully") - eventRecorder.Eventf( + c.eventRecorder.Eventf( cluster, - corev1.EventTypeNormal, EventReasonJoinClusterSuccess, "Cluster joined successfully", + corev1.EventTypeNormal, + EventReasonJoinClusterSuccess, + "Cluster joined successfully", ) return cluster, &fedcorev1a1.ClusterCondition{ Status: corev1.ConditionTrue, @@ -239,12 +237,10 @@ func handleNotJoinedCluster( }, joinPerformed, nil } -func getAndSaveClusterToken( +func (c *FederatedClusterController) getAndSaveClusterToken( ctx context.Context, cluster *fedcorev1a1.FederatedCluster, - kubeClient kubeclient.Interface, - clusterKubeClient kubeclient.Interface, - fedSystemNamespace string, + clusterKubeClient kubernetes.Interface, memberSystemNamespace *corev1.Namespace, ) error { logger := klog.FromContext(ctx) @@ -262,13 +258,17 @@ func getAndSaveClusterToken( } err = retry.RetryOnConflict(retry.DefaultBackoff, func() error { - secret, err := kubeClient.CoreV1().Secrets(fedSystemNamespace).Get(ctx, cluster.Spec.SecretRef.Name, metav1.GetOptions{}) + secret, err := c.kubeClient.CoreV1().Secrets(c.fedSystemNamespace).Get( + ctx, + cluster.Spec.SecretRef.Name, + metav1.GetOptions{}, + ) if err != nil { return err } secret.Data[ServiceAccountTokenKey] = token secret.Data[ServiceAccountCAKey] = ca - _, err = kubeClient.CoreV1().Secrets(fedSystemNamespace).Update(ctx, secret, metav1.UpdateOptions{}) + _, err = c.kubeClient.CoreV1().Secrets(c.fedSystemNamespace).Update(ctx, secret, metav1.UpdateOptions{}) return err }) if err != nil { @@ -286,22 +286,29 @@ func getAndSaveClusterToken( // resources in the joining cluster. The created secret name is returned on success. func createAuthorizedServiceAccount( ctx context.Context, - clusterKubeClient kubeclient.Interface, + clusterKubeClient kubernetes.Interface, memberSystemNamespace *corev1.Namespace, clusterName string, errorOnExisting bool, ) (string, error) { - logger := klog.FromContext(ctx).WithValues("member-service-account-name", MemberServiceAccountName) - ctx = klog.NewContext(ctx, logger) + ctx, logger := logging.InjectLoggerValues(ctx, "member-service-account-name", MemberServiceAccountName) // 1. create service account + logger.V(1).Info("Creating service account") - err := createServiceAccount(ctx, clusterKubeClient, memberSystemNamespace.Name, MemberServiceAccountName, clusterName, errorOnExisting) - if err != nil { + if err := createServiceAccount( + ctx, + clusterKubeClient, + memberSystemNamespace.Name, + MemberServiceAccountName, + clusterName, + errorOnExisting, + ); err != nil { return "", fmt.Errorf("failed to create service account %s: %w", MemberServiceAccountName, err) } // 2. create service account token secret + logger.V(1).Info("Creating service account token secret") saTokenSecretName, err := createServiceAccountTokenSecret( ctx, @@ -314,12 +321,21 @@ func createAuthorizedServiceAccount( if err != nil { return "", fmt.Errorf("error creating service account token secret %s : %w", MemberServiceAccountName, err) } - logger.V(1).WithValues("sa-token-secret-name", saTokenSecretName).Info("Created service account token secret for service account") + + ctx, logger = logging.InjectLoggerValues(ctx, "sa-token-secret-name", saTokenSecretName) + logger.V(1).Info("Created service account token secret for service account") // 3. 
create rbac + logger.V(1).Info("Creating RBAC for service account") - err = createClusterRoleAndBinding(ctx, clusterKubeClient, memberSystemNamespace, MemberServiceAccountName, clusterName, errorOnExisting) - if err != nil { + if err = createClusterRoleAndBinding( + ctx, + clusterKubeClient, + memberSystemNamespace, + MemberServiceAccountName, + clusterName, + errorOnExisting, + ); err != nil { return "", fmt.Errorf("error creating cluster role and binding for service account %s: %w", MemberServiceAccountName, err) } @@ -331,8 +347,9 @@ func createAuthorizedServiceAccount( // to access its API server. func createServiceAccount( ctx context.Context, - clusterClientset kubeclient.Interface, - namespace, saName, joiningClusterName string, errorOnExisting bool, + clusterClientset kubernetes.Interface, + namespace, saName, joiningClusterName string, + errorOnExisting bool, ) error { sa := &corev1.ServiceAccount{ ObjectMeta: metav1.ObjectMeta{ @@ -367,7 +384,7 @@ func createServiceAccount( // to access its API server. func createServiceAccountTokenSecret( ctx context.Context, - clusterClientset kubeclient.Interface, + clusterClientset kubernetes.Interface, namespace, saName, joiningClusterName string, errorOnExisting bool, ) (string, error) { @@ -421,7 +438,7 @@ func bindingSubjects(saName, namespace string) []rbacv1.Subject { // with clientset. func createClusterRoleAndBinding( ctx context.Context, - clientset kubeclient.Interface, + clientset kubernetes.Interface, namespace *corev1.Namespace, saName, clusterName string, errorOnExisting bool, @@ -551,7 +568,7 @@ func createClusterRoleAndBinding( func getServiceAccountToken( ctx context.Context, - clusterClientset kubeclient.Interface, + clusterClientset kubernetes.Interface, memberSystemNamespace, secretName string, ) ([]byte, []byte, error) { // Get the secret from the joining cluster. diff --git a/pkg/controllers/federatedcluster/clusterstatus.go b/pkg/controllers/federatedcluster/clusterstatus.go index 87adca6f..80c6fa8f 100644 --- a/pkg/controllers/federatedcluster/clusterstatus.go +++ b/pkg/controllers/federatedcluster/clusterstatus.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2016 The Kubernetes Authors. 
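As background for step 2 of createAuthorizedServiceAccount above, the token secret it provisions is a standard kubernetes.io/service-account-token Secret. The sketch below shows the general shape only; the secret name format is an assumption and the real helper may attach additional labels or owner metadata.

import (
	"context"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// A service-account token Secret: the well-known annotation ties it to the
// service account, and the control plane fills in data["token"] and data["ca.crt"].
func newServiceAccountTokenSecret(
	ctx context.Context,
	client kubernetes.Interface,
	namespace, saName string,
) (*corev1.Secret, error) {
	secret := &corev1.Secret{
		ObjectMeta: metav1.ObjectMeta{
			Name:      saName + "-token", // naming scheme is an assumption
			Namespace: namespace,
			Annotations: map[string]string{
				corev1.ServiceAccountNameKey: saName,
			},
		},
		Type: corev1.SecretTypeServiceAccountToken,
	}
	return client.CoreV1().Secrets(namespace).Create(ctx, secret, metav1.CreateOptions{})
}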
@@ -33,14 +32,11 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/client-go/discovery" - "k8s.io/client-go/informers" - "k8s.io/client-go/tools/cache" + corev1listers "k8s.io/client-go/listers/core/v1" "k8s.io/client-go/util/retry" "k8s.io/klog/v2" fedcorev1a1 "github.com/kubewharf/kubeadmiral/pkg/apis/core/v1alpha1" - fedclient "github.com/kubewharf/kubeadmiral/pkg/client/clientset/versioned" - "github.com/kubewharf/kubeadmiral/pkg/controllers/util/federatedclient" ) const ( @@ -62,32 +58,38 @@ const ( ClusterNotReachableMsg = "Cluster is not reachable" ) -func collectIndividualClusterStatus( +func (c *FederatedClusterController) collectIndividualClusterStatus( ctx context.Context, cluster *fedcorev1a1.FederatedCluster, - fedClient fedclient.Interface, - federatedClient federatedclient.FederatedClientFactory, -) error { +) (retryAfter time.Duration, err error) { logger := klog.FromContext(ctx) - clusterKubeClient, exists, err := federatedClient.KubeClientsetForCluster(cluster.Name) + clusterKubeClient, exists := c.federatedInformerManager.GetClusterKubeClient(cluster.Name) if !exists { - return fmt.Errorf("federated client is not yet up to date") + return 0, fmt.Errorf("failed to get cluster client: FederatedInformerManager not yet up-to-date") } - if err != nil { - return fmt.Errorf("failed to get federated kube client: %w", err) + + podLister, podsSynced, exists := c.federatedInformerManager.GetPodLister(cluster.Name) + if !exists { + return 0, fmt.Errorf("failed to get pod lister: FederatedInformerManager not yet up-to-date") + } + if !podsSynced() { + logger.V(3).Info("Pod informer not synced, will reenqueue") + return 100 * time.Millisecond, nil } - clusterKubeInformer, exists, err := federatedClient.KubeSharedInformerFactoryForCluster(cluster.Name) + + nodeLister, nodesSynced, exists := c.federatedInformerManager.GetNodeLister(cluster.Name) if !exists { - return fmt.Errorf("federated client is not yet up to date") + return 0, fmt.Errorf("failed to get node lister: FederatedInformerManager not yet up-to-date") } - if err != nil { - return fmt.Errorf("failed to get federated kube informer factory: %w", err) + if !nodesSynced() { + logger.V(3).Info("Node informer not synced, will reenqueue") + return 100 * time.Millisecond, nil } discoveryClient := clusterKubeClient.Discovery() - cluster = cluster.DeepCopy() + cluster = cluster.DeepCopy() conditionTime := metav1.Now() offlineStatus, readyStatus := checkReadyByHealthz(ctx, discoveryClient) @@ -104,9 +106,9 @@ func collectIndividualClusterStatus( readyMessage = ClusterNotReachableMsg } - // we skip updating cluster resources and api resources if cluster is not ready + // We skip updating cluster resources and api resources if cluster is not ready if readyStatus == corev1.ConditionTrue { - if err := updateClusterResources(ctx, &cluster.Status, clusterKubeInformer); err != nil { + if err := updateClusterResources(ctx, &cluster.Status, podLister, nodeLister); err != nil { logger.Error(err, "Failed to update cluster resources") readyStatus = corev1.ConditionFalse readyReason = ClusterResourceCollectionFailedReason @@ -125,18 +127,26 @@ func collectIndividualClusterStatus( setClusterCondition(&cluster.Status, &readyCondition) if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { - latestCluster, err := fedClient.CoreV1alpha1().FederatedClusters().Get(context.TODO(), cluster.Name, metav1.GetOptions{}) + latestCluster, err := c.fedClient.CoreV1alpha1().FederatedClusters().Get(
context.TODO(), + cluster.Name, + metav1.GetOptions{}, + ) if err != nil { return err } cluster.Status.DeepCopyInto(&latestCluster.Status) - _, err = fedClient.CoreV1alpha1().FederatedClusters().UpdateStatus(context.TODO(), latestCluster, metav1.UpdateOptions{}) + _, err = c.fedClient.CoreV1alpha1().FederatedClusters().UpdateStatus( + context.TODO(), + latestCluster, + metav1.UpdateOptions{}, + ) return err }); err != nil { - return fmt.Errorf("failed to update cluster status: %w", err) + return 0, fmt.Errorf("failed to update cluster status: %w", err) } - return nil + return 0, nil } func checkReadyByHealthz( @@ -163,19 +173,9 @@ func checkReadyByHealthz( func updateClusterResources( ctx context.Context, clusterStatus *fedcorev1a1.FederatedClusterStatus, - clusterKubeInformer informers.SharedInformerFactory, + podLister corev1listers.PodLister, + nodeLister corev1listers.NodeLister, ) error { - podLister := clusterKubeInformer.Core().V1().Pods().Lister() - podsSynced := clusterKubeInformer.Core().V1().Pods().Informer().HasSynced - nodeLister := clusterKubeInformer.Core().V1().Nodes().Lister() - nodesSynced := clusterKubeInformer.Core().V1().Nodes().Informer().HasSynced - - ctx, cancel := context.WithTimeout(ctx, 5*time.Second) - defer cancel() - if !cache.WaitForNamedCacheSync("federated-cluster-controller-status-collect", ctx.Done(), podsSynced, nodesSynced) { - return fmt.Errorf("timeout waiting for node and pod informer sync") - } - nodes, err := nodeLister.List(labels.Everything()) if err != nil { return fmt.Errorf("failed to list nodes: %w", err) diff --git a/pkg/controllers/federatedcluster/controller.go b/pkg/controllers/federatedcluster/controller.go index d0c999ba..9be0041d 100644 --- a/pkg/controllers/federatedcluster/controller.go +++ b/pkg/controllers/federatedcluster/controller.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2016 The Kubernetes Authors. 
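For context on the listers now injected into updateClusterResources above (its body is unchanged by this patch and not shown here), the aggregation it performs is roughly of the following shape. The status field names and the exact accounting rules are assumptions for illustration, not the project's actual implementation.

// Sum allocatable capacity over schedulable nodes, then subtract the requests
// of non-terminal pods to estimate what is still available.
nodes, err := nodeLister.List(labels.Everything())
if err != nil {
	return fmt.Errorf("failed to list nodes: %w", err)
}
pods, err := podLister.List(labels.Everything())
if err != nil {
	return fmt.Errorf("failed to list pods: %w", err)
}

allocatable := make(corev1.ResourceList)
for _, node := range nodes {
	if node.Spec.Unschedulable {
		continue
	}
	for name, quantity := range node.Status.Allocatable {
		total := allocatable[name]
		total.Add(quantity)
		allocatable[name] = total
	}
}

available := allocatable.DeepCopy()
for _, pod := range pods {
	if pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed {
		continue
	}
	for _, container := range pod.Spec.Containers {
		for name, request := range container.Resources.Requests {
			remaining := available[name]
			remaining.Sub(request)
			available[name] = remaining
		}
	}
}

// Assumed field names on FederatedClusterStatus; the real struct may differ.
clusterStatus.Resources.Allocatable = allocatable
clusterStatus.Resources.Available = available
return nil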
@@ -24,14 +23,13 @@ package federatedcluster import ( "context" "fmt" - "reflect" "time" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/equality" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" - utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" kubeclient "k8s.io/client-go/kubernetes" @@ -45,18 +43,18 @@ import ( fedclient "github.com/kubewharf/kubeadmiral/pkg/client/clientset/versioned" genscheme "github.com/kubewharf/kubeadmiral/pkg/client/generic/scheme" fedcorev1a1informers "github.com/kubewharf/kubeadmiral/pkg/client/informers/externalversions/core/v1alpha1" - fedcorev1a1listers "github.com/kubewharf/kubeadmiral/pkg/client/listers/core/v1alpha1" "github.com/kubewharf/kubeadmiral/pkg/controllers/common" - "github.com/kubewharf/kubeadmiral/pkg/controllers/util" - "github.com/kubewharf/kubeadmiral/pkg/controllers/util/delayingdeliver" - "github.com/kubewharf/kubeadmiral/pkg/controllers/util/federatedclient" - finalizerutils "github.com/kubewharf/kubeadmiral/pkg/controllers/util/finalizers" - "github.com/kubewharf/kubeadmiral/pkg/controllers/util/worker" "github.com/kubewharf/kubeadmiral/pkg/stats" + clusterutil "github.com/kubewharf/kubeadmiral/pkg/util/cluster" + "github.com/kubewharf/kubeadmiral/pkg/util/eventhandlers" + "github.com/kubewharf/kubeadmiral/pkg/util/finalizers" + "github.com/kubewharf/kubeadmiral/pkg/util/informermanager" + "github.com/kubewharf/kubeadmiral/pkg/util/logging" + "github.com/kubewharf/kubeadmiral/pkg/util/worker" ) const ( - ControllerName = "federated-cluster-controller" + FederatedClusterControllerName = "federated-cluster-controller" FinalizerFederatedClusterController = common.DefaultPrefix + "federated-cluster-controller" @@ -71,128 +69,145 @@ type ClusterHealthCheckConfig struct { // FederatedClusterController reconciles a FederatedCluster object type FederatedClusterController struct { - client fedclient.Interface - kubeClient kubeclient.Interface + clusterInformer fedcorev1a1informers.FederatedClusterInformer + federatedInformerManager informermanager.FederatedInformerManager - clusterLister fedcorev1a1listers.FederatedClusterLister - clusterSynced cache.InformerSynced - federatedClient federatedclient.FederatedClientFactory + kubeClient kubeclient.Interface + fedClient fedclient.Interface fedSystemNamespace string clusterHealthCheckConfig *ClusterHealthCheckConfig clusterJoinTimeout time.Duration - eventRecorder record.EventRecorder - metrics stats.Metrics - logger klog.Logger + worker worker.ReconcileWorker[common.QualifiedName] + statusCollectWorker worker.ReconcileWorker[common.QualifiedName] + eventRecorder record.EventRecorder - worker worker.ReconcileWorker - statusCollectWorker worker.ReconcileWorker + metrics stats.Metrics + logger klog.Logger } func NewFederatedClusterController( - client fedclient.Interface, kubeClient kubeclient.Interface, - informer fedcorev1a1informers.FederatedClusterInformer, - federatedClient federatedclient.FederatedClientFactory, + fedClient fedclient.Interface, + clusterInformer fedcorev1a1informers.FederatedClusterInformer, + federatedInformerManager informermanager.FederatedInformerManager, metrics stats.Metrics, - fedsystemNamespace string, - restConfig *rest.Config, - workerCount int, + logger klog.Logger, clusterJoinTimeout time.Duration, + workerCount int, + fedsystemNamespace string, ) (*FederatedClusterController, error) { c := 
&FederatedClusterController{ - client: client, - kubeClient: kubeClient, - clusterLister: informer.Lister(), - clusterSynced: informer.Informer().HasSynced, - federatedClient: federatedClient, - fedSystemNamespace: fedsystemNamespace, + clusterInformer: clusterInformer, + federatedInformerManager: federatedInformerManager, + kubeClient: kubeClient, + fedClient: fedClient, + fedSystemNamespace: fedsystemNamespace, clusterHealthCheckConfig: &ClusterHealthCheckConfig{ Period: time.Minute, }, clusterJoinTimeout: clusterJoinTimeout, metrics: metrics, - logger: klog.LoggerWithValues(klog.Background(), "controller", ControllerName), + logger: logger.WithValues("controller", FederatedClusterControllerName), } broadcaster := record.NewBroadcaster() broadcaster.StartRecordingToSink( &corev1client.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")}, ) - broadcaster.StartLogging(klog.V(4).Infof) + broadcaster.StartLogging(klog.V(6).Infof) c.eventRecorder = broadcaster.NewRecorder( genscheme.Scheme, corev1.EventSource{Component: ("federatedcluster-controller")}, ) - c.worker = worker.NewReconcileWorker( + c.worker = worker.NewReconcileWorker[common.QualifiedName]( + FederatedClusterControllerName, + nil, c.reconcile, worker.RateLimiterOptions{}, workerCount, metrics, - delayingdeliver.NewMetricTags("federatedcluster-worker", "FederatedCluster"), ) - c.statusCollectWorker = worker.NewReconcileWorker( + c.statusCollectWorker = worker.NewReconcileWorker[common.QualifiedName]( + FederatedClusterControllerName, + nil, c.collectClusterStatus, worker.RateLimiterOptions{ InitialDelay: 50 * time.Millisecond, }, workerCount, metrics, - delayingdeliver.NewMetricTags("federatedcluster-status-collect-worker", "FederatedCluster"), ) - informer.Informer(). - AddEventHandler(util.NewTriggerOnGenerationAndMetadataChanges(c.worker.EnqueueObject, - func(oldMeta, newMeta metav1.Object) bool { - if !reflect.DeepEqual(oldMeta.GetAnnotations(), newMeta.GetAnnotations()) || - !reflect.DeepEqual(oldMeta.GetFinalizers(), newMeta.GetFinalizers()) { - return true - } - return false - })) + clusterInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnChanges( + func(old metav1.Object, cur metav1.Object) bool { + if old.GetGeneration() != cur.GetGeneration() { + return true + } + if !equality.Semantic.DeepEqual(old.GetAnnotations(), cur.GetAnnotations()) { + return true + } + if !equality.Semantic.DeepEqual(old.GetFinalizers(), cur.GetFinalizers()) { + return true + } + return false + }, + common.NewQualifiedName, + c.worker.Enqueue, + )) return c, nil } +func (c *FederatedClusterController) HasSynced() bool { + return c.clusterInformer.Informer().HasSynced() +} + func (c *FederatedClusterController) IsControllerReady() bool { - return c.clusterSynced() + return c.HasSynced() } func (c *FederatedClusterController) Run(ctx context.Context) { - defer utilruntime.HandleCrash() + ctx, logger := logging.InjectLogger(ctx, c.logger) - c.logger.Info("Starting controller") - defer c.logger.Info("Stopping controller") + logger.Info("Starting controller") + defer logger.Info("Stopping controller") - if !cache.WaitForNamedCacheSync("federated-controller", ctx.Done(), c.clusterSynced) { + if !cache.WaitForNamedCacheSync("federated-controller", ctx.Done(), c.HasSynced) { + logger.Error(nil, "Timed out waiting for cache sync") return } - c.worker.Run(ctx.Done()) - c.statusCollectWorker.Run(ctx.Done()) + logger.Info("Caches are synced") + + c.worker.Run(ctx) + c.statusCollectWorker.Run(ctx) - // periodically enqueue all clusters to
trigger status collection + // Periodically enqueue all clusters to trigger status collection. go wait.Until(c.enqueueAllJoinedClusters, c.clusterHealthCheckConfig.Period, ctx.Done()) <-ctx.Done() } -func (c *FederatedClusterController) reconcile(qualifiedName common.QualifiedName) (status worker.Result) { +func (c *FederatedClusterController) reconcile( + ctx context.Context, + key common.QualifiedName, +) (status worker.Result) { _ = c.metrics.Rate("federated-cluster-controller.throughput", 1) - logger := c.logger.WithValues("control-loop", "reconcile", "object", qualifiedName.String()) - ctx := klog.NewContext(context.TODO(), logger) + ctx, logger := logging.InjectLoggerValues(ctx, "cluster", key.String()) + startTime := time.Now() logger.V(3).Info("Starting reconcile") - defer c.metrics.Duration("federated-cluster-controller.latency", startTime) defer func() { + c.metrics.Duration(fmt.Sprintf("%s.latency", FederatedClusterControllerName), startTime) logger.WithValues("duration", time.Since(startTime), "status", status.String()).V(3).Info("Finished reconcile") }() - cluster, err := c.clusterLister.Get(qualifiedName.Name) + cluster, err := c.clusterInformer.Lister().Get(key.Name) if err != nil && apierrors.IsNotFound(err) { logger.V(3).Info("Observed cluster deletion") return worker.StatusAllOK @@ -201,19 +216,12 @@ func (c *FederatedClusterController) reconcile(qualifiedName common.QualifiedNam logger.Error(err, "Failed to get cluster from store") return worker.StatusError } + cluster = cluster.DeepCopy() if cluster.GetDeletionTimestamp() != nil { logger.V(2).Info("Handle terminating cluster") - err := handleTerminatingCluster( - ctx, - cluster, - c.client, - c.kubeClient, - c.eventRecorder, - c.fedSystemNamespace, - ) - if err != nil { + if err := c.handleTerminatingCluster(ctx, cluster); err != nil { if apierrors.IsConflict(err) { return worker.StatusConflict } @@ -223,9 +231,8 @@ func (c *FederatedClusterController) reconcile(qualifiedName common.QualifiedNam return worker.StatusAllOK } - if cluster, err = ensureFinalizer(ctx, cluster, c.client); err != nil { + if cluster, err = c.ensureFinalizer(ctx, cluster); err != nil { if apierrors.IsConflict(err) { - // Ignore IsConflict errors because we will retry on the next reconcile return worker.StatusConflict } logger.Error(err, "Failed to ensure cluster finalizer") @@ -236,17 +243,9 @@ func (c *FederatedClusterController) reconcile(qualifiedName common.QualifiedNam return worker.StatusAllOK } - // not joined yet and not failed, so we try to join + // Not joined yet and not failed, so we try to join logger.V(2).Info("Handle unjoined cluster") - cluster, newCondition, newJoinPerformed, err := handleNotJoinedCluster( - ctx, - cluster, - c.client, - c.kubeClient, - c.eventRecorder, - c.fedSystemNamespace, - c.clusterJoinTimeout, - ) + cluster, newCondition, newJoinPerformed, err := c.handleNotJoinedCluster(ctx, cluster) needsUpdate := false if newCondition != nil { @@ -256,8 +255,7 @@ func (c *FederatedClusterController) reconcile(qualifiedName common.QualifiedNam newCondition.Type = fedcorev1a1.ClusterJoined newCondition.LastProbeTime = currentTime - // The condition's last transition time is updated to the current time only if - // the status has changed. + // The condition's last transition time is updated to the current time only if the status has changed. 
oldCondition := getClusterCondition(&cluster.Status, fedcorev1a1.ClusterJoined) if oldCondition != nil && oldCondition.Status == newCondition.Status { newCondition.LastTransitionTime = oldCondition.LastTransitionTime @@ -274,8 +272,10 @@ func (c *FederatedClusterController) reconcile(qualifiedName common.QualifiedNam if needsUpdate { var updateErr error - if cluster, updateErr = c.client.CoreV1alpha1().FederatedClusters().UpdateStatus( - ctx, cluster, metav1.UpdateOptions{}, + if cluster, updateErr = c.fedClient.CoreV1alpha1().FederatedClusters().UpdateStatus( + ctx, + cluster, + metav1.UpdateOptions{}, ); updateErr != nil { logger.Error(updateErr, "Failed to update cluster status") err = updateErr @@ -289,7 +289,7 @@ func (c *FederatedClusterController) reconcile(qualifiedName common.QualifiedNam return worker.StatusError } - // trigger initial status collection if successfully joined + // Trigger initial status collection if successfully joined if joined, alreadyFailed := isClusterJoined(&cluster.Status); joined && !alreadyFailed { c.statusCollectWorker.EnqueueObject(cluster) } @@ -297,17 +297,26 @@ func (c *FederatedClusterController) reconcile(qualifiedName common.QualifiedNam return worker.StatusAllOK } -func (c *FederatedClusterController) collectClusterStatus(qualifiedName common.QualifiedName) (status worker.Result) { - logger := c.logger.WithValues("control-loop", "status-collect", "object", qualifiedName.String()) - ctx := klog.NewContext(context.TODO(), logger) +func (c *FederatedClusterController) collectClusterStatus( + ctx context.Context, + key common.QualifiedName, +) (status worker.Result) { + ctx, logger := logging.InjectLoggerValues(ctx, "cluster", key.String(), "worker", "status-collect") + startTime := time.Now() - logger.V(3).Info("Start status collection") + logger.V(3).Info("Starting to collect cluster status") defer func() { - logger.WithValues("duration", time.Since(startTime), "status", status.String()).V(3).Info("Finished status collection") + c.metrics.Duration(fmt.Sprintf("%s.status-collect.latency", FederatedClusterControllerName), startTime) + logger.WithValues( + "duration", + time.Since(startTime), + "status", + status.String(), + ).V(3).Info("Finished collecting cluster status") }() - cluster, err := c.clusterLister.Get(qualifiedName.Name) + cluster, err := c.clusterInformer.Lister().Get(key.Name) if err != nil && apierrors.IsNotFound(err) { logger.V(3).Info("Observed cluster deletion") return worker.StatusAllOK @@ -318,80 +327,77 @@ func (c *FederatedClusterController) collectClusterStatus(qualifiedName common.Q } cluster = cluster.DeepCopy() + if shouldCollectClusterStatus(cluster, c.clusterHealthCheckConfig.Period) { - if err := collectIndividualClusterStatus(ctx, cluster, c.client, c.federatedClient); err != nil { + retryAfter, err := c.collectIndividualClusterStatus(ctx, cluster) + if err != nil { logger.Error(err, "Failed to collect cluster status") return worker.StatusError } + if retryAfter > 0 { + return worker.Result{ + Success: true, + RequeueAfter: &retryAfter, + Backoff: false, + } + } } return worker.StatusAllOK } -func ensureFinalizer( +func (c *FederatedClusterController) ensureFinalizer( ctx context.Context, cluster *fedcorev1a1.FederatedCluster, - client fedclient.Interface, ) (*fedcorev1a1.FederatedCluster, error) { - updated, err := finalizerutils.AddFinalizers( - cluster, - sets.NewString(FinalizerFederatedClusterController), - ) + updated, err := finalizers.AddFinalizers(cluster, sets.NewString(FinalizerFederatedClusterController)) 
if err != nil { return nil, err } if updated { - return client.CoreV1alpha1(). - FederatedClusters(). - Update(ctx, cluster, metav1.UpdateOptions{}) + return c.fedClient.CoreV1alpha1().FederatedClusters().Update(ctx, cluster, metav1.UpdateOptions{}) } return cluster, nil } -func handleTerminatingCluster( +func (c *FederatedClusterController) handleTerminatingCluster( ctx context.Context, cluster *fedcorev1a1.FederatedCluster, - client fedclient.Interface, - kubeClient kubeclient.Interface, - eventRecorder record.EventRecorder, - fedSystemNamespace string, ) error { finalizers := sets.New(cluster.GetFinalizers()...) if !finalizers.Has(FinalizerFederatedClusterController) { return nil } - // we need to ensure that all other finalizers are removed before we start cleanup as other controllers may - // still rely on the credentials to do their own cleanup + // We need to ensure that all other finalizers are removed before we start cleanup as other controllers may still + // rely on the service account credentials to do their own cleanup. if len(finalizers) > 1 { - eventRecorder.Eventf( + c.eventRecorder.Eventf( cluster, corev1.EventTypeNormal, EventReasonHandleTerminatingClusterBlocked, - "waiting for other finalizers to be cleaned up", + "Waiting for other finalizers to be cleaned up", ) - return nil } // Only perform clean-up if we made any effectual changes to the cluster during join. if cluster.Status.JoinPerformed { - clusterSecret, clusterKubeClient, err := getClusterClient(ctx, kubeClient, fedSystemNamespace, cluster) + clusterSecret, clusterKubeClient, err := c.getClusterClient(ctx, cluster) if err != nil { - eventRecorder.Eventf( + c.eventRecorder.Eventf( cluster, corev1.EventTypeWarning, EventReasonHandleTerminatingClusterFailed, "Failed to get cluster client: %v", err, ) - return fmt.Errorf("failed to get cluster client: %w", err) } - // 1. cleanup service account token from cluster secret if required + // 1. cleanup service account token from cluster secret if required. if cluster.Spec.UseServiceAccountToken { var err error @@ -399,32 +405,32 @@ func handleTerminatingCluster( _, tokenKeyExists := clusterSecret.Data[common.ClusterServiceAccountTokenKey] _, caExists := clusterSecret.Data[common.ClusterServiceAccountCAKey] if tokenKeyExists || caExists { - delete(clusterSecret.Data, ServiceAccountTokenKey) - delete(clusterSecret.Data, ServiceAccountCAKey) - - _, err = kubeClient.CoreV1(). - Secrets(fedSystemNamespace). - Update(ctx, clusterSecret, metav1.UpdateOptions{}) - if err != nil { - return fmt.Errorf( - "failed to remove service account info from cluster secret: %w", - err, - ) + delete(clusterSecret.Data, common.ClusterServiceAccountTokenKey) + delete(clusterSecret.Data, common.ClusterServiceAccountCAKey) + + if _, err = c.kubeClient.CoreV1().Secrets(c.fedSystemNamespace).Update( + ctx, + clusterSecret, + metav1.UpdateOptions{}, + ); err != nil { + return fmt.Errorf("failed to remove service account info from cluster secret: %w", err) } } } // 2. connect to cluster and perform cleanup - err = clusterKubeClient.CoreV1(). - Namespaces(). 
- Delete(ctx, fedSystemNamespace, metav1.DeleteOptions{}) + err = clusterKubeClient.CoreV1().Namespaces().Delete( + ctx, + c.fedSystemNamespace, + metav1.DeleteOptions{}, + ) if err != nil && !apierrors.IsNotFound(err) { - eventRecorder.Eventf( + c.eventRecorder.Eventf( cluster, corev1.EventTypeWarning, EventReasonHandleTerminatingClusterFailed, - "delete system namespace from cluster %q failed: %v, will retry later", + "Delete system namespace from cluster %q failed: %v, will retry later", cluster.Name, err, ) @@ -432,22 +438,21 @@ func handleTerminatingCluster( } } - // we have already checked that we are the last finalizer so we can simply set finalizers to be empty + // We have already checked that we are the last finalizer so we can simply set finalizers to be empty. cluster.SetFinalizers(nil) - _, err := client.CoreV1alpha1(). - FederatedClusters(). - Update(ctx, cluster, metav1.UpdateOptions{}) - if err != nil { + if _, err := c.fedClient.CoreV1alpha1().FederatedClusters().Update( + ctx, + cluster, + metav1.UpdateOptions{}, + ); err != nil { return fmt.Errorf("failed to update cluster for finalizer removal: %w", err) } return nil } -func getClusterClient( +func (c *FederatedClusterController) getClusterClient( ctx context.Context, - hostClient kubeclient.Interface, - fedSystemNamespace string, cluster *fedcorev1a1.FederatedCluster, ) (*corev1.Secret, kubeclient.Interface, error) { restConfig := &rest.Config{Host: cluster.Spec.APIEndpoint} @@ -457,12 +462,16 @@ func getClusterClient( return nil, nil, fmt.Errorf("cluster secret is not set") } - clusterSecret, err := hostClient.CoreV1().Secrets(fedSystemNamespace).Get(ctx, clusterSecretName, metav1.GetOptions{}) + clusterSecret, err := c.kubeClient.CoreV1().Secrets(c.fedSystemNamespace).Get( + ctx, + clusterSecretName, + metav1.GetOptions{}, + ) if err != nil { return nil, nil, fmt.Errorf("failed to get cluster secret: %w", err) } - if err := util.PopulateAuthDetailsFromSecret(restConfig, cluster.Spec.Insecure, clusterSecret, false); err != nil { + if err := clusterutil.PopulateAuthDetailsFromSecret(restConfig, cluster.Spec.Insecure, clusterSecret, false); err != nil { return nil, nil, fmt.Errorf("cluster secret malformed: %w", err) } @@ -475,13 +484,13 @@ func getClusterClient( } func (c *FederatedClusterController) enqueueAllJoinedClusters() { - clusters, err := c.clusterLister.List(labels.Everything()) + clusters, err := c.clusterInformer.Lister().List(labels.Everything()) if err != nil { c.logger.Error(err, "Failed to enqueue all clusters") } for _, cluster := range clusters { - if util.IsClusterJoined(&cluster.Status) { + if clusterutil.IsClusterJoined(&cluster.Status) { c.statusCollectWorker.EnqueueObject(cluster) } } diff --git a/pkg/controllers/federatedcluster/util.go b/pkg/controllers/federatedcluster/util.go index 32ff9a95..e21ab7ae 100644 --- a/pkg/controllers/federatedcluster/util.go +++ b/pkg/controllers/federatedcluster/util.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2023 The KubeAdmiral Authors. 
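One helper referenced by this controller's status-collect path further below, shouldCollectClusterStatus, is not part of this patch. Purely as a hypothetical sketch and not necessarily the project's implementation, it would gate collection on the configured health-check period; the ClusterReady condition type and the probe-time comparison are assumptions.

// Hypothetical: collect if the cluster has never been probed, or if the last
// probe of the ready condition is older than the collection period.
func shouldCollectClusterStatus(cluster *fedcorev1a1.FederatedCluster, period time.Duration) bool {
	readyCondition := getClusterCondition(&cluster.Status, fedcorev1a1.ClusterReady)
	if readyCondition == nil {
		return true
	}
	return time.Since(readyCondition.LastProbeTime.Time) >= period
}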
diff --git a/pkg/controllers/nsautoprop/controller.go b/pkg/controllers/nsautoprop/controller.go index 52f36efe..ed138372 100644 --- a/pkg/controllers/nsautoprop/controller.go +++ b/pkg/controllers/nsautoprop/controller.go @@ -28,7 +28,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" corev1informers "k8s.io/client-go/informers/core/v1" kubeclient "k8s.io/client-go/kubernetes" @@ -133,22 +132,27 @@ func NewNamespaceAutoPropagationController( ) if _, err := c.clusterFedObjectInformer.Informer().AddEventHandlerWithResyncPeriod( - eventhandlers.NewTriggerOnAllChanges(func(o runtime.Object) { - fedObj := o.(*fedcorev1a1.ClusterFederatedObject) - logger := c.logger.WithValues("cluster-federated-object", common.NewQualifiedName(fedObj)) - - srcMeta, err := fedObj.Spec.GetTemplateAsUnstructured() - if err != nil { - logger.Error(err, "Failed to get source object's metadata from ClusterFederatedObject") - return - } - - if srcMeta.GetKind() != common.NamespaceKind || !c.shouldBeAutoPropagated(srcMeta) { - return - } - - c.worker.Enqueue(common.QualifiedName{Name: fedObj.GetName()}) - }), util.NoResyncPeriod); err != nil { + eventhandlers.NewTriggerOnAllChanges( + func(obj *fedcorev1a1.ClusterFederatedObject) *fedcorev1a1.ClusterFederatedObject { + return obj + }, + func(obj *fedcorev1a1.ClusterFederatedObject) { + srcMeta, err := obj.Spec.GetTemplateAsUnstructured() + if err != nil { + logger.Error( + err, + "Failed to get source object's metadata from ClusterFederatedObject", + "object", + common.NewQualifiedName(obj), + ) + return + } + if srcMeta.GetKind() != common.NamespaceKind || !c.shouldBeAutoPropagated(srcMeta) { + return + } + c.worker.Enqueue(common.QualifiedName{Name: obj.GetName()}) + }, + ), util.NoResyncPeriod); err != nil { return nil, err } @@ -240,7 +244,9 @@ func (c *Controller) reconcile(ctx context.Context, qualifiedName common.Qualifi } if updated { - _, err = c.fedClient.CoreV1alpha1().ClusterFederatedObjects().Update(ctx, fedNamespace, metav1.UpdateOptions{}) + _, err = c.fedClient.CoreV1alpha1(). + ClusterFederatedObjects(). 
+ Update(ctx, fedNamespace, metav1.UpdateOptions{}) if err != nil { if apierrors.IsConflict(err) { return worker.StatusConflict @@ -355,7 +361,10 @@ func (c *Controller) shouldBeAutoPropagated(fedNamespace *unstructured.Unstructu return true } -func (c *Controller) ensureAnnotation(fedNamespace *fedcorev1a1.ClusterFederatedObject, key, value string) (bool, error) { +func (c *Controller) ensureAnnotation( + fedNamespace *fedcorev1a1.ClusterFederatedObject, + key, value string, +) (bool, error) { needsUpdate, err := annotationutil.AddAnnotation(fedNamespace, key, value) if err != nil { return false, fmt.Errorf( diff --git a/pkg/controllers/policyrc/controller.go b/pkg/controllers/policyrc/controller.go index dfd1ceca..869ef57e 100644 --- a/pkg/controllers/policyrc/controller.go +++ b/pkg/controllers/policyrc/controller.go @@ -22,7 +22,6 @@ import ( "time" apierrors "k8s.io/apimachinery/pkg/api/errors" - pkgruntime "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/rest" "k8s.io/client-go/tools/cache" "k8s.io/klog/v2" @@ -33,8 +32,8 @@ import ( fedcorev1a1informers "github.com/kubewharf/kubeadmiral/pkg/client/informers/externalversions/core/v1alpha1" "github.com/kubewharf/kubeadmiral/pkg/controllers/common" "github.com/kubewharf/kubeadmiral/pkg/controllers/override" - "github.com/kubewharf/kubeadmiral/pkg/controllers/util" "github.com/kubewharf/kubeadmiral/pkg/stats" + "github.com/kubewharf/kubeadmiral/pkg/util/eventhandlers" "github.com/kubewharf/kubeadmiral/pkg/util/fedobjectadapters" "github.com/kubewharf/kubeadmiral/pkg/util/logging" "github.com/kubewharf/kubeadmiral/pkg/util/worker" @@ -71,7 +70,6 @@ func NewPolicyRCController( logger klog.Logger, workerCount int, ) (*Controller, error) { - c := &Controller{ client: generic.NewForConfigOrDie(restConfig), fedObjectInformer: fedInformerFactory.Core().V1alpha1().FederatedObjects(), @@ -84,15 +82,17 @@ func NewPolicyRCController( logger: logger.WithValues("controller", ControllerName), } - if _, err := c.fedObjectInformer.Informer().AddEventHandler(util.NewTriggerOnAllChanges(func(o pkgruntime.Object) { - c.countWorker.Enqueue(common.NewQualifiedName(o)) - })); err != nil { + if _, err := c.fedObjectInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChanges( + common.NewQualifiedName, + c.countWorker.Enqueue, + )); err != nil { return nil, err } - if _, err := c.clusterFedObjectInformer.Informer().AddEventHandler(util.NewTriggerOnAllChanges(func(o pkgruntime.Object) { - c.countWorker.Enqueue(common.NewQualifiedName(o)) - })); err != nil { + if _, err := c.clusterFedObjectInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChanges( + common.NewQualifiedName, + c.countWorker.Enqueue, + )); err != nil { return nil, err } @@ -141,27 +141,31 @@ func NewPolicyRCController( metrics, ) - persistPpWorkerTrigger := func(o pkgruntime.Object) { - c.persistPpWorker.Enqueue(common.NewQualifiedName(o)) - } - - if _, err := c.propagationPolicyInformer.Informer().AddEventHandler(util.NewTriggerOnAllChanges(persistPpWorkerTrigger)); err != nil { + if _, err := c.propagationPolicyInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChanges( + common.NewQualifiedName, + c.persistPpWorker.Enqueue, + )); err != nil { return nil, err } - if _, err := c.clusterPropagationPolicyInformer.Informer().AddEventHandler(util.NewTriggerOnAllChanges(persistPpWorkerTrigger)); err != nil { + if _, err := c.clusterPropagationPolicyInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChanges( + common.NewQualifiedName, + 
c.persistPpWorker.Enqueue, + )); err != nil { return nil, err } - persistOpWorkerTrigger := func(o pkgruntime.Object) { - c.persistOpWorker.Enqueue(common.NewQualifiedName(o)) - } - - if _, err := c.overridePolicyInformer.Informer().AddEventHandler(util.NewTriggerOnAllChanges(persistOpWorkerTrigger)); err != nil { + if _, err := c.overridePolicyInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChanges( + common.NewQualifiedName, + c.persistOpWorker.Enqueue, + )); err != nil { return nil, err } - if _, err := c.clusterOverridePolicyInformer.Informer().AddEventHandler(util.NewTriggerOnAllChanges(persistOpWorkerTrigger)); err != nil { + if _, err := c.clusterOverridePolicyInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChanges( + common.NewQualifiedName, + c.persistOpWorker.Enqueue, + )); err != nil { return nil, err } @@ -223,7 +227,12 @@ func (c *Controller) reconcileCount(ctx context.Context, qualifiedName common.Qu Info("Policyrc count controller finished reconciling") }() - fedObj, err := fedobjectadapters.GetFromLister(c.fedObjectInformer.Lister(), c.clusterFedObjectInformer.Lister(), qualifiedName.Namespace, qualifiedName.Name) + fedObj, err := fedobjectadapters.GetFromLister( + c.fedObjectInformer.Lister(), + c.clusterFedObjectInformer.Lister(), + qualifiedName.Namespace, + qualifiedName.Name, + ) if err != nil && !apierrors.IsNotFound(err) { logger.Error(err, "Failed to get federated object") return worker.StatusError @@ -270,7 +279,9 @@ func (c *Controller) reconcilePersist( startTime := time.Now() defer func() { c.metrics.Duration(fmt.Sprintf("policyrc-persist-%s-controller.latency", metricName), startTime) - logger.V(3).WithValues("duration", time.Since(startTime)).Info("Policyrc persist controller finished reconciling") + logger.V(3). + WithValues("duration", time.Since(startTime)). + Info("Policyrc persist controller finished reconciling") }() store := clusterScopeStore diff --git a/pkg/controllers/status/controller.go b/pkg/controllers/status/controller.go index 688b98f1..15959145 100644 --- a/pkg/controllers/status/controller.go +++ b/pkg/controllers/status/controller.go @@ -38,7 +38,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/labels" - pkgruntime "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/cache" @@ -153,9 +152,12 @@ func NewStatusController( // Build queue for triggering cluster reconciliations.
s.clusterQueue = workqueue.NewNamedDelayingQueue("status-controller-cluster-queue") - fedObjectHandler := util.NewTriggerOnAllChanges(func(o pkgruntime.Object) { - s.enqueueEnableCollectedStatusObject(common.NewQualifiedName(o), 0) - }) + fedObjectHandler := eventhandlers.NewTriggerOnAllChanges( + common.NewQualifiedName, + func(key common.QualifiedName) { + s.enqueueEnableCollectedStatusObject(key, 0) + }, + ) if _, err := s.fedObjectInformer.Informer().AddEventHandler(fedObjectHandler); err != nil { return nil, err @@ -165,15 +167,17 @@ func NewStatusController( return nil, err } - if _, err := s.collectedStatusInformer.Informer().AddEventHandler(util.NewTriggerOnAllChanges(func(o pkgruntime.Object) { - s.worker.Enqueue(common.NewQualifiedName(o)) - })); err != nil { + if _, err := s.collectedStatusInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChanges( + common.NewQualifiedName, + s.worker.Enqueue, + )); err != nil { return nil, err } - if _, err := s.clusterCollectedStatusInformer.Informer().AddEventHandler(util.NewTriggerOnAllChanges(func(o pkgruntime.Object) { - s.worker.Enqueue(common.NewQualifiedName(o)) - })); err != nil { + if _, err := s.clusterCollectedStatusInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChanges( + common.NewQualifiedName, + s.worker.Enqueue, + )); err != nil { return nil, err } @@ -189,20 +193,23 @@ func NewStatusController( return nil } - return eventhandlers.NewTriggerOnAllChanges(func(o pkgruntime.Object) { - obj := o.(*unstructured.Unstructured) - - ftc, exists := s.ftcManager.GetResourceFTC(obj.GroupVersionKind()) - if !exists { - return - } + return eventhandlers.NewTriggerOnAllChanges( + func(uns *unstructured.Unstructured) *unstructured.Unstructured { + return uns + }, + func(uns *unstructured.Unstructured) { + ftc, exists := s.ftcManager.GetResourceFTC(uns.GroupVersionKind()) + if !exists { + return + } - federatedName := common.QualifiedName{ - Namespace: obj.GetNamespace(), - Name: naming.GenerateFederatedObjectName(obj.GetName(), ftc.GetName()), - } - s.worker.EnqueueWithDelay(federatedName, s.memberObjectEnqueueDelay) - }) + federatedName := common.QualifiedName{ + Namespace: uns.GetNamespace(), + Name: naming.GenerateFederatedObjectName(uns.GetName(), ftc.GetName()), + } + s.worker.EnqueueWithDelay(federatedName, s.memberObjectEnqueueDelay) + }, + ) }, }); err != nil { return nil, fmt.Errorf("failed to add event handler generator: %w", err) @@ -298,7 +305,10 @@ func (s *StatusController) reconcileOnClusterChange() { } } -func (s *StatusController) reconcile(ctx context.Context, qualifiedName common.QualifiedName) (reconcileStatus worker.Result) { +func (s *StatusController) reconcile( + ctx context.Context, + qualifiedName common.QualifiedName, +) (reconcileStatus worker.Result) { keyedLogger := s.logger.WithValues("federated-name", qualifiedName.String()) ctx = klog.NewContext(ctx, keyedLogger) @@ -327,7 +337,13 @@ func (s *StatusController) reconcile(ctx context.Context, qualifiedName common.Q if fedObject == nil || fedObject.GetDeletionTimestamp() != nil { keyedLogger.V(1).Info("No federated type found, deleting status object") - err = collectedstatusadapters.Delete(ctx, s.fedClient.CoreV1alpha1(), qualifiedName.Namespace, qualifiedName.Name, metav1.DeleteOptions{}) + err = collectedstatusadapters.Delete( + ctx, + s.fedClient.CoreV1alpha1(), + qualifiedName.Namespace, + qualifiedName.Name, + metav1.DeleteOptions{}, + ) if err != nil && !apierrors.IsNotFound(err) { return worker.StatusError } @@ -379,7 
+395,13 @@ func (s *StatusController) reconcile(ctx context.Context, qualifiedName common.Q var rsDigestsAnnotation string if targetIsDeployment { - latestReplicasetDigests, err := s.latestReplicasetDigests(ctx, clusterNames, templateQualifiedName, templateGVK, typeConfig) + latestReplicasetDigests, err := s.latestReplicasetDigests( + ctx, + clusterNames, + templateQualifiedName, + templateGVK, + typeConfig, + ) if err != nil { keyedLogger.Error(err, "Failed to get latest replicaset digests") } else { @@ -426,7 +448,12 @@ func (s *StatusController) reconcile(ctx context.Context, qualifiedName common.Q if existingStatus == nil { collectedStatus.GetLastUpdateTime().Time = time.Now() - _, err = collectedstatusadapters.Create(ctx, s.fedClient.CoreV1alpha1(), collectedStatus, metav1.CreateOptions{}) + _, err = collectedstatusadapters.Create( + ctx, + s.fedClient.CoreV1alpha1(), + collectedStatus, + metav1.CreateOptions{}, + ) if err != nil { if apierrors.IsAlreadyExists(err) { return worker.StatusConflict @@ -594,8 +621,14 @@ func (s *StatusController) clusterStatuses( sort.Slice(failedFields, func(i, j int) bool { return failedFields[i] < failedFields[j] }) - resourceClusterStatus.Error = fmt.Sprintf("Failed to get those fields: %s", strings.Join(failedFields, ", ")) - errList = append(errList, fmt.Sprintf("cluster-name: %s, error-info: %s", clusterName, resourceClusterStatus.Error)) + resourceClusterStatus.Error = fmt.Sprintf( + "Failed to get those fields: %s", + strings.Join(failedFields, ", "), + ) + errList = append( + errList, + fmt.Sprintf("cluster-name: %s, error-info: %s", clusterName, resourceClusterStatus.Error), + ) } clusterStatus = append(clusterStatus, resourceClusterStatus) } @@ -698,7 +731,9 @@ func (s *StatusController) realUpdatedReplicas( keyedLogger.WithValues("cluster-name", clusterName).Error(err, "Failed to get latestreplicaset digest") continue } - keyedLogger.WithValues("cluster-name", clusterName, "replicas-digest", digest).V(4).Info("Got latestreplicaset digest") + keyedLogger.WithValues("cluster-name", clusterName, "replicas-digest", digest). + V(4). + Info("Got latestreplicaset digest") if digest.CurrentRevision != revision { continue } diff --git a/pkg/util/eventhandlers/eventhandler.go b/pkg/util/eventhandlers/eventhandler.go index a5dbd3ae..1e2cdf5a 100644 --- a/pkg/util/eventhandlers/eventhandler.go +++ b/pkg/util/eventhandlers/eventhandler.go @@ -19,13 +19,15 @@ package eventhandlers import ( "reflect" - "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/tools/cache" ) -// NewTriggerOnAllChanges returns a cache.ResourceEventHandlerFuncs that will call the given function on all object -// changes. The given function will also be called on receiving cache.DeletedFinalStateUnknown deletion events. -func NewTriggerOnAllChanges(triggerFunc func(runtime.Object)) *cache.ResourceEventHandlerFuncs { +// NewTriggerOnAllChanges returns a cache.ResourceEventHandlerFuncs that will call the given triggerFunc on all object +// changes. The object is first transformed with the given keyFunc. triggerFunc is also called for add or delete events. 
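As a usage sketch of the generic helper defined just below (informer and worker names are assumed), both common instantiation patterns look like this: a key function when the trigger only needs an identifier, and an identity key function when it needs the whole object.

// Enqueue a QualifiedName for every event; Source and Key are inferred by the compiler.
if _, err := fedObjectInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChanges(
	common.NewQualifiedName,
	c.worker.Enqueue,
)); err != nil {
	return nil, err
}

// Pass the typed object through when the trigger needs more than a key.
if _, err := clusterFedObjectInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChanges(
	func(o *fedcorev1a1.ClusterFederatedObject) *fedcorev1a1.ClusterFederatedObject { return o },
	func(o *fedcorev1a1.ClusterFederatedObject) {
		c.worker.Enqueue(common.QualifiedName{Name: o.GetName()})
	},
)); err != nil {
	return nil, err
}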
+func NewTriggerOnAllChanges[Source any, Key any]( + keyFunc func(Source) Key, + triggerFunc func(Key), +) *cache.ResourceEventHandlerFuncs { return &cache.ResourceEventHandlerFuncs{ DeleteFunc: func(old interface{}) { if deleted, ok := old.(cache.DeletedFinalStateUnknown); ok { @@ -34,17 +36,51 @@ func NewTriggerOnAllChanges(triggerFunc func(runtime.Object)) *cache.ResourceEve return } } - oldObj := old.(runtime.Object) - triggerFunc(oldObj) + oldSource := old.(Source) + triggerFunc(keyFunc(oldSource)) }, AddFunc: func(cur interface{}) { - curObj := cur.(runtime.Object) - triggerFunc(curObj) + curObj := cur.(Source) + triggerFunc(keyFunc(curObj)) }, UpdateFunc: func(old, cur interface{}) { if !reflect.DeepEqual(old, cur) { - curObj := cur.(runtime.Object) - triggerFunc(curObj) + curObj := cur.(Source) + triggerFunc(keyFunc(curObj)) + } + }, + } +} + +// NewTriggerOnChanges returns a cache.ResourceEventHandlerFuncs that will call the given triggerFunc on object changes +// that passes the given predicate. The object is first transformed with the given keyFunc. triggerFunc is also called +// for add and delete events. +func NewTriggerOnChanges[Source any, Key any]( + predicate func(old, cur Source) bool, + keyFunc func(Source) Key, + triggerFunc func(Key), +) *cache.ResourceEventHandlerFuncs { + return &cache.ResourceEventHandlerFuncs{ + DeleteFunc: func(old interface{}) { + if deleted, ok := old.(cache.DeletedFinalStateUnknown); ok { + // This object might be stale but ok for our current usage. + old = deleted.Obj + if old == nil { + return + } + } + oldObj := old.(Source) + triggerFunc(keyFunc(oldObj)) + }, + AddFunc: func(cur interface{}) { + curObj := cur.(Source) + triggerFunc(keyFunc(curObj)) + }, + UpdateFunc: func(old, cur interface{}) { + oldObj := old.(Source) + curObj := cur.(Source) + if predicate(oldObj, curObj) { + triggerFunc(keyFunc(curObj)) } }, } From f87a860550a037d5c27598ffa2d551b337ae9eb0 Mon Sep 17 00:00:00 2001 From: "hawjia.lim" Date: Tue, 25 Jul 2023 11:54:04 +0800 Subject: [PATCH 04/22] refactor(cluster-controller): bootstrap controller-manager with cluster controller --- .../app/controllermanager.go | 3 +-- cmd/controller-manager/app/core.go | 25 +++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/cmd/controller-manager/app/controllermanager.go b/cmd/controller-manager/app/controllermanager.go index 2300a2b1..889dc68c 100644 --- a/cmd/controller-manager/app/controllermanager.go +++ b/cmd/controller-manager/app/controllermanager.go @@ -37,7 +37,6 @@ import ( const ( FederatedClusterControllerName = "cluster" FederateControllerName = "federate" - MonitorControllerName = "monitor" FollowerControllerName = "follower" PolicyRCControllerName = "policyrc" OverrideControllerName = "overridepolicy" @@ -53,7 +52,7 @@ var knownControllers = map[string]controllermanager.StartControllerFunc{ StatusControllerName: startStatusController, } -var controllersDisabledByDefault = sets.New(MonitorControllerName) +var controllersDisabledByDefault = sets.New[string]() // Run starts the controller manager according to the given options. 
func Run(ctx context.Context, opts *options.Options) { diff --git a/cmd/controller-manager/app/core.go b/cmd/controller-manager/app/core.go index ccc98b00..a0ee4513 100644 --- a/cmd/controller-manager/app/core.go +++ b/cmd/controller-manager/app/core.go @@ -29,6 +29,7 @@ import ( "github.com/kubewharf/kubeadmiral/pkg/controllers/override" "github.com/kubewharf/kubeadmiral/pkg/controllers/policyrc" "github.com/kubewharf/kubeadmiral/pkg/controllers/status" + "github.com/kubewharf/kubeadmiral/pkg/controllers/federatedcluster" ) func startFederateController( @@ -152,3 +153,27 @@ func startStatusController( return statusController, nil } + +func startFederatedClusterController( + ctx context.Context, + controllerCtx *controllercontext.Context, +) (controllermanager.Controller, error) { + federatedClusterController, err := federatedcluster.NewFederatedClusterController( + controllerCtx.KubeClientset, + controllerCtx.FedClientset, + controllerCtx.FedInformerFactory.Core().V1alpha1().FederatedClusters(), + controllerCtx.FederatedInformerManager, + controllerCtx.Metrics, + klog.Background(), + controllerCtx.ComponentConfig.ClusterJoinTimeout, + controllerCtx.WorkerCount, + controllerCtx.FedSystemNamespace, + ) + if err != nil { + return nil, fmt.Errorf("error creating federate controller: %w", err) + } + + go federatedClusterController.Run(ctx) + + return federatedClusterController, nil +} From a54768f602c9b0ed06282a7536c993947ca057c6 Mon Sep 17 00:00:00 2001 From: "hawjia.lim" Date: Sat, 22 Jul 2023 19:50:05 +0800 Subject: [PATCH 05/22] refactor(scheduler): adopt unified types --- .../app/controllermanager.go | 3 + cmd/controller-manager/app/core.go | 30 + .../v1alpha1/extensions_federatedobject.go | 79 ++- pkg/controllers/common/constants.go | 3 + pkg/controllers/federate/util.go | 4 +- pkg/controllers/override/util.go | 2 +- pkg/controllers/scheduler/constants.go | 3 - .../scheduler/core/generic_scheduler.go | 8 +- .../extensions/webhook/v1alpha1/adapter.go | 5 +- .../extensions/webhook/v1alpha1/plugin.go | 1 - .../scheduler/framework/interface.go | 1 - .../plugins/apiresources/apiresources.go | 1 - .../clusteraffinity/cluster_affinity.go | 3 +- .../clusterresources/balanced_allocation.go | 1 - .../framework/plugins/clusterresources/fit.go | 1 - .../clusterresources/least_allocated.go | 1 - .../clusterresources/most_allocated.go | 1 - .../plugins/maxcluster/max_cluster.go | 1 - .../framework/plugins/placement/filter.go | 1 - .../scheduler/framework/plugins/rsp/rsp.go | 3 +- .../tainttoleration/taint_toleration.go | 1 - .../scheduler/framework/runtime/framework.go | 1 - .../scheduler/framework/runtime/registry.go | 1 - pkg/controllers/scheduler/framework/types.go | 1 - pkg/controllers/scheduler/framework/util.go | 1 - pkg/controllers/scheduler/handle.go | 1 - .../{util => scheduler}/planner/planner.go | 1 - .../planner/planner_test.go | 0 pkg/controllers/scheduler/profile.go | 1 - pkg/controllers/scheduler/scheduler.go | 574 +++++++++++------- .../scheduler/schedulingtriggers.go | 88 +-- pkg/controllers/scheduler/schedulingunit.go | 117 ++-- pkg/controllers/scheduler/util.go | 169 +----- pkg/controllers/scheduler/util_test.go | 2 +- pkg/controllers/scheduler/webhook.go | 1 - .../util/clusterselector/util.go | 0 pkg/util/eventhandlers/eventhandler.go | 35 ++ pkg/util/fedobjectadapters/adapters.go | 5 +- .../util/unstructured/unstructured.go | 3 +- 39 files changed, 612 insertions(+), 542 deletions(-) rename pkg/controllers/{util => scheduler}/planner/planner.go (99%) rename pkg/controllers/{util 
=> scheduler}/planner/planner_test.go (100%) rename pkg/{controllers => }/util/clusterselector/util.go (100%) rename pkg/{controllers => }/util/unstructured/unstructured.go (98%) diff --git a/cmd/controller-manager/app/controllermanager.go b/cmd/controller-manager/app/controllermanager.go index 889dc68c..a23c4fbc 100644 --- a/cmd/controller-manager/app/controllermanager.go +++ b/cmd/controller-manager/app/controllermanager.go @@ -42,6 +42,7 @@ const ( OverrideControllerName = "overridepolicy" NamespaceAutoPropagationControllerName = "nsautoprop" StatusControllerName = "status" + SchedulerName = "scheduler" ) var knownControllers = map[string]controllermanager.StartControllerFunc{ @@ -50,6 +51,8 @@ var knownControllers = map[string]controllermanager.StartControllerFunc{ OverrideControllerName: startOverridePolicyController, NamespaceAutoPropagationControllerName: startNamespaceAutoPropagationController, StatusControllerName: startStatusController, + FederatedClusterControllerName: startFederatedClusterController, + SchedulerName: startScheduler, } var controllersDisabledByDefault = sets.New[string]() diff --git a/cmd/controller-manager/app/core.go b/cmd/controller-manager/app/core.go index a0ee4513..442bc45c 100644 --- a/cmd/controller-manager/app/core.go +++ b/cmd/controller-manager/app/core.go @@ -30,6 +30,7 @@ import ( "github.com/kubewharf/kubeadmiral/pkg/controllers/policyrc" "github.com/kubewharf/kubeadmiral/pkg/controllers/status" "github.com/kubewharf/kubeadmiral/pkg/controllers/federatedcluster" + "github.com/kubewharf/kubeadmiral/pkg/controllers/scheduler" ) func startFederateController( @@ -177,3 +178,32 @@ func startFederatedClusterController( return federatedClusterController, nil } + +func startScheduler( + ctx context.Context, + controllerCtx *controllercontext.Context, +) (controllermanager.Controller, error) { + scheduler, err := scheduler.NewScheduler( + controllerCtx.KubeClientset, + controllerCtx.FedClientset, + controllerCtx.DynamicClientset, + controllerCtx.FedInformerFactory.Core().V1alpha1().FederatedObjects(), + controllerCtx.FedInformerFactory.Core().V1alpha1().ClusterFederatedObjects(), + controllerCtx.FedInformerFactory.Core().V1alpha1().PropagationPolicies(), + controllerCtx.FedInformerFactory.Core().V1alpha1().ClusterPropagationPolicies(), + controllerCtx.FedInformerFactory.Core().V1alpha1().FederatedClusters(), + controllerCtx.FedInformerFactory.Core().V1alpha1().SchedulingProfiles(), + controllerCtx.InformerManager, + controllerCtx.FedInformerFactory.Core().V1alpha1().SchedulerPluginWebhookConfigurations(), + controllerCtx.Metrics, + klog.Background(), + controllerCtx.WorkerCount, + ) + if err != nil { + return nil, fmt.Errorf("error creating scheduler: %w", err) + } + + go scheduler.Run(ctx) + + return scheduler, nil +} diff --git a/pkg/apis/core/v1alpha1/extensions_federatedobject.go b/pkg/apis/core/v1alpha1/extensions_federatedobject.go index 92d4df7a..f7ec115a 100644 --- a/pkg/apis/core/v1alpha1/extensions_federatedobject.go +++ b/pkg/apis/core/v1alpha1/extensions_federatedobject.go @@ -81,8 +81,9 @@ func (spec *GenericFederatedObjectSpec) GetControllerPlacement(controller string return nil } -// SetControllerPlacement sets the ClusterPlacements for a given controller. If clusterNames is nil or empty, the previous -// placement for the given controller will be deleted. Returns a bool indicating if the GenericFederatedObject has changed. +// SetControllerPlacement sets the cluster placements for a given controller. 
If clusterNames is nil or empty, the +// previous placement for the given controller will be deleted. Returns a bool indicating if the GenericFederatedObject +// has changed. func (spec *GenericFederatedObjectSpec) SetControllerPlacement(controller string, clusterNames []string) bool { if len(clusterNames) == 0 { return spec.DeleteControllerPlacement(controller) @@ -140,6 +141,79 @@ func (spec *GenericFederatedObjectSpec) DeleteControllerPlacement(controller str return true } +// Overrides extensions + +func (spec *GenericFederatedObjectSpec) GetControllerOverrides(controller string) []ClusterReferenceWithPatches { + for _, overrides := range spec.Overrides { + if overrides.Controller == controller { + return overrides.Override + } + } + return nil +} + +// SetControllerOverrides sets the cluster overrides for a given controller. If clusterNames is nil or empty, the +// previous overrides for the given controller will be deleted. Returns a bool indicating if the GenericFederatedObject +// has changed. +func (spec *GenericFederatedObjectSpec) SetControllerOverrides( + controller string, + clusterOverrides []ClusterReferenceWithPatches, +) bool { + if len(clusterOverrides) == 0 { + return spec.DeleteControllerOverrides(controller) + } + + // sort the clusters by name for readability and to avoid unnecessary updates + sort.Slice(clusterOverrides, func(i, j int) bool { + return clusterOverrides[i].Cluster < clusterOverrides[j].Cluster + }) + + oldOverridesWithControllerIdx := -1 + for i := range spec.Overrides { + if spec.Overrides[i].Controller == controller { + oldOverridesWithControllerIdx = i + break + } + } + + newOverridesWithController := OverrideWithController{ + Controller: controller, + Override: clusterOverrides, + } + if oldOverridesWithControllerIdx == -1 { + spec.Overrides = append(spec.Overrides, newOverridesWithController) + return true + } + if !reflect.DeepEqual(newOverridesWithController, spec.Overrides[oldOverridesWithControllerIdx]) { + spec.Overrides[oldOverridesWithControllerIdx] = newOverridesWithController + return true + } + + return false +} + +// DeleteControllerOverrides deletes a controller's overrides, returning a bool to indicate if the +// GenericFederatedObject has changed. +func (spec *GenericFederatedObjectSpec) DeleteControllerOverrides(controller string) bool { + oldOverridesIdx := -1 + for i := range spec.Overrides { + if spec.Overrides[i].Controller == controller { + oldOverridesIdx = i + break + } + } + + if oldOverridesIdx == -1 { + return false + } + + spec.Overrides = append(spec.Overrides[:oldOverridesIdx], spec.Overrides[(oldOverridesIdx+1):]...) + return true +} + +// Template extensions + +// GetTemplateAsUnstructured returns the FederatedObject's template unmarshalled into an *unstructured.Unstructured. func (spec *GenericFederatedObjectSpec) GetTemplateAsUnstructured() (*unstructured.Unstructured, error) { template := &unstructured.Unstructured{} if err := template.UnmarshalJSON(spec.Template.Raw); err != nil { @@ -148,6 +222,7 @@ func (spec *GenericFederatedObjectSpec) GetTemplateAsUnstructured() (*unstructur return template, nil } +// GetTemplateGVK returns the GVK of the FederatedObject's source object by parsing the FederatedObject's template. 
func (spec *GenericFederatedObjectSpec) GetTemplateGVK() (schema.GroupVersionKind, error) { type partialTypeMetadata struct { metav1.TypeMeta `json:",inline"` diff --git a/pkg/controllers/common/constants.go b/pkg/controllers/common/constants.go index 810817e5..41d9c783 100644 --- a/pkg/controllers/common/constants.go +++ b/pkg/controllers/common/constants.go @@ -140,6 +140,9 @@ const ( // TemplateGeneratorMergePatchAnnotation indicates the merge patch document capable of converting // the source object to the template object. TemplateGeneratorMergePatchAnnotation = FederateControllerPrefix + "template-generator-merge-patch" + + PropagationPolicyNameLabel = DefaultPrefix + "propagation-policy-name" + ClusterPropagationPolicyNameLabel = DefaultPrefix + "cluster-propagation-policy-name" ) // PropagatedAnnotationKeys and PropagatedLabelKeys are used to store the keys of annotations and labels that are present diff --git a/pkg/controllers/federate/util.go b/pkg/controllers/federate/util.go index 96802afd..104831de 100644 --- a/pkg/controllers/federate/util.go +++ b/pkg/controllers/federate/util.go @@ -270,8 +270,8 @@ var ( ) federatedLabelSet = sets.New[string]( - scheduler.PropagationPolicyNameLabel, - scheduler.ClusterPropagationPolicyNameLabel, + common.PropagationPolicyNameLabel, + common.ClusterPropagationPolicyNameLabel, override.OverridePolicyNameLabel, override.ClusterOverridePolicyNameLabel, ) diff --git a/pkg/controllers/override/util.go b/pkg/controllers/override/util.go index cea0b9f7..3652f153 100644 --- a/pkg/controllers/override/util.go +++ b/pkg/controllers/override/util.go @@ -27,7 +27,7 @@ import ( fedcorev1a1 "github.com/kubewharf/kubeadmiral/pkg/apis/core/v1alpha1" fedcorev1a1listers "github.com/kubewharf/kubeadmiral/pkg/client/listers/core/v1alpha1" "github.com/kubewharf/kubeadmiral/pkg/controllers/util" - "github.com/kubewharf/kubeadmiral/pkg/controllers/util/clusterselector" + "github.com/kubewharf/kubeadmiral/pkg/util/clusterselector" ) /* diff --git a/pkg/controllers/scheduler/constants.go b/pkg/controllers/scheduler/constants.go index 6bd4a6ce..460ede03 100644 --- a/pkg/controllers/scheduler/constants.go +++ b/pkg/controllers/scheduler/constants.go @@ -25,9 +25,6 @@ const ( GlobalSchedulerName = "global-scheduler" PrefixedGlobalSchedulerName = common.DefaultPrefix + "global-scheduler" - PropagationPolicyNameLabel = common.DefaultPrefix + "propagation-policy-name" - ClusterPropagationPolicyNameLabel = common.DefaultPrefix + "cluster-propagation-policy-name" - // Marks that the annotated object must follow the placement of the followed object. // Value is in the form G/V/R/ns/name, e.g. `types.kubeadmiral.io/v1alpha1/federateddeployments/default/fed-dp-xxx`. FollowsObjectAnnotation = common.DefaultPrefix + "follows-object" diff --git a/pkg/controllers/scheduler/core/generic_scheduler.go b/pkg/controllers/scheduler/core/generic_scheduler.go index dfd04a0b..55af8d8a 100644 --- a/pkg/controllers/scheduler/core/generic_scheduler.go +++ b/pkg/controllers/scheduler/core/generic_scheduler.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2014 The Kubernetes Authors. 
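Returning to the GenericFederatedObjectSpec helpers added in extensions_federatedobject.go above: placements and overrides are both stored per controller, and every setter reports whether the object actually changed so callers can decide whether to write it back. A rough usage sketch under a hypothetical controller name; only the Cluster field of ClusterReferenceWithPatches (the one visible in this patch) is populated, and the API write-back is omitted.

```go
package example

import (
	fedcorev1a1 "github.com/kubewharf/kubeadmiral/pkg/apis/core/v1alpha1"
)

// reconcilePlacementAndOverrides records the clusters a hypothetical controller
// selected and keeps its overrides in sync with that selection.
func reconcilePlacementAndOverrides(
	fedObject fedcorev1a1.GenericFederatedObject,
	selectedClusters []string,
) (changed bool) {
	const controller = "example.kubeadmiral.io/my-controller" // hypothetical controller name

	spec := fedObject.GetSpec()

	// An empty cluster list deletes the controller's placement entry.
	changed = spec.SetControllerPlacement(controller, selectedClusters) || changed

	if len(selectedClusters) == 0 {
		// Nothing selected: drop any overrides recorded earlier.
		changed = spec.DeleteControllerOverrides(controller) || changed
		return changed
	}

	// One entry per selected cluster; a real controller would attach JSON
	// patches to each ClusterReferenceWithPatches.
	overrides := make([]fedcorev1a1.ClusterReferenceWithPatches, 0, len(selectedClusters))
	for _, cluster := range selectedClusters {
		overrides = append(overrides, fedcorev1a1.ClusterReferenceWithPatches{Cluster: cluster})
	}
	changed = spec.SetControllerOverrides(controller, overrides) || changed

	return changed
}
```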
@@ -28,6 +27,7 @@ import ( "strings" "github.com/davecgh/go-spew/spew" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/klog/v2" "k8s.io/utils/pointer" @@ -53,10 +53,10 @@ type ScheduleResult struct { SuggestedClusters map[string]*int64 } -func (result ScheduleResult) ClusterSet() map[string]struct{} { - clusterSet := make(map[string]struct{}, len(result.SuggestedClusters)) +func (result ScheduleResult) ClusterSet() sets.Set[string] { + clusterSet := sets.New[string]() for cluster := range result.SuggestedClusters { - clusterSet[cluster] = struct{}{} + clusterSet.Insert(cluster) } return clusterSet } diff --git a/pkg/controllers/scheduler/extensions/webhook/v1alpha1/adapter.go b/pkg/controllers/scheduler/extensions/webhook/v1alpha1/adapter.go index 9b964747..f19e7b37 100644 --- a/pkg/controllers/scheduler/extensions/webhook/v1alpha1/adapter.go +++ b/pkg/controllers/scheduler/extensions/webhook/v1alpha1/adapter.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2023 The KubeAdmiral Authors. @@ -39,7 +38,7 @@ func ConvertSchedulingUnit(su *framework.SchedulingUnit) *schedwebhookv1a1.Sched } } - placements := []fedcorev1a1.ClusterReference{} + placements := []fedcorev1a1.DesiredPlacement{} for cluster := range su.ClusterNames { var weight *int64 if w, ok := su.Weights[cluster]; ok { @@ -51,7 +50,7 @@ func ConvertSchedulingUnit(su *framework.SchedulingUnit) *schedwebhookv1a1.Sched maxReplicas = &max } - placement := fedcorev1a1.ClusterReference{ + placement := fedcorev1a1.DesiredPlacement{ Cluster: cluster, Preferences: fedcorev1a1.Preferences{ MinReplicas: su.MinReplicas[cluster], diff --git a/pkg/controllers/scheduler/extensions/webhook/v1alpha1/plugin.go b/pkg/controllers/scheduler/extensions/webhook/v1alpha1/plugin.go index 78b78979..ea1da454 100644 --- a/pkg/controllers/scheduler/extensions/webhook/v1alpha1/plugin.go +++ b/pkg/controllers/scheduler/extensions/webhook/v1alpha1/plugin.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2023 The KubeAdmiral Authors. diff --git a/pkg/controllers/scheduler/framework/interface.go b/pkg/controllers/scheduler/framework/interface.go index 3052b1cc..50165e89 100644 --- a/pkg/controllers/scheduler/framework/interface.go +++ b/pkg/controllers/scheduler/framework/interface.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2019 The Kubernetes Authors. diff --git a/pkg/controllers/scheduler/framework/plugins/apiresources/apiresources.go b/pkg/controllers/scheduler/framework/plugins/apiresources/apiresources.go index a949e86f..71f23d1c 100644 --- a/pkg/controllers/scheduler/framework/plugins/apiresources/apiresources.go +++ b/pkg/controllers/scheduler/framework/plugins/apiresources/apiresources.go @@ -1,4 +1,3 @@ -//go:build exclude // The design of this plugin is heavily inspired by karmada-scheduler. Kudos! package apiresources diff --git a/pkg/controllers/scheduler/framework/plugins/clusteraffinity/cluster_affinity.go b/pkg/controllers/scheduler/framework/plugins/clusteraffinity/cluster_affinity.go index e034f79c..2e1914d5 100644 --- a/pkg/controllers/scheduler/framework/plugins/clusteraffinity/cluster_affinity.go +++ b/pkg/controllers/scheduler/framework/plugins/clusteraffinity/cluster_affinity.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2019 The Kubernetes Authors. 
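A small note on the ClusterSet change above: returning sets.Set[string] lets downstream code use the apimachinery set helpers directly instead of iterating a bare map. The sketch below shows the conversion to a sorted slice, mirroring how the scheduler later feeds the result to SetControllerPlacement.

```go
package example

import (
	"k8s.io/apimachinery/pkg/util/sets"

	"github.com/kubewharf/kubeadmiral/pkg/controllers/scheduler/core"
)

// placementClusterNames converts a ScheduleResult into the sorted cluster name
// slice that the placement setters expect.
func placementClusterNames(result core.ScheduleResult) []string {
	clusterSet := result.ClusterSet() // sets.Set[string]
	return sets.List(clusterSet)      // sorted []string
}
```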
@@ -29,7 +28,7 @@ import ( fedcorev1a1 "github.com/kubewharf/kubeadmiral/pkg/apis/core/v1alpha1" "github.com/kubewharf/kubeadmiral/pkg/controllers/scheduler/framework" "github.com/kubewharf/kubeadmiral/pkg/controllers/scheduler/framework/plugins/names" - "github.com/kubewharf/kubeadmiral/pkg/controllers/util/clusterselector" + "github.com/kubewharf/kubeadmiral/pkg/util/clusterselector" ) const ( diff --git a/pkg/controllers/scheduler/framework/plugins/clusterresources/balanced_allocation.go b/pkg/controllers/scheduler/framework/plugins/clusterresources/balanced_allocation.go index a99d2a6d..46e41d80 100644 --- a/pkg/controllers/scheduler/framework/plugins/clusterresources/balanced_allocation.go +++ b/pkg/controllers/scheduler/framework/plugins/clusterresources/balanced_allocation.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2019 The Kubernetes Authors. diff --git a/pkg/controllers/scheduler/framework/plugins/clusterresources/fit.go b/pkg/controllers/scheduler/framework/plugins/clusterresources/fit.go index 114926a6..0310205b 100644 --- a/pkg/controllers/scheduler/framework/plugins/clusterresources/fit.go +++ b/pkg/controllers/scheduler/framework/plugins/clusterresources/fit.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2019 The Kubernetes Authors. diff --git a/pkg/controllers/scheduler/framework/plugins/clusterresources/least_allocated.go b/pkg/controllers/scheduler/framework/plugins/clusterresources/least_allocated.go index 38d365bf..1eca9f16 100644 --- a/pkg/controllers/scheduler/framework/plugins/clusterresources/least_allocated.go +++ b/pkg/controllers/scheduler/framework/plugins/clusterresources/least_allocated.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2019 The Kubernetes Authors. diff --git a/pkg/controllers/scheduler/framework/plugins/clusterresources/most_allocated.go b/pkg/controllers/scheduler/framework/plugins/clusterresources/most_allocated.go index 6f5b830a..586455ab 100644 --- a/pkg/controllers/scheduler/framework/plugins/clusterresources/most_allocated.go +++ b/pkg/controllers/scheduler/framework/plugins/clusterresources/most_allocated.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2019 The Kubernetes Authors. diff --git a/pkg/controllers/scheduler/framework/plugins/maxcluster/max_cluster.go b/pkg/controllers/scheduler/framework/plugins/maxcluster/max_cluster.go index 08ca56f1..7a1691de 100644 --- a/pkg/controllers/scheduler/framework/plugins/maxcluster/max_cluster.go +++ b/pkg/controllers/scheduler/framework/plugins/maxcluster/max_cluster.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2023 The KubeAdmiral Authors. diff --git a/pkg/controllers/scheduler/framework/plugins/placement/filter.go b/pkg/controllers/scheduler/framework/plugins/placement/filter.go index 19c3253b..35748d7d 100644 --- a/pkg/controllers/scheduler/framework/plugins/placement/filter.go +++ b/pkg/controllers/scheduler/framework/plugins/placement/filter.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2023 The KubeAdmiral Authors. diff --git a/pkg/controllers/scheduler/framework/plugins/rsp/rsp.go b/pkg/controllers/scheduler/framework/plugins/rsp/rsp.go index 50b94495..129021bd 100644 --- a/pkg/controllers/scheduler/framework/plugins/rsp/rsp.go +++ b/pkg/controllers/scheduler/framework/plugins/rsp/rsp.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2023 The KubeAdmiral Authors. 
@@ -36,7 +35,7 @@ import ( fedcorev1a1 "github.com/kubewharf/kubeadmiral/pkg/apis/core/v1alpha1" "github.com/kubewharf/kubeadmiral/pkg/controllers/scheduler/framework" "github.com/kubewharf/kubeadmiral/pkg/controllers/scheduler/framework/plugins/names" - "github.com/kubewharf/kubeadmiral/pkg/controllers/util/planner" + "github.com/kubewharf/kubeadmiral/pkg/controllers/scheduler/planner" ) const ( diff --git a/pkg/controllers/scheduler/framework/plugins/tainttoleration/taint_toleration.go b/pkg/controllers/scheduler/framework/plugins/tainttoleration/taint_toleration.go index 80571411..dae64acf 100644 --- a/pkg/controllers/scheduler/framework/plugins/tainttoleration/taint_toleration.go +++ b/pkg/controllers/scheduler/framework/plugins/tainttoleration/taint_toleration.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2019 The Kubernetes Authors. diff --git a/pkg/controllers/scheduler/framework/runtime/framework.go b/pkg/controllers/scheduler/framework/runtime/framework.go index 3cf2bee7..8da7e857 100644 --- a/pkg/controllers/scheduler/framework/runtime/framework.go +++ b/pkg/controllers/scheduler/framework/runtime/framework.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2019 The Kubernetes Authors. diff --git a/pkg/controllers/scheduler/framework/runtime/registry.go b/pkg/controllers/scheduler/framework/runtime/registry.go index 278ed0d2..b8970bed 100644 --- a/pkg/controllers/scheduler/framework/runtime/registry.go +++ b/pkg/controllers/scheduler/framework/runtime/registry.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2019 The Kubernetes Authors. diff --git a/pkg/controllers/scheduler/framework/types.go b/pkg/controllers/scheduler/framework/types.go index b8b8a212..3b1abbde 100644 --- a/pkg/controllers/scheduler/framework/types.go +++ b/pkg/controllers/scheduler/framework/types.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2019 The Kubernetes Authors. diff --git a/pkg/controllers/scheduler/framework/util.go b/pkg/controllers/scheduler/framework/util.go index 165e73a0..a12dedd9 100644 --- a/pkg/controllers/scheduler/framework/util.go +++ b/pkg/controllers/scheduler/framework/util.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2015 The Kubernetes Authors. diff --git a/pkg/controllers/scheduler/handle.go b/pkg/controllers/scheduler/handle.go index 84589766..6b99558e 100644 --- a/pkg/controllers/scheduler/handle.go +++ b/pkg/controllers/scheduler/handle.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2023 The KubeAdmiral Authors. diff --git a/pkg/controllers/util/planner/planner.go b/pkg/controllers/scheduler/planner/planner.go similarity index 99% rename from pkg/controllers/util/planner/planner.go rename to pkg/controllers/scheduler/planner/planner.go index d4034bcd..46f91c3f 100644 --- a/pkg/controllers/util/planner/planner.go +++ b/pkg/controllers/scheduler/planner/planner.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2016 The Kubernetes Authors. diff --git a/pkg/controllers/util/planner/planner_test.go b/pkg/controllers/scheduler/planner/planner_test.go similarity index 100% rename from pkg/controllers/util/planner/planner_test.go rename to pkg/controllers/scheduler/planner/planner_test.go diff --git a/pkg/controllers/scheduler/profile.go b/pkg/controllers/scheduler/profile.go index 811d9325..3c6f8c44 100644 --- a/pkg/controllers/scheduler/profile.go +++ b/pkg/controllers/scheduler/profile.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2023 The KubeAdmiral Authors. 
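The scheduler.go diff that follows rewires the controller around the unified types: a single ReconcileWorker keyed by common.QualifiedName, fed by the generic event handler helpers, replaces the per-FTC worker. A compressed sketch of that wiring, using only the call shapes visible in the diff; the second NewReconcileWorker argument is passed as nil there and is assumed optional, and the reconcile body is a placeholder.

```go
package example

import (
	"context"

	fedcorev1a1informers "github.com/kubewharf/kubeadmiral/pkg/client/informers/externalversions/core/v1alpha1"
	"github.com/kubewharf/kubeadmiral/pkg/controllers/common"
	"github.com/kubewharf/kubeadmiral/pkg/stats"
	"github.com/kubewharf/kubeadmiral/pkg/util/eventhandlers"
	"github.com/kubewharf/kubeadmiral/pkg/util/worker"
)

func newExampleWorker(
	fedObjectInformer fedcorev1a1informers.FederatedObjectInformer,
	metrics stats.Metrics,
	workerCount int,
) worker.ReconcileWorker[common.QualifiedName] {
	// One reconcile function handles every federated object, keyed by namespace/name.
	reconcile := func(ctx context.Context, key common.QualifiedName) worker.Result {
		// ... look the object up through the unified listers and process it ...
		return worker.StatusAllOK
	}

	w := worker.NewReconcileWorker[common.QualifiedName](
		"example-worker",
		nil, // the scheduler passes nil here too; assumed optional
		reconcile,
		worker.RateLimiterOptions{},
		workerCount,
		metrics,
	)

	// Every FederatedObject change enqueues its QualifiedName.
	fedObjectInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChanges(
		common.NewQualifiedName, // key func: object -> QualifiedName
		w.Enqueue,               // trigger func: enqueue the key
	))

	return w
}
```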
diff --git a/pkg/controllers/scheduler/scheduler.go b/pkg/controllers/scheduler/scheduler.go index b62cf5a2..0c4a2645 100644 --- a/pkg/controllers/scheduler/scheduler.go +++ b/pkg/controllers/scheduler/scheduler.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2023 The KubeAdmiral Authors. @@ -30,12 +29,10 @@ import ( "k8s.io/apimachinery/pkg/api/equality" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/labels" - pkgruntime "k8s.io/apimachinery/pkg/runtime" - dynamicclient "k8s.io/client-go/dynamic" - "k8s.io/client-go/informers" - kubeclient "k8s.io/client-go/kubernetes" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/client-go/dynamic" + "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" "k8s.io/klog/v2" @@ -44,17 +41,22 @@ import ( fedcorev1a1 "github.com/kubewharf/kubeadmiral/pkg/apis/core/v1alpha1" fedclient "github.com/kubewharf/kubeadmiral/pkg/client/clientset/versioned" fedcorev1a1informers "github.com/kubewharf/kubeadmiral/pkg/client/informers/externalversions/core/v1alpha1" - fedcorev1a1listers "github.com/kubewharf/kubeadmiral/pkg/client/listers/core/v1alpha1" "github.com/kubewharf/kubeadmiral/pkg/controllers/common" "github.com/kubewharf/kubeadmiral/pkg/controllers/scheduler/core" - "github.com/kubewharf/kubeadmiral/pkg/controllers/util" - annotationutil "github.com/kubewharf/kubeadmiral/pkg/controllers/util/annotation" - "github.com/kubewharf/kubeadmiral/pkg/controllers/util/delayingdeliver" - "github.com/kubewharf/kubeadmiral/pkg/controllers/util/eventsink" - "github.com/kubewharf/kubeadmiral/pkg/controllers/util/pendingcontrollers" - schemautil "github.com/kubewharf/kubeadmiral/pkg/controllers/util/schema" - "github.com/kubewharf/kubeadmiral/pkg/controllers/util/worker" "github.com/kubewharf/kubeadmiral/pkg/stats" + "github.com/kubewharf/kubeadmiral/pkg/util/annotation" + clusterutil "github.com/kubewharf/kubeadmiral/pkg/util/cluster" + "github.com/kubewharf/kubeadmiral/pkg/util/eventhandlers" + "github.com/kubewharf/kubeadmiral/pkg/util/eventsink" + "github.com/kubewharf/kubeadmiral/pkg/util/fedobjectadapters" + "github.com/kubewharf/kubeadmiral/pkg/util/informermanager" + "github.com/kubewharf/kubeadmiral/pkg/util/logging" + "github.com/kubewharf/kubeadmiral/pkg/util/pendingcontrollers" + "github.com/kubewharf/kubeadmiral/pkg/util/worker" +) + +const ( + SchedulerName = "scheduler" ) type ClusterWeight struct { @@ -63,31 +65,22 @@ type ClusterWeight struct { } type Scheduler struct { - typeConfig *fedcorev1a1.FederatedTypeConfig - name string - fedClient fedclient.Interface - dynamicClient dynamicclient.Interface - - federatedObjectClient dynamicclient.NamespaceableResourceInterface - federatedObjectLister cache.GenericLister - federatedObjectSynced cache.InformerSynced + dynamicClient dynamic.Interface - propagationPolicyLister fedcorev1a1listers.PropagationPolicyLister - clusterPropagationPolicyLister fedcorev1a1listers.ClusterPropagationPolicyLister - propagationPolicySynced cache.InformerSynced - clusterPropagationPolicySynced cache.InformerSynced + fedObjectInformer fedcorev1a1informers.FederatedObjectInformer + clusterFedObjectInformer fedcorev1a1informers.ClusterFederatedObjectInformer + propagationPolicyInformer fedcorev1a1informers.PropagationPolicyInformer + clusterPropagationPolicyInformer fedcorev1a1informers.ClusterPropagationPolicyInformer + federatedClusterInformer 
fedcorev1a1informers.FederatedClusterInformer + schedulingProfileInformer fedcorev1a1informers.SchedulingProfileInformer - clusterLister fedcorev1a1listers.FederatedClusterLister - clusterSynced cache.InformerSynced + informerManager informermanager.InformerManager - schedulingProfileLister fedcorev1a1listers.SchedulingProfileLister - schedulingProfileSynced cache.InformerSynced - - webhookConfigurationSynced cache.InformerSynced webhookPlugins sync.Map + webhookConfigurationSynced cache.InformerSynced - worker worker.ReconcileWorker + worker worker.ReconcileWorker[common.QualifiedName] eventRecorder record.EventRecorder algorithm core.ScheduleAlgorithm @@ -101,84 +94,76 @@ func (s *Scheduler) IsControllerReady() bool { } func NewScheduler( - logger klog.Logger, - typeConfig *fedcorev1a1.FederatedTypeConfig, - kubeClient kubeclient.Interface, + kubeClient kubernetes.Interface, fedClient fedclient.Interface, - dynamicClient dynamicclient.Interface, - federatedObjectInformer informers.GenericInformer, + dynamicClient dynamic.Interface, + fedObjectInformer fedcorev1a1informers.FederatedObjectInformer, + clusterFedObjectInformer fedcorev1a1informers.ClusterFederatedObjectInformer, propagationPolicyInformer fedcorev1a1informers.PropagationPolicyInformer, clusterPropagationPolicyInformer fedcorev1a1informers.ClusterPropagationPolicyInformer, - clusterInformer fedcorev1a1informers.FederatedClusterInformer, + federatedClusterInformer fedcorev1a1informers.FederatedClusterInformer, schedulingProfileInformer fedcorev1a1informers.SchedulingProfileInformer, + informerManager informermanager.InformerManager, webhookConfigurationInformer fedcorev1a1informers.SchedulerPluginWebhookConfigurationInformer, metrics stats.Metrics, + logger klog.Logger, workerCount int, ) (*Scheduler, error) { - schedulerName := fmt.Sprintf("%s-scheduler", typeConfig.GetFederatedType().Name) - s := &Scheduler{ - typeConfig: typeConfig, - name: schedulerName, - fedClient: fedClient, - dynamicClient: dynamicClient, - metrics: metrics, - logger: logger.WithValues("controller", GlobalSchedulerName, "ftc", typeConfig.Name), - } - - s.worker = worker.NewReconcileWorker( + fedClient: fedClient, + dynamicClient: dynamicClient, + fedObjectInformer: fedObjectInformer, + clusterFedObjectInformer: clusterFedObjectInformer, + propagationPolicyInformer: propagationPolicyInformer, + clusterPropagationPolicyInformer: clusterPropagationPolicyInformer, + federatedClusterInformer: federatedClusterInformer, + schedulingProfileInformer: schedulingProfileInformer, + informerManager: informerManager, + webhookConfigurationSynced: webhookConfigurationInformer.Informer().HasSynced, + webhookPlugins: sync.Map{}, + metrics: metrics, + logger: logger.WithValues("controller", SchedulerName), + } + + s.eventRecorder = eventsink.NewDefederatingRecorderMux(kubeClient, SchedulerName, 6) + s.worker = worker.NewReconcileWorker[common.QualifiedName]( + SchedulerName, + nil, s.reconcile, worker.RateLimiterOptions{}, workerCount, metrics, - delayingdeliver.NewMetricTags("scheduler-worker", s.typeConfig.GetFederatedType().Kind), ) - s.eventRecorder = eventsink.NewDefederatingRecorderMux(kubeClient, s.name, 6) - - apiResource := typeConfig.GetFederatedType() - s.federatedObjectClient = dynamicClient.Resource(schemautil.APIResourceToGVR(&apiResource)) - - s.federatedObjectLister = federatedObjectInformer.Lister() - s.federatedObjectSynced = federatedObjectInformer.Informer().HasSynced - 
federatedObjectInformer.Informer().AddEventHandler(util.NewTriggerOnAllChanges(s.worker.EnqueueObject)) - - // only required if namespaced - if s.typeConfig.GetNamespaced() { - s.propagationPolicyLister = propagationPolicyInformer.Lister() - s.propagationPolicySynced = propagationPolicyInformer.Informer().HasSynced - propagationPolicyInformer.Informer().AddEventHandler(util.NewTriggerOnGenerationChanges(s.enqueueFederatedObjectsForPolicy)) - } - s.clusterPropagationPolicyLister = clusterPropagationPolicyInformer.Lister() - s.clusterPropagationPolicySynced = clusterPropagationPolicyInformer.Informer().HasSynced - clusterPropagationPolicyInformer.Informer().AddEventHandler(util.NewTriggerOnGenerationChanges(s.enqueueFederatedObjectsForPolicy)) - - s.clusterLister = clusterInformer.Lister() - s.clusterSynced = clusterInformer.Informer().HasSynced - clusterInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ - AddFunc: func(obj interface{}) { s.enqueueFederatedObjectsForCluster(obj.(pkgruntime.Object)) }, - DeleteFunc: func(obj interface{}) { - if deleted, ok := obj.(cache.DeletedFinalStateUnknown); ok { - // This object might be stale but ok for our current usage. - obj = deleted.Obj - if obj == nil { - return - } - } - s.enqueueFederatedObjectsForCluster(obj.(pkgruntime.Object)) + fedObjectInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChanges( + common.NewQualifiedName, + s.worker.Enqueue, + )) + clusterFedObjectInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChanges( + common.NewQualifiedName, + s.worker.Enqueue, + )) + + propagationPolicyInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnGenerationChanges( + func(obj metav1.Object) metav1.Object { return obj }, + s.enqueueFederatedObjectsForPolicy, + )) + clusterPropagationPolicyInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnGenerationChanges( + func(obj metav1.Object) metav1.Object { return obj }, + s.enqueueFederatedObjectsForPolicy, + )) + + federatedClusterInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnChanges( + func(oldCluster, curCluster *fedcorev1a1.FederatedCluster) bool { + return !equality.Semantic.DeepEqual(oldCluster.Labels, curCluster.Labels) || + !equality.Semantic.DeepEqual(oldCluster.Spec.Taints, curCluster.Spec.Taints) || + !equality.Semantic.DeepEqual(oldCluster.Status.APIResourceTypes, curCluster.Status.APIResourceTypes) }, - UpdateFunc: func(oldUntyped, newUntyped interface{}) { - oldCluster, newCluster := oldUntyped.(*fedcorev1a1.FederatedCluster), newUntyped.(*fedcorev1a1.FederatedCluster) - if !equality.Semantic.DeepEqual(oldCluster.Labels, newCluster.Labels) || - !equality.Semantic.DeepEqual(oldCluster.Spec.Taints, newCluster.Spec.Taints) || - !equality.Semantic.DeepEqual(oldCluster.Status.APIResourceTypes, newCluster.Status.APIResourceTypes) { - s.enqueueFederatedObjectsForCluster(newCluster) - } + func(cluster *fedcorev1a1.FederatedCluster) *fedcorev1a1.FederatedCluster { + return cluster }, - }) - - s.schedulingProfileLister = schedulingProfileInformer.Lister() - s.schedulingProfileSynced = schedulingProfileInformer.Informer().HasSynced + s.enqueueFederatedObjectsForCluster, + )) s.webhookConfigurationSynced = webhookConfigurationInformer.Informer().HasSynced webhookConfigurationInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ @@ -206,6 +191,13 @@ func NewScheduler( }, }) + informerManager.AddFTCUpdateHandler(func(lastObserved, latest *fedcorev1a1.FederatedTypeConfig) { + if lastObserved == nil && 
latest != nil { + s.enqueueFederatedObjectsForFTC(latest) + return + } + }) + s.algorithm = core.NewSchedulerAlgorithm() return s, nil @@ -213,15 +205,15 @@ func NewScheduler( func (s *Scheduler) HasSynced() bool { cachesSynced := []cache.InformerSynced{ - s.federatedObjectSynced, - s.clusterPropagationPolicySynced, - s.clusterSynced, - s.schedulingProfileSynced, + s.fedObjectInformer.Informer().HasSynced, + s.clusterFedObjectInformer.Informer().HasSynced, + s.propagationPolicyInformer.Informer().HasSynced, + s.clusterPropagationPolicyInformer.Informer().HasSynced, + s.federatedClusterInformer.Informer().HasSynced, + s.informerManager.HasSynced, + s.schedulingProfileInformer.Informer().HasSynced, s.webhookConfigurationSynced, } - if s.typeConfig.GetNamespaced() { - cachesSynced = append(cachesSynced, s.propagationPolicySynced) - } for _, synced := range cachesSynced { if !synced() { @@ -233,61 +225,88 @@ func (s *Scheduler) HasSynced() bool { } func (s *Scheduler) Run(ctx context.Context) { - s.logger.Info("Starting controller") - defer s.logger.Info("Stopping controller") + ctx, logger := logging.InjectLogger(ctx, s.logger) - if !cache.WaitForNamedCacheSync(s.name, ctx.Done(), s.HasSynced) { + logger.Info("Starting controller") + defer logger.Info("Stopping controller") + + if !cache.WaitForNamedCacheSync(SchedulerName, ctx.Done(), s.HasSynced) { + logger.Error(nil, "Timed out waiting for cache sync") return } - s.worker.Run(ctx.Done()) + logger.Info("Caches are synced") + + s.worker.Run(ctx) <-ctx.Done() } -func (s *Scheduler) reconcile(qualifiedName common.QualifiedName) (status worker.Result) { +func (s *Scheduler) reconcile(ctx context.Context, key common.QualifiedName) (status worker.Result) { _ = s.metrics.Rate("scheduler.throughput", 1) - keyedLogger := s.logger.WithValues("origin", "reconcile", "object", qualifiedName.String()) - ctx := klog.NewContext(context.TODO(), keyedLogger) + ctx, logger := logging.InjectLoggerValues(ctx, "key", key.String()) + startTime := time.Now() - keyedLogger.V(3).Info("Start reconcile") + logger.V(3).Info("Start reconcile") defer func() { - s.metrics.Duration(fmt.Sprintf("%s.latency", s.name), startTime) - keyedLogger.V(3).WithValues("duration", time.Since(startTime), "status", status.String()).Info("Finished reconcile") + s.metrics.Duration(fmt.Sprintf("%s.latency", SchedulerName), startTime) + logger.V(3).WithValues("duration", time.Since(startTime), "status", status.String()).Info("Finished reconcile") }() - fedObject, err := s.federatedObjectFromStore(qualifiedName) + fedObject, err := fedobjectadapters.GetFromLister( + s.fedObjectInformer.Lister(), + s.clusterFedObjectInformer.Lister(), + key.Namespace, + key.Name, + ) if err != nil && !apierrors.IsNotFound(err) { - keyedLogger.Error(err, "Failed to get object from store") + logger.Error(err, "Failed to get FederatedObject from store") return worker.StatusError } if apierrors.IsNotFound(err) || fedObject.GetDeletionTimestamp() != nil { - keyedLogger.V(3).Info("Observed object deletion") + logger.V(3).Info("Observed FederatedObject deletion") return worker.StatusAllOK } - fedObject = fedObject.DeepCopy() + fedObject = fedObject.DeepCopyGenericFederatedObject() + + sourceGVK, err := fedObject.GetSpec().GetTemplateGVK() + if err != nil { + logger.Error(err, "Failed to get source GVK from FederatedObject") + return worker.StatusError + } + ctx, logger = logging.InjectLoggerValues(ctx, "source-gvk", sourceGVK) + + ftc, exists := s.informerManager.GetResourceFTC(sourceGVK) + if !exists { + 
logger.V(3).Info("FTC for FederatedObject source type does not exist, will skip scheduling") + return worker.StatusAllOK + } + ctx, logger = logging.InjectLoggerValues(ctx, "ftc", ftc.GetName()) - policy, clusters, schedulingProfile, earlyReturnResult := s.prepareToSchedule(ctx, fedObject) + policy, clusters, schedulingProfile, earlyReturnResult := s.prepareToSchedule(ctx, fedObject, ftc) if earlyReturnResult != nil { return *earlyReturnResult } if policy != nil { - keyedLogger = keyedLogger.WithValues("policy", common.NewQualifiedName(policy).String()) + ctx, logger = logging.InjectLoggerValues(ctx, "policy", common.NewQualifiedName(policy).String()) } if schedulingProfile != nil { - keyedLogger = keyedLogger.WithValues("schedulingProfile", common.NewQualifiedName(schedulingProfile).String()) + ctx, logger = logging.InjectLoggerValues( + ctx, + "schedulingProfile", + common.NewQualifiedName(schedulingProfile).String(), + ) } - ctx = klog.NewContext(ctx, keyedLogger) - result, earlyReturnWorkerResult := s.schedule(ctx, fedObject, policy, schedulingProfile, clusters) + result, earlyReturnWorkerResult := s.schedule(ctx, ftc, fedObject, policy, schedulingProfile, clusters) if earlyReturnWorkerResult != nil { return *earlyReturnWorkerResult } - keyedLogger = keyedLogger.WithValues("result", result.String()) - keyedLogger.V(2).Info("Scheduling result obtained") + ctx, logger = logging.InjectLoggerValues(ctx, "result", result.String()) + logger.V(2).Info("Scheduling result obtained") auxInfo := &auxiliarySchedulingInformation{ enableFollowerScheduling: false, @@ -297,49 +316,53 @@ func (s *Scheduler) reconcile(qualifiedName common.QualifiedName) (status worker spec := policy.GetSpec() auxInfo.enableFollowerScheduling = !spec.DisableFollowerScheduling - keyedLogger = keyedLogger.WithValues("enableFollowerScheduling", auxInfo.enableFollowerScheduling) + ctx, logger = logging.InjectLoggerValues(ctx, "enableFollowerScheduling", auxInfo.enableFollowerScheduling) if autoMigration := spec.AutoMigration; autoMigration != nil { auxInfo.unschedulableThreshold = pointer.Duration(autoMigration.Trigger.PodUnschedulableDuration.Duration) - keyedLogger = keyedLogger.WithValues("unschedulableThreshold", auxInfo.unschedulableThreshold.String()) + ctx, logger = logging.InjectLoggerValues( + ctx, + "unschedulableThreshold", + auxInfo.unschedulableThreshold.String(), + ) } } - ctx = klog.NewContext(ctx, keyedLogger) - return s.persistSchedulingResult(ctx, fedObject, *result, auxInfo) + return s.persistSchedulingResult(ctx, ftc, fedObject, *result, auxInfo) } func (s *Scheduler) prepareToSchedule( ctx context.Context, - fedObject *unstructured.Unstructured, + fedObject fedcorev1a1.GenericFederatedObject, + ftc *fedcorev1a1.FederatedTypeConfig, ) ( fedcorev1a1.GenericPropagationPolicy, []*fedcorev1a1.FederatedCluster, *fedcorev1a1.SchedulingProfile, *worker.Result, ) { - keyedLogger := klog.FromContext(ctx) + logger := klog.FromContext(ctx) // check pending controllers if ok, err := pendingcontrollers.ControllerDependenciesFulfilled(fedObject, PrefixedGlobalSchedulerName); err != nil { - keyedLogger.Error(err, "Failed to check controller dependencies") + logger.Error(err, "Failed to check controller dependencies") return nil, nil, nil, &worker.StatusError } else if !ok { - keyedLogger.V(3).Info("Controller dependencies not fulfilled") + logger.V(3).Info("Controller dependencies not fulfilled") return nil, nil, nil, &worker.StatusAllOK } // check whether to skip scheduling - allClusters, err := 
s.clusterLister.List(labels.Everything()) + allClusters, err := s.federatedClusterInformer.Lister().List(labels.Everything()) if err != nil { - keyedLogger.Error(err, "Failed to get clusters from store") + logger.Error(err, "Failed to get clusters from store") return nil, nil, nil, &worker.StatusError } clusters := make([]*fedcorev1a1.FederatedCluster, 0) for _, cluster := range allClusters { - if util.IsClusterJoined(&cluster.Status) { + if clusterutil.IsClusterJoined(&cluster.Status) { clusters = append(clusters, cluster) } } @@ -347,13 +370,13 @@ func (s *Scheduler) prepareToSchedule( var policy fedcorev1a1.GenericPropagationPolicy var schedulingProfile *fedcorev1a1.SchedulingProfile - policyKey, hasSchedulingPolicy := MatchedPolicyKey(fedObject, s.typeConfig.GetNamespaced()) + policyKey, hasSchedulingPolicy := GetMatchedPolicyKey(fedObject) if hasSchedulingPolicy { - keyedLogger = keyedLogger.WithValues("policy", policyKey.String()) + ctx, logger = logging.InjectLoggerValues(ctx, "policy", policyKey.String()) if policy, err = s.policyFromStore(policyKey); err != nil { - keyedLogger.Error(err, "Failed to find matched policy") + logger.Error(err, "Failed to find matched policy") if apierrors.IsNotFound(err) { // do not retry since the object will be reenqueued after the policy is subsequently created // emit an event to warn users that the assigned propagation policy does not exist @@ -361,7 +384,7 @@ func (s *Scheduler) prepareToSchedule( fedObject, corev1.EventTypeWarning, EventReasonScheduleFederatedObject, - "object propagation policy %s not found", + "PropagationPolicy %s not found", policyKey.String(), ) return nil, nil, nil, &worker.StatusAllOK @@ -371,15 +394,15 @@ func (s *Scheduler) prepareToSchedule( profileName := policy.GetSpec().SchedulingProfile if len(profileName) > 0 { - keyedLogger = keyedLogger.WithValues("profile", profileName) - schedulingProfile, err = s.schedulingProfileLister.Get(profileName) + ctx, logger = logging.InjectLoggerValues(ctx, "profile", profileName) + schedulingProfile, err = s.schedulingProfileInformer.Lister().Get(profileName) if err != nil { - keyedLogger.Error(err, "Failed to get scheduling profile") + logger.Error(err, "Failed to get scheduling profile") s.eventRecorder.Eventf( fedObject, corev1.EventTypeWarning, EventReasonScheduleFederatedObject, - "failed to schedule object: %v", + "Failed to schedule object: %v", fmt.Errorf("failed to get scheduling profile %s: %w", profileName, err), ) @@ -392,15 +415,15 @@ func (s *Scheduler) prepareToSchedule( } } - triggerHash, err := s.computeSchedulingTriggerHash(fedObject, policy, clusters) + triggerHash, err := s.computeSchedulingTriggerHash(ftc, fedObject, policy, clusters) if err != nil { - keyedLogger.Error(err, "Failed to compute scheduling trigger hash") + logger.Error(err, "Failed to compute scheduling trigger hash") return nil, nil, nil, &worker.StatusError } - triggersChanged, err := annotationutil.AddAnnotation(fedObject, SchedulingTriggerHashAnnotation, triggerHash) + triggersChanged, err := annotation.AddAnnotation(fedObject, SchedulingTriggerHashAnnotation, triggerHash) if err != nil { - keyedLogger.Error(err, "Failed to update scheduling trigger hash") + logger.Error(err, "Failed to update scheduling trigger hash") return nil, nil, nil, &worker.StatusError } @@ -408,11 +431,11 @@ func (s *Scheduler) prepareToSchedule( if !triggersChanged { // scheduling triggers have not changed, skip scheduling shouldSkipScheduling = true - keyedLogger.V(3).Info("Scheduling triggers not changed, skip 
scheduling") + logger.V(3).Info("Scheduling triggers not changed, skip scheduling") } else if len(fedObject.GetAnnotations()[common.NoSchedulingAnnotation]) > 0 { // skip scheduling if no-scheduling annotation is found shouldSkipScheduling = true - keyedLogger.V(3).Info("No-scheduling annotation found, skip scheduling") + logger.V(3).Info("no-scheduling annotation found, skip scheduling") s.eventRecorder.Eventf( fedObject, corev1.EventTypeNormal, @@ -422,14 +445,17 @@ func (s *Scheduler) prepareToSchedule( } if shouldSkipScheduling { - if updated, err := s.updatePendingControllers(fedObject, false); err != nil { - keyedLogger.Error(err, "Failed to update pending controllers") + if updated, err := s.updatePendingControllers(ftc, fedObject, false); err != nil { + logger.Error(err, "Failed to update pending controllers") return nil, nil, nil, &worker.StatusError } else if updated { - if _, err := s.federatedObjectClient.Namespace(fedObject.GetNamespace()).Update( - ctx, fedObject, metav1.UpdateOptions{}, + if _, err := fedobjectadapters.Update( + ctx, + s.fedClient.CoreV1alpha1(), + fedObject, + metav1.UpdateOptions{}, ); err != nil { - keyedLogger.Error(err, "Failed to update pending controllers") + logger.Error(err, "Failed to update pending controllers") if apierrors.IsConflict(err) { return nil, nil, nil, &worker.StatusConflict } @@ -445,46 +471,45 @@ func (s *Scheduler) prepareToSchedule( func (s *Scheduler) schedule( ctx context.Context, - fedObject *unstructured.Unstructured, + ftc *fedcorev1a1.FederatedTypeConfig, + fedObject fedcorev1a1.GenericFederatedObject, policy fedcorev1a1.GenericPropagationPolicy, schedulingProfile *fedcorev1a1.SchedulingProfile, clusters []*fedcorev1a1.FederatedCluster, ) (*core.ScheduleResult, *worker.Result) { - keyedLogger := klog.FromContext(ctx) + logger := klog.FromContext(ctx) if policy == nil { // deschedule the federated object if there is no policy attached - keyedLogger.V(2).Info("No policy specified, scheduling to no clusters") + logger.V(2).Info("No policy specified, scheduling to no clusters") s.eventRecorder.Eventf( fedObject, corev1.EventTypeNormal, EventReasonScheduleFederatedObject, - "no scheduling policy specified, will schedule object to no clusters", + "No scheduling policy specified, will schedule object to no clusters", ) - return &core.ScheduleResult{ - SuggestedClusters: make(map[string]*int64), - }, nil + return &core.ScheduleResult{SuggestedClusters: make(map[string]*int64)}, nil } // schedule according to matched policy - keyedLogger.V(2).Info("Matched policy found, start scheduling") + logger.V(2).Info("Matched policy found, start scheduling") s.eventRecorder.Eventf( fedObject, corev1.EventTypeNormal, EventReasonScheduleFederatedObject, - "scheduling policy %s specified, scheduling object", + "Scheduling policy %s specified, scheduling object", common.NewQualifiedName(policy).String(), ) - schedulingUnit, err := schedulingUnitForFedObject(s.typeConfig, fedObject, policy) + schedulingUnit, err := schedulingUnitForFedObject(ftc, fedObject, policy) if err != nil { - keyedLogger.Error(err, "Failed to get scheduling unit") + logger.Error(err, "Failed to get scheduling unit") s.eventRecorder.Eventf( fedObject, corev1.EventTypeWarning, EventReasonScheduleFederatedObject, - "failed to schedule object: %v", + "Failed to schedule object: %v", fmt.Errorf("failed to get scheduling unit: %w", err), ) return nil, &worker.StatusError @@ -492,27 +517,27 @@ func (s *Scheduler) schedule( framework, err := s.createFramework(schedulingProfile, 
s.buildFrameworkHandle()) if err != nil { - keyedLogger.Error(err, "Failed to construct scheduling profile") + logger.Error(err, "Failed to construct scheduling profile") s.eventRecorder.Eventf( fedObject, corev1.EventTypeWarning, EventReasonScheduleFederatedObject, - "failed to schedule object: %v", + "Failed to schedule object: %v", fmt.Errorf("failed to construct scheduling profile: %w", err), ) return nil, &worker.StatusError } - ctx = klog.NewContext(ctx, keyedLogger) + ctx = klog.NewContext(ctx, logger) result, err := s.algorithm.Schedule(ctx, framework, *schedulingUnit, clusters) if err != nil { - keyedLogger.Error(err, "Failed to compute scheduling result") + logger.Error(err, "Failed to compute scheduling result") s.eventRecorder.Eventf( fedObject, corev1.EventTypeWarning, EventReasonScheduleFederatedObject, - "failed to schedule object: %v", + "Failed to schedule object: %v", fmt.Errorf("failed to compute scheduling result: %w", err), ) return nil, &worker.StatusError @@ -523,15 +548,16 @@ func (s *Scheduler) schedule( func (s *Scheduler) persistSchedulingResult( ctx context.Context, - fedObject *unstructured.Unstructured, + ftc *fedcorev1a1.FederatedTypeConfig, + fedObject fedcorev1a1.GenericFederatedObject, result core.ScheduleResult, auxInfo *auxiliarySchedulingInformation, ) worker.Result { - keyedLogger := klog.FromContext(ctx) + logger := klog.FromContext(ctx) - schedulingResultsChanged, err := s.applySchedulingResult(fedObject, result, auxInfo) + schedulingResultsChanged, err := s.applySchedulingResult(ftc, fedObject, result, auxInfo) if err != nil { - keyedLogger.Error(err, "Failed to apply scheduling result") + logger.Error(err, "Failed to apply scheduling result") s.eventRecorder.Eventf( fedObject, corev1.EventTypeWarning, @@ -541,9 +567,9 @@ func (s *Scheduler) persistSchedulingResult( ) return worker.StatusError } - _, err = s.updatePendingControllers(fedObject, schedulingResultsChanged) + _, err = s.updatePendingControllers(ftc, fedObject, schedulingResultsChanged) if err != nil { - keyedLogger.Error(err, "Failed to update pending controllers") + logger.Error(err, "Failed to update pending controllers") s.eventRecorder.Eventf( fedObject, corev1.EventTypeWarning, @@ -556,13 +582,14 @@ func (s *Scheduler) persistSchedulingResult( // We always update the federated object because the fact that scheduling even occurred minimally implies that the // scheduling trigger hash must have changed. - keyedLogger.V(1).Info("Updating federated object") - if _, err := s.federatedObjectClient.Namespace(fedObject.GetNamespace()).Update( + logger.V(1).Info("Updating federated object") + if _, err := fedobjectadapters.Update( ctx, + s.fedClient.CoreV1alpha1(), fedObject, metav1.UpdateOptions{}, ); err != nil { - keyedLogger.Error(err, "Failed to update federated object") + logger.Error(err, "Failed to update federated object") if apierrors.IsConflict(err) { return worker.StatusConflict } @@ -576,7 +603,7 @@ func (s *Scheduler) persistSchedulingResult( return worker.StatusError } - keyedLogger.V(1).Info("Updated federated object") + logger.V(1).Info("Updated federated object") s.eventRecorder.Eventf( fedObject, corev1.EventTypeNormal, @@ -588,38 +615,19 @@ func (s *Scheduler) persistSchedulingResult( return worker.StatusAllOK } -// federatedObjectFromStore uses the given qualified name to retrieve a federated object from the scheduler's lister, it will help to -// resolve the object's scope and namespace based on the scheduler's type config. 
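The hunks above and just below replace the scheduler's per-type dynamic client with the fedobjectadapters helpers, which dispatch to the namespaced FederatedObject or cluster-scoped ClusterFederatedObject API depending on the object. A condensed sketch of the resulting read-modify-write pattern, using only the call shapes visible in this diff; the annotation key is made up, and the standard metav1.Object accessors are assumed available on GenericFederatedObject.

```go
package example

import (
	"context"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	fedclient "github.com/kubewharf/kubeadmiral/pkg/client/clientset/versioned"
	fedcorev1a1informers "github.com/kubewharf/kubeadmiral/pkg/client/informers/externalversions/core/v1alpha1"
	"github.com/kubewharf/kubeadmiral/pkg/controllers/common"
	"github.com/kubewharf/kubeadmiral/pkg/util/fedobjectadapters"
)

// annotateFedObject reads a (Cluster)FederatedObject through the unified
// adapters, mutates a deep copy, and writes it back.
func annotateFedObject(
	ctx context.Context,
	fedClient fedclient.Interface,
	fedObjectInformer fedcorev1a1informers.FederatedObjectInformer,
	clusterFedObjectInformer fedcorev1a1informers.ClusterFederatedObjectInformer,
	key common.QualifiedName,
) error {
	// An empty key.Namespace presumably selects the cluster-scoped lister internally.
	fedObject, err := fedobjectadapters.GetFromLister(
		fedObjectInformer.Lister(),
		clusterFedObjectInformer.Lister(),
		key.Namespace,
		key.Name,
	)
	if err != nil {
		return err
	}

	fedObject = fedObject.DeepCopyGenericFederatedObject()

	annotations := fedObject.GetAnnotations()
	if annotations == nil {
		annotations = map[string]string{}
	}
	annotations["example.kubeadmiral.io/touched"] = "true" // hypothetical annotation key
	fedObject.SetAnnotations(annotations)

	// Update likewise dispatches to the namespaced or cluster-scoped client.
	_, err = fedobjectadapters.Update(ctx, fedClient.CoreV1alpha1(), fedObject, metav1.UpdateOptions{})
	return err
}
```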
-func (s *Scheduler) federatedObjectFromStore(qualifiedName common.QualifiedName) (*unstructured.Unstructured, error) { - var obj pkgruntime.Object - var err error - - if s.typeConfig.GetNamespaced() { - obj, err = s.federatedObjectLister.ByNamespace(qualifiedName.Namespace).Get(qualifiedName.Name) - } else { - obj, err = s.federatedObjectLister.Get(qualifiedName.Name) - } - - return obj.(*unstructured.Unstructured), err -} - -// policyFromStore uses the given qualified name to retrieve a policy from the scheduler's policy listers. -func (s *Scheduler) policyFromStore(qualifiedName common.QualifiedName) (fedcorev1a1.GenericPropagationPolicy, error) { - if len(qualifiedName.Namespace) > 0 { - return s.propagationPolicyLister.PropagationPolicies(qualifiedName.Namespace).Get(qualifiedName.Name) - } - return s.clusterPropagationPolicyLister.Get(qualifiedName.Name) -} - -// updatePendingControllers removes the scheduler from the object's pending controller annotation. If wasModified is true (the scheduling -// result was not modified), it will additionally set the downstream processors to notify them to reconcile the changes made by the -// scheduler. -func (s *Scheduler) updatePendingControllers(fedObject *unstructured.Unstructured, wasModified bool) (bool, error) { +// updatePendingControllers removes the scheduler from the object's pending controller annotation. If wasModified is +// true (the scheduling result was not modified), it will additionally set the downstream processors to notify them to +// reconcile the changes made by the scheduler. +func (s *Scheduler) updatePendingControllers( + ftc *fedcorev1a1.FederatedTypeConfig, + fedObject fedcorev1a1.GenericFederatedObject, + wasModified bool, +) (bool, error) { return pendingcontrollers.UpdatePendingControllers( fedObject, PrefixedGlobalSchedulerName, wasModified, - s.typeConfig.GetControllers(), + ftc.GetControllers(), ) } @@ -628,37 +636,38 @@ type auxiliarySchedulingInformation struct { unschedulableThreshold *time.Duration } -// applySchedulingResult updates the federated object with the scheduling result and the enableFollowerScheduling annotation, it returns a -// bool indicating if the scheduling result has changed. +// applySchedulingResult updates the federated object with the scheduling result and the enableFollowerScheduling +// annotation, it returns a bool indicating if the scheduling result has changed. func (s *Scheduler) applySchedulingResult( - fedObject *unstructured.Unstructured, + ftc *fedcorev1a1.FederatedTypeConfig, + fedObject fedcorev1a1.GenericFederatedObject, result core.ScheduleResult, auxInfo *auxiliarySchedulingInformation, ) (bool, error) { objectModified := false clusterSet := result.ClusterSet() - // set placements - placementUpdated, err := util.SetPlacementClusterNames(fedObject, PrefixedGlobalSchedulerName, clusterSet) - if err != nil { - return false, err - } + // 1. Set placements + + placementUpdated := fedObject.GetSpec().SetControllerPlacement(PrefixedGlobalSchedulerName, sets.List(clusterSet)) objectModified = objectModified || placementUpdated - // set replicas overrides + // 2. 
Set replicas overrides + desiredOverrides := map[string]int64{} for clusterName, replicaCount := range result.SuggestedClusters { if replicaCount != nil { desiredOverrides[clusterName] = *replicaCount } } - overridesUpdated, err := UpdateReplicasOverride(s.typeConfig, fedObject, desiredOverrides) + overridesUpdated, err := UpdateReplicasOverride(ftc, fedObject, desiredOverrides) if err != nil { return false, err } objectModified = objectModified || overridesUpdated - // set annotations + // 3. Set annotations + annotations := fedObject.GetAnnotations() if annotations == nil { annotations = make(map[string]string, 2) @@ -694,3 +703,132 @@ func (s *Scheduler) applySchedulingResult( return objectModified, nil } + +func (s *Scheduler) enqueueFederatedObjectsForPolicy(policy metav1.Object) { + policyAccessor, ok := policy.(fedcorev1a1.GenericPropagationPolicy) + if !ok { + s.logger.Error( + fmt.Errorf("policy is not a valid type (%T)", policy), + "Failed to enqueue federated object for policy", + ) + return + } + + policyKey := common.NewQualifiedName(policyAccessor) + logger := s.logger.WithValues("policy", policyKey.String()) + logger.V(2).Info("Enqueue FederatedObjects and ClusterFederatedObjects for policy") + + allObjects := []metav1.Object{} + + if len(policyKey.Namespace) > 0 { + // If the policy is namespaced, we only need to scan FederatedObjects in the same namespace. + fedObjects, err := s.fedObjectInformer.Lister().FederatedObjects(policyKey.Namespace).List(labels.Everything()) + if err != nil { + s.logger.Error(err, "Failed to enqueue FederatedObjects for policy") + return + } + for _, obj := range fedObjects { + allObjects = append(allObjects, obj) + } + } else { + // If the policy is cluster-scoped, we need to scan all FederatedObjects and ClusterFederatedObjects + fedObjects, err := s.fedObjectInformer.Lister().List(labels.Everything()) + if err != nil { + s.logger.Error(err, "Failed to enqueue FederatedObjects for policy") + return + } + for _, obj := range fedObjects { + allObjects = append(allObjects, obj) + } + + clusterFedObjects, err := s.clusterFedObjectInformer.Lister().List(labels.Everything()) + if err != nil { + s.logger.Error(err, "Failed to enqueue ClusterFederatedObjects for policy") + return + } + for _, obj := range clusterFedObjects { + allObjects = append(allObjects, obj) + } + } + + for _, obj := range allObjects { + if policyKey, found := GetMatchedPolicyKey(obj); !found { + continue + } else if policyKey.Name == policyAccessor.GetName() && policyKey.Namespace == policyAccessor.GetNamespace() { + s.worker.EnqueueObject(obj) + } + } +} + +func (s *Scheduler) enqueueFederatedObjectsForCluster(cluster *fedcorev1a1.FederatedCluster) { + logger := s.logger.WithValues("cluster", cluster.GetName()) + + if !clusterutil.IsClusterJoined(&cluster.Status) { + s.logger.WithValues("cluster", cluster.Name). + V(3). 
+ Info("Skip enqueue federated objects for cluster, cluster not joined") + return + } + + logger.V(2).Info("Enqueue federated objects for cluster") + + fedObjects, err := s.fedObjectInformer.Lister().List(labels.Everything()) + if err != nil { + s.logger.Error(err, "Failed to enquue FederatedObjects for policy") + return + } + for _, obj := range fedObjects { + s.worker.EnqueueObject(obj) + } + clusterFedObjects, err := s.clusterFedObjectInformer.Lister().List(labels.Everything()) + if err != nil { + s.logger.Error(err, "Failed to enquue ClusterFederatedObjects for policy") + return + } + for _, obj := range clusterFedObjects { + s.worker.EnqueueObject(obj) + } +} + +func (s *Scheduler) enqueueFederatedObjectsForFTC(ftc *fedcorev1a1.FederatedTypeConfig) { + logger := s.logger.WithValues("ftc", ftc.GetName()) + + logger.V(2).Info("Enqueue federated objects for FTC") + + allObjects := []fedcorev1a1.GenericFederatedObject{} + fedObjects, err := s.fedObjectInformer.Lister().List(labels.Everything()) + if err != nil { + s.logger.Error(err, "Failed to enquue FederatedObjects for policy") + return + } + for _, obj := range fedObjects { + allObjects = append(allObjects, obj) + } + clusterFedObjects, err := s.clusterFedObjectInformer.Lister().List(labels.Everything()) + if err != nil { + s.logger.Error(err, "Failed to enquue ClusterFederatedObjects for policy") + return + } + for _, obj := range clusterFedObjects { + allObjects = append(allObjects, obj) + } + + for _, obj := range allObjects { + sourceGVK, err := obj.GetSpec().GetTemplateGVK() + if err != nil { + s.logger.Error(err, "Failed to get source GVK from FederatedObject, will not enqueue") + continue + } + if sourceGVK == ftc.GetSourceTypeGVK() { + s.worker.EnqueueObject(obj) + } + } +} + +// policyFromStore uses the given qualified name to retrieve a policy from the scheduler's policy listers. +func (s *Scheduler) policyFromStore(qualifiedName common.QualifiedName) (fedcorev1a1.GenericPropagationPolicy, error) { + if len(qualifiedName.Namespace) > 0 { + return s.propagationPolicyInformer.Lister().PropagationPolicies(qualifiedName.Namespace).Get(qualifiedName.Name) + } + return s.clusterPropagationPolicyInformer.Lister().Get(qualifiedName.Name) +} diff --git a/pkg/controllers/scheduler/schedulingtriggers.go b/pkg/controllers/scheduler/schedulingtriggers.go index e1e1bab8..118a624d 100644 --- a/pkg/controllers/scheduler/schedulingtriggers.go +++ b/pkg/controllers/scheduler/schedulingtriggers.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2023 The KubeAdmiral Authors. 
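For context on the schedulingtriggers.go hunks below: the scheduler serializes its scheduling-relevant inputs, hashes them, and stores the hash under SchedulingTriggerHashAnnotation (see prepareToSchedule above); when the hash is unchanged, scheduling is skipped. A simplified stand-in for that pattern follows; the field selection, encoding, and hash function are illustrative and not the actual implementation.

```go
package example

import (
	"crypto/sha256"
	"encoding/json"
	"fmt"
)

// triggers is a reduced stand-in for the schedulingTriggers struct, which also
// tracks policy generation, cluster labels, taints, and API resources.
type triggers struct {
	SchedulingAnnotations map[string]string `json:"schedulingAnnotations"`
	ReplicaCount          int64             `json:"replicaCount"`
	ClusterNames          []string          `json:"clusterNames"` // assumed pre-sorted for determinism
}

// hash serializes the triggers deterministically and hashes the result.
func (t triggers) hash() (string, error) {
	data, err := json.Marshal(t) // map keys are emitted in sorted order
	if err != nil {
		return "", err
	}
	sum := sha256.Sum256(data)
	return fmt.Sprintf("%x", sum), nil
}

// shouldReschedule compares the freshly computed hash against the one recorded
// on the object's annotations under annotationKey.
func shouldReschedule(annotations map[string]string, annotationKey string, current triggers) (bool, string, error) {
	newHash, err := current.hash()
	if err != nil {
		return false, "", err
	}
	return annotations[annotationKey] != newHash, newHash, nil
}
```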
@@ -26,16 +25,12 @@ import ( "golang.org/x/exp/constraints" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/labels" - pkgruntime "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/sets" fedcorev1a1 "github.com/kubewharf/kubeadmiral/pkg/apis/core/v1alpha1" "github.com/kubewharf/kubeadmiral/pkg/controllers/common" "github.com/kubewharf/kubeadmiral/pkg/controllers/scheduler/framework" - "github.com/kubewharf/kubeadmiral/pkg/controllers/util" - utilunstructured "github.com/kubewharf/kubeadmiral/pkg/controllers/util/unstructured" + unstructuredutil "github.com/kubewharf/kubeadmiral/pkg/util/unstructured" ) /* @@ -105,7 +100,8 @@ type schedulingTriggers struct { } func (s *Scheduler) computeSchedulingTriggerHash( - fedObject *unstructured.Unstructured, + ftc *fedcorev1a1.FederatedTypeConfig, + fedObject fedcorev1a1.GenericFederatedObject, policy fedcorev1a1.GenericPropagationPolicy, clusters []*fedcorev1a1.FederatedCluster, ) (string, error) { @@ -114,7 +110,7 @@ func (s *Scheduler) computeSchedulingTriggerHash( var err error trigger.SchedulingAnnotations = getSchedulingAnnotations(fedObject) - if trigger.ReplicaCount, err = getReplicaCount(s.typeConfig, fedObject); err != nil { + if trigger.ReplicaCount, err = getReplicaCount(ftc, fedObject); err != nil { return "", fmt.Errorf("failed to get object replica count: %w", err) } trigger.ResourceRequest = getResourceRequest(fedObject) @@ -159,7 +155,7 @@ var knownSchedulingAnnotations = sets.New( FollowsObjectAnnotation, ) -func getSchedulingAnnotations(fedObject *unstructured.Unstructured) []keyValue[string, string] { +func getSchedulingAnnotations(fedObject fedcorev1a1.GenericFederatedObject) []keyValue[string, string] { annotations := fedObject.GetAnnotations() // this is a deep copy for k := range annotations { if !knownSchedulingAnnotations.Has(k) { @@ -169,16 +165,20 @@ func getSchedulingAnnotations(fedObject *unstructured.Unstructured) []keyValue[s return sortMap(annotations) } -func getReplicaCount(typeConfig *fedcorev1a1.FederatedTypeConfig, fedObject *unstructured.Unstructured) (int64, error) { - if len(typeConfig.Spec.PathDefinition.ReplicasSpec) == 0 { +func getReplicaCount( + ftc *fedcorev1a1.FederatedTypeConfig, + fedObject fedcorev1a1.GenericFederatedObject, +) (int64, error) { + if len(ftc.Spec.PathDefinition.ReplicasSpec) == 0 { return 0, nil } - value, err := utilunstructured.GetInt64FromPath( - fedObject, - typeConfig.Spec.PathDefinition.ReplicasSpec, - common.TemplatePath, - ) + template, err := fedObject.GetSpec().GetTemplateAsUnstructured() + if err != nil { + return 0, err + } + + value, err := unstructuredutil.GetInt64FromPath(template, ftc.Spec.PathDefinition.ReplicasSpec, nil) if err != nil || value == nil { return 0, err } @@ -186,7 +186,7 @@ func getReplicaCount(typeConfig *fedcorev1a1.FederatedTypeConfig, fedObject *uns return *value, nil } -func getResourceRequest(fedObject *unstructured.Unstructured) framework.Resource { +func getResourceRequest(fedObject fedcorev1a1.GenericFederatedObject) framework.Resource { // TODO: update once we have a proper way to obtian resource request from federated objects return framework.Resource{} } @@ -231,7 +231,9 @@ func getClusterTaints(clusters []*fedcorev1a1.FederatedCluster) []keyValue[strin return sortMap(ret) } -func getClusterAPIResourceTypes(clusters []*fedcorev1a1.FederatedCluster) []keyValue[string, []fedcorev1a1.APIResource] { +func getClusterAPIResourceTypes( + clusters 
[]*fedcorev1a1.FederatedCluster, +) []keyValue[string, []fedcorev1a1.APIResource] { ret := make(map[string][]fedcorev1a1.APIResource) for _, cluster := range clusters { @@ -261,53 +263,3 @@ func getClusterAPIResourceTypes(clusters []*fedcorev1a1.FederatedCluster) []keyV } return sortMap(ret) } - -// enqueueFederatedObjectsForPolicy enqueues federated objects which match the policy -func (s *Scheduler) enqueueFederatedObjectsForPolicy(policy pkgruntime.Object) { - policyAccessor, ok := policy.(fedcorev1a1.GenericPropagationPolicy) - if !ok { - s.logger.Error(fmt.Errorf("policy is not a valid type (%T)", policy), "Failed to enqueue federated object for policy") - return - } - - s.logger.WithValues("policy", policyAccessor.GetName()).V(2).Info("Enqueue federated objects for policy") - - fedObjects, err := s.federatedObjectLister.List(labels.Everything()) - if err != nil { - s.logger.WithValues("policy", policyAccessor.GetName()).Error(err, "Failed to enqueue federated objects for policy") - return - } - - for _, fedObject := range fedObjects { - fedObject := fedObject.(*unstructured.Unstructured) - policyKey, found := MatchedPolicyKey(fedObject, s.typeConfig.GetNamespaced()) - if !found { - continue - } - - if policyKey.Name == policyAccessor.GetName() && policyKey.Namespace == policyAccessor.GetNamespace() { - s.worker.EnqueueObject(fedObject) - } - } -} - -// enqueueFederatedObjectsForCluster enqueues all federated objects only if the cluster is joined -func (s *Scheduler) enqueueFederatedObjectsForCluster(cluster pkgruntime.Object) { - clusterObj := cluster.(*fedcorev1a1.FederatedCluster) - if !util.IsClusterJoined(&clusterObj.Status) { - s.logger.WithValues("cluster", clusterObj.Name).V(3).Info("Skip enqueue federated objects for cluster, cluster not joined") - return - } - - s.logger.WithValues("cluster", clusterObj.Name).V(2).Info("Enqueue federated objects for cluster") - - fedObjects, err := s.federatedObjectLister.List(labels.Everything()) - if err != nil { - s.logger.Error(err, "Failed to enqueue federated object for cluster") - return - } - - for _, fedObject := range fedObjects { - s.worker.EnqueueObject(fedObject) - } -} diff --git a/pkg/controllers/scheduler/schedulingunit.go b/pkg/controllers/scheduler/schedulingunit.go index 6e2f3cb6..a69fc87f 100644 --- a/pkg/controllers/scheduler/schedulingunit.go +++ b/pkg/controllers/scheduler/schedulingunit.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2023 The KubeAdmiral Authors. 
@@ -23,27 +22,23 @@ import ( "strconv" corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/klog/v2" fedcorev1a1 "github.com/kubewharf/kubeadmiral/pkg/apis/core/v1alpha1" "github.com/kubewharf/kubeadmiral/pkg/controllers/common" "github.com/kubewharf/kubeadmiral/pkg/controllers/scheduler/framework" - "github.com/kubewharf/kubeadmiral/pkg/controllers/util" - utilunstructured "github.com/kubewharf/kubeadmiral/pkg/controllers/util/unstructured" + unstructuredutil "github.com/kubewharf/kubeadmiral/pkg/util/unstructured" ) func schedulingUnitForFedObject( typeConfig *fedcorev1a1.FederatedTypeConfig, - fedObject *unstructured.Unstructured, + fedObject fedcorev1a1.GenericFederatedObject, policy fedcorev1a1.GenericPropagationPolicy, ) (*framework.SchedulingUnit, error) { - template, err := getTemplate(fedObject) + template, err := fedObject.GetSpec().GetTemplateAsUnstructured() if err != nil { - return nil, fmt.Errorf("error retrieving template: %w", err) + return nil, fmt.Errorf("error retrieving template from FederatedObject: %w", err) } schedulingMode := getSchedulingModeFromPolicy(policy) @@ -58,11 +53,7 @@ func schedulingUnitForFedObject( schedulingMode = fedcorev1a1.SchedulingModeDuplicate } if schedulingMode == fedcorev1a1.SchedulingModeDivide { - value, err := utilunstructured.GetInt64FromPath( - fedObject, - typeConfig.Spec.PathDefinition.ReplicasSpec, - common.TemplatePath, - ) + value, err := unstructuredutil.GetInt64FromPath(template, typeConfig.Spec.PathDefinition.ReplicasSpec, nil) if err != nil { return nil, err } @@ -75,11 +66,11 @@ func schedulingUnitForFedObject( return nil, err } - targetType := typeConfig.GetTargetType() + sourceType := typeConfig.GetSourceType() schedulingUnit := &framework.SchedulingUnit{ - GroupVersion: schema.GroupVersion{Group: targetType.Group, Version: targetType.Version}, - Kind: targetType.Kind, - Resource: targetType.Name, + GroupVersion: schema.GroupVersion{Group: sourceType.Group, Version: sourceType.Version}, + Kind: sourceType.Kind, + Resource: sourceType.Name, Namespace: template.GetNamespace(), Name: template.GetName(), Labels: template.GetLabels(), @@ -163,57 +154,33 @@ func schedulingUnitForFedObject( return schedulingUnit, nil } -func getTemplate(fedObject *unstructured.Unstructured) (*metav1.PartialObjectMetadata, error) { - templateContent, exists, err := unstructured.NestedMap(fedObject.Object, common.TemplatePath...) 
- if err != nil { - return nil, fmt.Errorf("error retrieving template: %w", err) - } - if !exists { - return nil, fmt.Errorf("template not found") - } - obj := metav1.PartialObjectMetadata{} - err = runtime.DefaultUnstructuredConverter.FromUnstructured(templateContent, &obj) - if err != nil { - return nil, fmt.Errorf("template cannot be converted from unstructured: %w", err) - } - return &obj, nil -} - func getCurrentReplicasFromObject( - typeConfig *fedcorev1a1.FederatedTypeConfig, - object *unstructured.Unstructured, + ftc *fedcorev1a1.FederatedTypeConfig, + fedObject fedcorev1a1.GenericFederatedObject, ) (map[string]*int64, error) { - placementObj, err := util.UnmarshalGenericPlacements(object) - if err != nil { - return nil, err - } + placements := fedObject.GetSpec().GetControllerPlacement(PrefixedGlobalSchedulerName) + overrides := fedObject.GetSpec().GetControllerOverrides(PrefixedGlobalSchedulerName) - var clusterNames map[string]struct{} - if placement := placementObj.Spec.GetPlacementOrNil(PrefixedGlobalSchedulerName); placement != nil { - clusterNames = placement.ClusterNames() - } + res := make(map[string]*int64, len(placements)) - clusterOverridesMap, err := util.GetOverrides(object, PrefixedGlobalSchedulerName) - if err != nil { - return nil, err + for _, placement := range placements { + res[placement.Cluster] = nil } - res := make(map[string]*int64, len(clusterNames)) - for cluster := range clusterNames { - res[cluster] = nil + replicasPath := unstructuredutil.ToSlashPath(ftc.Spec.PathDefinition.ReplicasSpec) - clusterOverrides, exists := clusterOverridesMap[cluster] - if !exists { + for _, override := range overrides { + if _, ok := res[override.Cluster]; !ok { continue } - for _, override := range clusterOverrides { - if override.Path == utilunstructured.ToSlashPath(typeConfig.Spec.PathDefinition.ReplicasSpec) && - (override.Op == operationReplace || override.Op == "") { - // The type of the value will be float64 due to how json - // marshalling works for interfaces. 
- replicas := int64(override.Value.(float64)) - res[cluster] = &replicas + for _, patch := range override.Patches { + if patch.Op == replicasPath && (patch.Op == overridePatchOpReplace || patch.Op == "") { + var replicas *int64 + if err := json.Unmarshal(patch.Value.Raw, replicas); err != nil { + continue + } + res[override.Cluster] = replicas break } } @@ -232,8 +199,8 @@ func getSchedulingModeFromPolicy(policy fedcorev1a1.GenericPropagationPolicy) fe return DefaultSchedulingMode } -func getSchedulingModeFromObject(object *unstructured.Unstructured) (fedcorev1a1.SchedulingMode, bool) { - annotations := object.GetAnnotations() +func getSchedulingModeFromObject(fedObject fedcorev1a1.GenericFederatedObject) (fedcorev1a1.SchedulingMode, bool) { + annotations := fedObject.GetAnnotations() if annotations == nil { return "", false } @@ -254,12 +221,12 @@ func getSchedulingModeFromObject(object *unstructured.Unstructured) (fedcorev1a1 "Invalid value %s for scheduling mode annotation (%s) on fed object %s", annotation, SchedulingModeAnnotation, - object.GetName(), + fedObject.GetName(), ) return "", false } -func getAutoMigrationInfo(fedObject *unstructured.Unstructured) (*framework.AutoMigrationInfo, error) { +func getAutoMigrationInfo(fedObject fedcorev1a1.GenericFederatedObject) (*framework.AutoMigrationInfo, error) { value, exists := fedObject.GetAnnotations()[common.AutoMigrationInfoAnnotation] if !exists { return nil, nil @@ -276,7 +243,7 @@ func getIsStickyClusterFromPolicy(policy fedcorev1a1.GenericPropagationPolicy) b return policy.GetSpec().StickyCluster } -func getIsStickyClusterFromObject(object *unstructured.Unstructured) (bool, bool) { +func getIsStickyClusterFromObject(object fedcorev1a1.GenericFederatedObject) (bool, bool) { // TODO: consider passing in the annotations directly to prevent incurring a deep copy for each call annotations := object.GetAnnotations() if annotations == nil { @@ -308,7 +275,7 @@ func getClusterSelectorFromPolicy(policy fedcorev1a1.GenericPropagationPolicy) m return policy.GetSpec().ClusterSelector } -func getClusterSelectorFromObject(object *unstructured.Unstructured) (map[string]string, bool) { +func getClusterSelectorFromObject(object fedcorev1a1.GenericFederatedObject) (map[string]string, bool) { annotations := object.GetAnnotations() if annotations == nil { return nil, false @@ -351,7 +318,7 @@ func getAffinityFromPolicy(policy fedcorev1a1.GenericPropagationPolicy) *framewo return affinity } -func getAffinityFromObject(object *unstructured.Unstructured) (*framework.Affinity, bool) { +func getAffinityFromObject(object fedcorev1a1.GenericFederatedObject) (*framework.Affinity, bool) { annotations := object.GetAnnotations() if annotations == nil { return nil, false @@ -381,7 +348,7 @@ func getTolerationsFromPolicy(policy fedcorev1a1.GenericPropagationPolicy) []cor return policy.GetSpec().Tolerations } -func getTolerationsFromObject(object *unstructured.Unstructured) ([]corev1.Toleration, bool) { +func getTolerationsFromObject(object fedcorev1a1.GenericFederatedObject) ([]corev1.Toleration, bool) { annotations := object.GetAnnotations() if annotations == nil { return nil, false @@ -411,7 +378,7 @@ func getMaxClustersFromPolicy(policy fedcorev1a1.GenericPropagationPolicy) *int6 return policy.GetSpec().MaxClusters } -func getMaxClustersFromObject(object *unstructured.Unstructured) (*int64, bool) { +func getMaxClustersFromObject(object fedcorev1a1.GenericFederatedObject) (*int64, bool) { annotations := object.GetAnnotations() if annotations == nil { return 
nil, false @@ -463,7 +430,7 @@ func getWeightsFromPolicy(policy fedcorev1a1.GenericPropagationPolicy) map[strin return weights } -func getWeightsFromObject(object *unstructured.Unstructured) (map[string]int64, bool) { +func getWeightsFromObject(object fedcorev1a1.GenericFederatedObject) (map[string]int64, bool) { annotations := object.GetAnnotations() if annotations == nil { return nil, false @@ -474,7 +441,7 @@ func getWeightsFromObject(object *unstructured.Unstructured) (map[string]int64, return nil, false } - var placements []fedcorev1a1.ClusterReference + var placements []fedcorev1a1.DesiredPlacement err := json.Unmarshal([]byte(annotation), &placements) if err != nil { klog.Errorf( @@ -521,7 +488,7 @@ func getMinReplicasFromPolicy(policy fedcorev1a1.GenericPropagationPolicy) map[s return minReplicas } -func getMinReplicasFromObject(object *unstructured.Unstructured) (map[string]int64, bool) { +func getMinReplicasFromObject(object fedcorev1a1.GenericFederatedObject) (map[string]int64, bool) { annotations := object.GetAnnotations() if annotations == nil { return nil, false @@ -532,7 +499,7 @@ func getMinReplicasFromObject(object *unstructured.Unstructured) (map[string]int return nil, false } - var placements []fedcorev1a1.ClusterReference + var placements []fedcorev1a1.DesiredPlacement err := json.Unmarshal([]byte(annotation), &placements) if err != nil { klog.Errorf( @@ -579,7 +546,7 @@ func getMaxReplicasFromPolicy(policy fedcorev1a1.GenericPropagationPolicy) map[s return maxReplicas } -func getMaxReplicasFromObject(object *unstructured.Unstructured) (map[string]int64, bool) { +func getMaxReplicasFromObject(object fedcorev1a1.GenericFederatedObject) (map[string]int64, bool) { annotations := object.GetAnnotations() if annotations == nil { return nil, false @@ -590,7 +557,7 @@ func getMaxReplicasFromObject(object *unstructured.Unstructured) (map[string]int return nil, false } - var placements []fedcorev1a1.ClusterReference + var placements []fedcorev1a1.DesiredPlacement err := json.Unmarshal([]byte(annotation), &placements) if err != nil { klog.Errorf( @@ -637,7 +604,7 @@ func getClusterNamesFromPolicy(policy fedcorev1a1.GenericPropagationPolicy) map[ return clusterNames } -func getClusterNamesFromObject(object *unstructured.Unstructured) (map[string]struct{}, bool) { +func getClusterNamesFromObject(object fedcorev1a1.GenericFederatedObject) (map[string]struct{}, bool) { annotations := object.GetAnnotations() if annotations == nil { return nil, false diff --git a/pkg/controllers/scheduler/util.go b/pkg/controllers/scheduler/util.go index 2a2b69d9..0bd63424 100644 --- a/pkg/controllers/scheduler/util.go +++ b/pkg/controllers/scheduler/util.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2023 The KubeAdmiral Authors. @@ -18,170 +17,64 @@ limitations under the License. 
package scheduler import ( - "sync" + "fmt" - "github.com/pkg/errors" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/client-go/dynamic" - - fedtypesv1a1 "github.com/kubewharf/kubeadmiral/pkg/apis/types/v1alpha1" + v1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/json" fedcorev1a1 "github.com/kubewharf/kubeadmiral/pkg/apis/core/v1alpha1" "github.com/kubewharf/kubeadmiral/pkg/controllers/common" - "github.com/kubewharf/kubeadmiral/pkg/controllers/util" - utilunstructured "github.com/kubewharf/kubeadmiral/pkg/controllers/util/unstructured" + "github.com/kubewharf/kubeadmiral/pkg/util/unstructured" ) const ( - operationReplace = "replace" + overridePatchOpReplace = "replace" ) -func MatchedPolicyKey(obj fedcorev1a1.GenericFederatedObject, isNamespaced bool) (result common.QualifiedName, ok bool) { +func GetMatchedPolicyKey(obj metav1.Object) (result common.QualifiedName, ok bool) { labels := obj.GetLabels() + isNamespaced := len(obj.GetNamespace()) > 0 - if policyName, exists := labels[PropagationPolicyNameLabel]; exists && isNamespaced { + if policyName, exists := labels[common.PropagationPolicyNameLabel]; exists && isNamespaced { return common.QualifiedName{Namespace: obj.GetNamespace(), Name: policyName}, true } - if policyName, exists := labels[ClusterPropagationPolicyNameLabel]; exists { + if policyName, exists := labels[common.ClusterPropagationPolicyNameLabel]; exists { return common.QualifiedName{Namespace: "", Name: policyName}, true } return common.QualifiedName{}, false } -type ClusterClients struct { - clients sync.Map -} - -func (c *ClusterClients) Get(cluster string) dynamic.Interface { - val, ok := c.clients.Load(cluster) - if ok { - return val.(dynamic.Interface) - } - return nil -} - -func (c *ClusterClients) Set(cluster string, client dynamic.Interface) { - c.clients.Store(cluster, client) -} - -func (c *ClusterClients) Delete(cluster string) { - c.clients.Delete(cluster) -} - func UpdateReplicasOverride( - typeConfig *fedcorev1a1.FederatedTypeConfig, - fedObject *unstructured.Unstructured, + ftc *fedcorev1a1.FederatedTypeConfig, + fedObject fedcorev1a1.GenericFederatedObject, result map[string]int64, ) (updated bool, err error) { - overridesMap, err := util.GetOverrides(fedObject, PrefixedGlobalSchedulerName) - if err != nil { - return updated, errors.Wrapf( - err, - "Error reading cluster overrides for %s/%s", - fedObject.GetNamespace(), - fedObject.GetName(), - ) - } + replicasPath := unstructured.ToSlashPath(ftc.Spec.PathDefinition.ReplicasSpec) - if OverrideUpdateNeeded(typeConfig, overridesMap, result) { - err := setOverrides(typeConfig, fedObject, overridesMap, result) + newOverrides := []fedcorev1a1.ClusterReferenceWithPatches{} + for cluster, replicas := range result { + replicasRaw, err := json.Marshal(replicas) if err != nil { - return updated, err + return false, fmt.Errorf("failed to marshal replicas value: %w", err) } - updated = true - } - return updated, nil -} - -func setOverrides( - typeConfig *fedcorev1a1.FederatedTypeConfig, - obj *unstructured.Unstructured, - overridesMap util.OverridesMap, - replicasMap map[string]int64, -) error { - if overridesMap == nil { - overridesMap = make(util.OverridesMap) - } - updateOverridesMap(typeConfig, overridesMap, replicasMap) - return util.SetOverrides(obj, PrefixedGlobalSchedulerName, overridesMap) -} - -func updateOverridesMap( - typeConfig *fedcorev1a1.FederatedTypeConfig, - overridesMap util.OverridesMap, - 
replicasMap map[string]int64, -) { - replicasPath := utilunstructured.ToSlashPath(typeConfig.Spec.PathDefinition.ReplicasSpec) - - // Remove replicas override for clusters that are not scheduled - for clusterName, clusterOverrides := range overridesMap { - if _, ok := replicasMap[clusterName]; !ok { - for i, overrideItem := range clusterOverrides { - if overrideItem.Path == replicasPath { - clusterOverrides = append(clusterOverrides[:i], clusterOverrides[i+1:]...) - if len(clusterOverrides) == 0 { - // delete empty ClusterOverrides item - delete(overridesMap, clusterName) - } else { - overridesMap[clusterName] = clusterOverrides - } - break - } - } - } - } - // Add/update replicas override for clusters that are scheduled - for clusterName, replicas := range replicasMap { - replicasOverrideFound := false - for idx, overrideItem := range overridesMap[clusterName] { - if overrideItem.Path == replicasPath { - overridesMap[clusterName][idx].Value = replicas - replicasOverrideFound = true - break - } - } - if !replicasOverrideFound { - clusterOverrides, exist := overridesMap[clusterName] - if !exist { - clusterOverrides = fedtypesv1a1.OverridePatches{} - } - clusterOverrides = append(clusterOverrides, fedtypesv1a1.OverridePatch{Path: replicasPath, Value: replicas}) - overridesMap[clusterName] = clusterOverrides + override := fedcorev1a1.ClusterReferenceWithPatches{ + Cluster: cluster, + Patches: fedcorev1a1.OverridePatches{ + { + Op: overridePatchOpReplace, + Path: replicasPath, + Value: v1.JSON{ + Raw: replicasRaw, + }, + }, + }, } + newOverrides = append(newOverrides, override) } -} - -func OverrideUpdateNeeded( - typeConfig *fedcorev1a1.FederatedTypeConfig, - overridesMap util.OverridesMap, - result map[string]int64, -) bool { - resultLen := len(result) - checkLen := 0 - for clusterName, clusterOverridesMap := range overridesMap { - for _, overrideItem := range clusterOverridesMap { - path := overrideItem.Path - rawValue := overrideItem.Value - if path != utilunstructured.ToSlashPath(typeConfig.Spec.PathDefinition.ReplicasSpec) { - continue - } - // The type of the value will be float64 due to how json - // marshalling works for interfaces. - floatValue, ok := rawValue.(float64) - if !ok { - return true - } - value := int64(floatValue) - replicas, ok := result[clusterName] - if !ok || value != replicas { - return true - } - checkLen += 1 - } - } - - return checkLen != resultLen + updated = fedObject.GetSpec().SetControllerOverrides(PrefixedGlobalSchedulerName, newOverrides) + return updated, nil } diff --git a/pkg/controllers/scheduler/util_test.go b/pkg/controllers/scheduler/util_test.go index 2073c9de..88f68b9b 100644 --- a/pkg/controllers/scheduler/util_test.go +++ b/pkg/controllers/scheduler/util_test.go @@ -94,7 +94,7 @@ func TestMatchedPolicyKey(t *testing.T) { } object.SetLabels(labels) - policy, found := MatchedPolicyKey(object, object.GetNamespace() != "") + policy, found := GetMatchedPolicyKey(object, object.GetNamespace() != "") if found != testCase.expectedPolicyFound { t.Fatalf("found = %v, but expectedPolicyFound = %v", found, testCase.expectedPolicyFound) } diff --git a/pkg/controllers/scheduler/webhook.go b/pkg/controllers/scheduler/webhook.go index ee978c51..f287d206 100644 --- a/pkg/controllers/scheduler/webhook.go +++ b/pkg/controllers/scheduler/webhook.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2023 The KubeAdmiral Authors. 
diff --git a/pkg/controllers/util/clusterselector/util.go b/pkg/util/clusterselector/util.go similarity index 100% rename from pkg/controllers/util/clusterselector/util.go rename to pkg/util/clusterselector/util.go diff --git a/pkg/util/eventhandlers/eventhandler.go b/pkg/util/eventhandlers/eventhandler.go index 1e2cdf5a..f8e5046a 100644 --- a/pkg/util/eventhandlers/eventhandler.go +++ b/pkg/util/eventhandlers/eventhandler.go @@ -19,6 +19,7 @@ package eventhandlers import ( "reflect" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/tools/cache" ) @@ -85,3 +86,37 @@ func NewTriggerOnChanges[Source any, Key any]( }, } } + +// NewTriggerOnGenerationChanges returns a cache.ResourceEventHandlerFuncs that will call the given triggerFunc on +// object generation changes. The object is first transformed with the given keyFunc. triggerFunc is also called for add +// and delete events. +func NewTriggerOnGenerationChanges[Source any, Key any]( + keyFunc func(Source) Key, + triggerFunc func(Key), +) *cache.ResourceEventHandlerFuncs { + return &cache.ResourceEventHandlerFuncs{ + DeleteFunc: func(old interface{}) { + if deleted, ok := old.(cache.DeletedFinalStateUnknown); ok { + // This object might be stale but ok for our current usage. + old = deleted.Obj + if old == nil { + return + } + } + oldObj := old.(Source) + triggerFunc(keyFunc(oldObj)) + }, + AddFunc: func(cur interface{}) { + curObj := cur.(Source) + triggerFunc(keyFunc(curObj)) + }, + UpdateFunc: func(old, cur interface{}) { + oldObj := old.(metav1.Object) + curObj := cur.(metav1.Object) + + if oldObj.GetGeneration() != curObj.GetGeneration() { + triggerFunc(keyFunc(cur.(Source))) + } + }, + } +} diff --git a/pkg/util/fedobjectadapters/adapters.go b/pkg/util/fedobjectadapters/adapters.go index c7761adf..d4c7c945 100644 --- a/pkg/util/fedobjectadapters/adapters.go +++ b/pkg/util/fedobjectadapters/adapters.go @@ -10,7 +10,6 @@ import ( fedcorev1a1client "github.com/kubewharf/kubeadmiral/pkg/client/clientset/versioned/typed/core/v1alpha1" fedcorev1a1listers "github.com/kubewharf/kubeadmiral/pkg/client/listers/core/v1alpha1" "github.com/kubewharf/kubeadmiral/pkg/controllers/common" - "github.com/kubewharf/kubeadmiral/pkg/controllers/scheduler" ) func ensureNilInterface( @@ -127,11 +126,11 @@ func Delete( func MatchedPolicyKey(obj fedcorev1a1.GenericFederatedObject, isNamespaced bool) (result common.QualifiedName, ok bool) { labels := obj.GetLabels() - if policyName, exists := labels[scheduler.PropagationPolicyNameLabel]; exists && isNamespaced { + if policyName, exists := labels[common.PropagationPolicyNameLabel]; exists && isNamespaced { return common.QualifiedName{Namespace: obj.GetNamespace(), Name: policyName}, true } - if policyName, exists := labels[scheduler.ClusterPropagationPolicyNameLabel]; exists { + if policyName, exists := labels[common.ClusterPropagationPolicyNameLabel]; exists { return common.QualifiedName{Namespace: "", Name: policyName}, true } diff --git a/pkg/controllers/util/unstructured/unstructured.go b/pkg/util/unstructured/unstructured.go similarity index 98% rename from pkg/controllers/util/unstructured/unstructured.go rename to pkg/util/unstructured/unstructured.go index f1ab977f..de9de421 100644 --- a/pkg/controllers/util/unstructured/unstructured.go +++ b/pkg/util/unstructured/unstructured.go @@ -1,4 +1,3 @@ -//go:build exclude /* Copyright 2023 The KubeAdmiral Authors. @@ -15,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package utilunstructured +package unstructured import ( "fmt" From 89c37206b63c2d84aacdff1f3bd4f55f744be991 Mon Sep 17 00:00:00 2001 From: "hawjia.lim" Date: Sat, 22 Jul 2023 20:19:56 +0800 Subject: [PATCH 06/22] fix(scheduler): trigger schedulingprofile informer start --- pkg/controllers/federate/controller.go | 2 -- pkg/controllers/scheduler/scheduler.go | 6 ++++-- pkg/controllers/scheduler/schedulingtriggers.go | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pkg/controllers/federate/controller.go b/pkg/controllers/federate/controller.go index bf6ea4a0..63eb2c94 100644 --- a/pkg/controllers/federate/controller.go +++ b/pkg/controllers/federate/controller.go @@ -408,8 +408,6 @@ func (c *FederateController) reconcile(ctx context.Context, key workerKey) (stat "Federated object updated: %s", fedObject.GetName(), ) - } else { - logger.V(3).Info("No updates required to the federated object") } return worker.StatusAllOK diff --git a/pkg/controllers/scheduler/scheduler.go b/pkg/controllers/scheduler/scheduler.go index 0c4a2645..cf43b8f2 100644 --- a/pkg/controllers/scheduler/scheduler.go +++ b/pkg/controllers/scheduler/scheduler.go @@ -165,7 +165,8 @@ func NewScheduler( s.enqueueFederatedObjectsForCluster, )) - s.webhookConfigurationSynced = webhookConfigurationInformer.Informer().HasSynced + schedulingProfileInformer.Informer() + webhookConfigurationInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(obj interface{}) { s.cacheWebhookPlugin(obj.(*fedcorev1a1.SchedulerPluginWebhookConfiguration)) @@ -190,6 +191,7 @@ func NewScheduler( s.webhookPlugins.Delete(obj.(*fedcorev1a1.SchedulerPluginWebhookConfiguration).Name) }, }) + s.webhookConfigurationSynced = webhookConfigurationInformer.Informer().HasSynced informerManager.AddFTCUpdateHandler(func(lastObserved, latest *fedcorev1a1.FederatedTypeConfig) { if lastObserved == nil && latest != nil { @@ -603,7 +605,7 @@ func (s *Scheduler) persistSchedulingResult( return worker.StatusError } - logger.V(1).Info("Updated federated object") + logger.V(1).Info("Scheduling success") s.eventRecorder.Eventf( fedObject, corev1.EventTypeNormal, diff --git a/pkg/controllers/scheduler/schedulingtriggers.go b/pkg/controllers/scheduler/schedulingtriggers.go index 118a624d..db00812b 100644 --- a/pkg/controllers/scheduler/schedulingtriggers.go +++ b/pkg/controllers/scheduler/schedulingtriggers.go @@ -156,13 +156,13 @@ var knownSchedulingAnnotations = sets.New( ) func getSchedulingAnnotations(fedObject fedcorev1a1.GenericFederatedObject) []keyValue[string, string] { - annotations := fedObject.GetAnnotations() // this is a deep copy - for k := range annotations { - if !knownSchedulingAnnotations.Has(k) { - delete(annotations, k) + result := map[string]string{} + for k, v := range fedObject.GetAnnotations() { + if knownSchedulingAnnotations.Has(k) { + result[k] = v } } - return sortMap(annotations) + return sortMap(result) } func getReplicaCount( From 937a2bd0ab991dd9108cf6f60a9aeb1cb448a01a Mon Sep 17 00:00:00 2001 From: "hawjia.lim" Date: Fri, 28 Jul 2023 13:06:05 +0800 Subject: [PATCH 07/22] refactor(eventhandlers): refactor event handlers --- pkg/controllers/federate/controller.go | 13 +--- .../federatedcluster/controller.go | 5 +- pkg/controllers/nsautoprop/controller.go | 3 - pkg/controllers/policyrc/controller.go | 32 ++++----- pkg/controllers/scheduler/scheduler.go | 21 +++--- pkg/controllers/status/controller.go | 19 ++--- pkg/util/eventhandlers/eventhandler.go | 72 ++++++++++++------- 7 files 
changed, 82 insertions(+), 83 deletions(-) diff --git a/pkg/controllers/federate/controller.go b/pkg/controllers/federate/controller.go index 63eb2c94..b965ea92 100644 --- a/pkg/controllers/federate/controller.go +++ b/pkg/controllers/federate/controller.go @@ -130,14 +130,13 @@ func NewFederateController( return uns.GetNamespace() != fedSystemNamespace }, Handler: eventhandlers.NewTriggerOnAllChanges( - func(uns *unstructured.Unstructured) workerKey { - return workerKey{ + func(uns *unstructured.Unstructured) { + c.worker.Enqueue(workerKey{ name: uns.GetName(), namespace: uns.GetNamespace(), gvk: ftc.GetSourceTypeGVK(), - } + }) }, - c.worker.Enqueue, ), } }, @@ -154,9 +153,6 @@ func NewFederateController( return fedObj.Namespace != fedSystemNamespace }, Handler: eventhandlers.NewTriggerOnAllChanges( - func(fedObj *fedcorev1a1.FederatedObject) *fedcorev1a1.FederatedObject { - return fedObj - }, func(fedObj *fedcorev1a1.FederatedObject) { srcMeta, err := fedObj.Spec.GetTemplateAsUnstructured() if err != nil { @@ -183,9 +179,6 @@ func NewFederateController( if _, err := clusterFedObjectInformer.Informer().AddEventHandler( eventhandlers.NewTriggerOnAllChanges( - func(fedObj *fedcorev1a1.ClusterFederatedObject) *fedcorev1a1.ClusterFederatedObject { - return fedObj - }, func(fedObj *fedcorev1a1.ClusterFederatedObject) { srcMeta, err := fedObj.Spec.GetTemplateAsUnstructured() if err != nil { diff --git a/pkg/controllers/federatedcluster/controller.go b/pkg/controllers/federatedcluster/controller.go index 9be0041d..78abd27b 100644 --- a/pkg/controllers/federatedcluster/controller.go +++ b/pkg/controllers/federatedcluster/controller.go @@ -155,8 +155,9 @@ func NewFederatedClusterController( } return false }, - common.NewQualifiedName, - c.worker.Enqueue, + func(cluster metav1.Object) { + c.worker.Enqueue(common.NewQualifiedName(cluster)) + }, )) return c, nil diff --git a/pkg/controllers/nsautoprop/controller.go b/pkg/controllers/nsautoprop/controller.go index ed138372..cc929855 100644 --- a/pkg/controllers/nsautoprop/controller.go +++ b/pkg/controllers/nsautoprop/controller.go @@ -133,9 +133,6 @@ func NewNamespaceAutoPropagationController( if _, err := c.clusterFedObjectInformer.Informer().AddEventHandlerWithResyncPeriod( eventhandlers.NewTriggerOnAllChanges( - func(obj *fedcorev1a1.ClusterFederatedObject) *fedcorev1a1.ClusterFederatedObject { - return obj - }, func(obj *fedcorev1a1.ClusterFederatedObject) { srcMeta, err := obj.Spec.GetTemplateAsUnstructured() if err != nil { diff --git a/pkg/controllers/policyrc/controller.go b/pkg/controllers/policyrc/controller.go index 869ef57e..04571969 100644 --- a/pkg/controllers/policyrc/controller.go +++ b/pkg/controllers/policyrc/controller.go @@ -82,14 +82,14 @@ func NewPolicyRCController( logger: logger.WithValues("controller", ControllerName), } - if _, err := c.fedObjectInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChanges( + if _, err := c.fedObjectInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChangesWithTransform( common.NewQualifiedName, c.countWorker.Enqueue, )); err != nil { return nil, err } - if _, err := c.clusterFedObjectInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChanges( + if _, err := c.clusterFedObjectInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChangesWithTransform( common.NewQualifiedName, c.countWorker.Enqueue, )); err != nil { @@ -141,31 +141,27 @@ func NewPolicyRCController( metrics, ) - if _, err := 
c.propagationPolicyInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChanges( - common.NewQualifiedName, - c.persistPpWorker.Enqueue, - )); err != nil { + if _, err := c.propagationPolicyInformer.Informer().AddEventHandler( + eventhandlers.NewTriggerOnAllChangesWithTransform(common.NewQualifiedName, c.persistPpWorker.Enqueue), + ); err != nil { return nil, err } - if _, err := c.clusterPropagationPolicyInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChanges( - common.NewQualifiedName, - c.persistPpWorker.Enqueue, - )); err != nil { + if _, err := c.clusterPropagationPolicyInformer.Informer().AddEventHandler( + eventhandlers.NewTriggerOnAllChangesWithTransform(common.NewQualifiedName, c.persistPpWorker.Enqueue), + ); err != nil { return nil, err } - if _, err := c.overridePolicyInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChanges( - common.NewQualifiedName, - c.persistOpWorker.Enqueue, - )); err != nil { + if _, err := c.overridePolicyInformer.Informer().AddEventHandler( + eventhandlers.NewTriggerOnAllChangesWithTransform(common.NewQualifiedName, c.persistOpWorker.Enqueue), + ); err != nil { return nil, err } - if _, err := c.clusterOverridePolicyInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChanges( - common.NewQualifiedName, - c.persistPpWorker.Enqueue, - )); err != nil { + if _, err := c.clusterOverridePolicyInformer.Informer().AddEventHandler( + eventhandlers.NewTriggerOnAllChangesWithTransform(common.NewQualifiedName, c.persistPpWorker.Enqueue), + ); err != nil { return nil, err } diff --git a/pkg/controllers/scheduler/scheduler.go b/pkg/controllers/scheduler/scheduler.go index cf43b8f2..b7076747 100644 --- a/pkg/controllers/scheduler/scheduler.go +++ b/pkg/controllers/scheduler/scheduler.go @@ -135,23 +135,21 @@ func NewScheduler( metrics, ) - fedObjectInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChanges( + fedObjectInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChangesWithTransform( common.NewQualifiedName, s.worker.Enqueue, )) - clusterFedObjectInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChanges( + clusterFedObjectInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChangesWithTransform( common.NewQualifiedName, s.worker.Enqueue, )) - propagationPolicyInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnGenerationChanges( - func(obj metav1.Object) metav1.Object { return obj }, - s.enqueueFederatedObjectsForPolicy, - )) - clusterPropagationPolicyInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnGenerationChanges( - func(obj metav1.Object) metav1.Object { return obj }, - s.enqueueFederatedObjectsForPolicy, - )) + propagationPolicyInformer.Informer().AddEventHandler( + eventhandlers.NewTriggerOnGenerationChanges(s.enqueueFederatedObjectsForPolicy), + ) + clusterPropagationPolicyInformer.Informer().AddEventHandler( + eventhandlers.NewTriggerOnGenerationChanges(s.enqueueFederatedObjectsForPolicy), + ) federatedClusterInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnChanges( func(oldCluster, curCluster *fedcorev1a1.FederatedCluster) bool { @@ -159,9 +157,6 @@ func NewScheduler( !equality.Semantic.DeepEqual(oldCluster.Spec.Taints, curCluster.Spec.Taints) || !equality.Semantic.DeepEqual(oldCluster.Status.APIResourceTypes, curCluster.Status.APIResourceTypes) }, - func(cluster *fedcorev1a1.FederatedCluster) *fedcorev1a1.FederatedCluster { - return cluster - }, s.enqueueFederatedObjectsForCluster, )) diff --git 
a/pkg/controllers/status/controller.go b/pkg/controllers/status/controller.go index 15959145..6f3a7838 100644 --- a/pkg/controllers/status/controller.go +++ b/pkg/controllers/status/controller.go @@ -152,7 +152,7 @@ func NewStatusController( // Build queue for triggering cluster reconciliations. s.clusterQueue = workqueue.NewNamedDelayingQueue("status-controller-cluster-queue") - fedObjectHandler := eventhandlers.NewTriggerOnAllChanges( + fedObjectHandler := eventhandlers.NewTriggerOnAllChangesWithTransform( common.NewQualifiedName, func(key common.QualifiedName) { s.enqueueEnableCollectedStatusObject(key, 0) @@ -167,17 +167,15 @@ func NewStatusController( return nil, err } - if _, err := s.collectedStatusInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChanges( - common.NewQualifiedName, - s.worker.Enqueue, - )); err != nil { + if _, err := s.collectedStatusInformer.Informer().AddEventHandler( + eventhandlers.NewTriggerOnAllChangesWithTransform(common.NewQualifiedName, s.worker.Enqueue), + ); err != nil { return nil, err } - if _, err := s.clusterCollectedStatusInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChanges( - common.NewQualifiedName, - s.worker.Enqueue, - )); err != nil { + if _, err := s.clusterCollectedStatusInformer.Informer().AddEventHandler( + eventhandlers.NewTriggerOnAllChangesWithTransform(common.NewQualifiedName, s.worker.Enqueue), + ); err != nil { return nil, err } @@ -194,9 +192,6 @@ func NewStatusController( } return eventhandlers.NewTriggerOnAllChanges( - func(uns *unstructured.Unstructured) *unstructured.Unstructured { - return uns - }, func(uns *unstructured.Unstructured) { ftc, exists := s.ftcManager.GetResourceFTC(uns.GroupVersionKind()) if !exists { diff --git a/pkg/util/eventhandlers/eventhandler.go b/pkg/util/eventhandlers/eventhandler.go index f8e5046a..d02dda13 100644 --- a/pkg/util/eventhandlers/eventhandler.go +++ b/pkg/util/eventhandlers/eventhandler.go @@ -24,11 +24,8 @@ import ( ) // NewTriggerOnAllChanges returns a cache.ResourceEventHandlerFuncs that will call the given triggerFunc on all object -// changes. The object is first transformed with the given keyFunc. triggerFunc is also called for add or delete events. -func NewTriggerOnAllChanges[Source any, Key any]( - keyFunc func(Source) Key, - triggerFunc func(Key), -) *cache.ResourceEventHandlerFuncs { +// changes. triggerFunc is also called for add or delete events. +func NewTriggerOnAllChanges[Source any](triggerFunc func(Source)) *cache.ResourceEventHandlerFuncs { return &cache.ResourceEventHandlerFuncs{ DeleteFunc: func(old interface{}) { if deleted, ok := old.(cache.DeletedFinalStateUnknown); ok { @@ -38,28 +35,57 @@ func NewTriggerOnAllChanges[Source any, Key any]( } } oldSource := old.(Source) - triggerFunc(keyFunc(oldSource)) + triggerFunc(oldSource) }, AddFunc: func(cur interface{}) { curObj := cur.(Source) - triggerFunc(keyFunc(curObj)) + triggerFunc(curObj) }, UpdateFunc: func(old, cur interface{}) { if !reflect.DeepEqual(old, cur) { curObj := cur.(Source) - triggerFunc(keyFunc(curObj)) + triggerFunc(curObj) + } + }, + } +} + +// NewTriggerOnAllChangesWithTransform returns a cache.ResourceEventHandler that will call the given triggerFunc on all +// object changes. triggerFunc is also called for add or delete events. transformFunc will first be used to transform +// the original object into the target type. 
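+//
+// Illustrative usage sketch (the informer and worker names here are placeholders; the policyrc
+// and status controllers earlier in this patch show real call sites): transform each object into
+// its qualified name before handing it to a worker queue:
+//
+//	informer.Informer().AddEventHandler(NewTriggerOnAllChangesWithTransform(
+//		common.NewQualifiedName,
+//		worker.Enqueue,
+//	))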
+func NewTriggerOnAllChangesWithTransform[Source any, Target any]( + transformFunc func(Source) Target, + triggerFunc func(Target), +) cache.ResourceEventHandler { + return &cache.ResourceEventHandlerFuncs{ + DeleteFunc: func(old interface{}) { + if deleted, ok := old.(cache.DeletedFinalStateUnknown); ok { + old = deleted.Obj + if old == nil { + return + } + } + oldSource := transformFunc(old.(Source)) + triggerFunc(oldSource) + }, + AddFunc: func(cur interface{}) { + curObj := transformFunc(cur.(Source)) + triggerFunc(curObj) + }, + UpdateFunc: func(old, cur interface{}) { + if !reflect.DeepEqual(old, cur) { + curObj := transformFunc(cur.(Source)) + triggerFunc(curObj) } }, } } // NewTriggerOnChanges returns a cache.ResourceEventHandlerFuncs that will call the given triggerFunc on object changes -// that passes the given predicate. The object is first transformed with the given keyFunc. triggerFunc is also called -// for add and delete events. -func NewTriggerOnChanges[Source any, Key any]( +// that passes the given predicate. triggerFunc is also called for add and delete events. +func NewTriggerOnChanges[Source any]( predicate func(old, cur Source) bool, - keyFunc func(Source) Key, - triggerFunc func(Key), + triggerFunc func(Source), ) *cache.ResourceEventHandlerFuncs { return &cache.ResourceEventHandlerFuncs{ DeleteFunc: func(old interface{}) { @@ -71,29 +97,25 @@ func NewTriggerOnChanges[Source any, Key any]( } } oldObj := old.(Source) - triggerFunc(keyFunc(oldObj)) + triggerFunc(oldObj) }, AddFunc: func(cur interface{}) { curObj := cur.(Source) - triggerFunc(keyFunc(curObj)) + triggerFunc(curObj) }, UpdateFunc: func(old, cur interface{}) { oldObj := old.(Source) curObj := cur.(Source) if predicate(oldObj, curObj) { - triggerFunc(keyFunc(curObj)) + triggerFunc(curObj) } }, } } // NewTriggerOnGenerationChanges returns a cache.ResourceEventHandlerFuncs that will call the given triggerFunc on -// object generation changes. The object is first transformed with the given keyFunc. triggerFunc is also called for add -// and delete events. -func NewTriggerOnGenerationChanges[Source any, Key any]( - keyFunc func(Source) Key, - triggerFunc func(Key), -) *cache.ResourceEventHandlerFuncs { +// object generation changes. triggerFunc is also called for add and delete events. 
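+//
+// Illustrative usage sketch (policyInformer is a placeholder; the scheduler registers this handler
+// for propagation policies earlier in this patch). Keying off metadata.generation means the
+// handler fires for spec changes but not for status-only updates:
+//
+//	policyInformer.Informer().AddEventHandler(
+//		NewTriggerOnGenerationChanges(s.enqueueFederatedObjectsForPolicy),
+//	)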
+func NewTriggerOnGenerationChanges[Source any](triggerFunc func(Source)) *cache.ResourceEventHandlerFuncs { return &cache.ResourceEventHandlerFuncs{ DeleteFunc: func(old interface{}) { if deleted, ok := old.(cache.DeletedFinalStateUnknown); ok { @@ -104,18 +126,18 @@ func NewTriggerOnGenerationChanges[Source any, Key any]( } } oldObj := old.(Source) - triggerFunc(keyFunc(oldObj)) + triggerFunc(oldObj) }, AddFunc: func(cur interface{}) { curObj := cur.(Source) - triggerFunc(keyFunc(curObj)) + triggerFunc(curObj) }, UpdateFunc: func(old, cur interface{}) { oldObj := old.(metav1.Object) curObj := cur.(metav1.Object) if oldObj.GetGeneration() != curObj.GetGeneration() { - triggerFunc(keyFunc(cur.(Source))) + triggerFunc(cur.(Source)) } }, } From 94ca742c01e201d04bb9bb9572803c0740eb6621 Mon Sep 17 00:00:00 2001 From: "hawjia.lim" Date: Fri, 28 Jul 2023 13:09:49 +0800 Subject: [PATCH 08/22] refactor(core): change override controller name --- cmd/controller-manager/app/controllermanager.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/controller-manager/app/controllermanager.go b/cmd/controller-manager/app/controllermanager.go index a23c4fbc..fdb5f2ef 100644 --- a/cmd/controller-manager/app/controllermanager.go +++ b/cmd/controller-manager/app/controllermanager.go @@ -39,7 +39,7 @@ const ( FederateControllerName = "federate" FollowerControllerName = "follower" PolicyRCControllerName = "policyrc" - OverrideControllerName = "overridepolicy" + OverrideControllerName = "override" NamespaceAutoPropagationControllerName = "nsautoprop" StatusControllerName = "status" SchedulerName = "scheduler" From 338ce5f56b774c1dabb3f18ca18430833ad8bd55 Mon Sep 17 00:00:00 2001 From: "hawjia.lim" Date: Fri, 28 Jul 2023 13:28:25 +0800 Subject: [PATCH 09/22] feat(override): deprecate override util package --- .../override/overridepolicy_controller.go | 11 +- pkg/controllers/override/util.go | 37 +++- pkg/controllers/override/util_test.go | 1 - pkg/controllers/util/overrides.go | 203 ------------------ 4 files changed, 35 insertions(+), 217 deletions(-) delete mode 100644 pkg/controllers/util/overrides.go diff --git a/pkg/controllers/override/overridepolicy_controller.go b/pkg/controllers/override/overridepolicy_controller.go index 3d870150..0ab9db33 100644 --- a/pkg/controllers/override/overridepolicy_controller.go +++ b/pkg/controllers/override/overridepolicy_controller.go @@ -339,7 +339,7 @@ func (c *Controller) reconcile(ctx context.Context, qualifiedName common.Qualifi return worker.StatusError } - var overrides util.OverridesMap + var overrides overridesMap // Apply overrides from each policy in order for _, policy := range policies { newOverrides, err := parseOverrides(policy, placedClusters) @@ -358,16 +358,11 @@ func (c *Controller) reconcile(ctx context.Context, qualifiedName common.Qualifi overrides = mergeOverrides(overrides, newOverrides) } - currentOverrides, err := util.GetOverrides(fedObject, PrefixedControllerName) - if err != nil { - keyedLogger.Error(err, "Failed to get overrides") - return worker.StatusError - } - + currentOverrides := fedObject.GetSpec().GetControllerOverrides(PrefixedControllerName) needsUpdate := !equality.Semantic.DeepEqual(overrides, currentOverrides) if needsUpdate { - err = util.SetOverrides(fedObject, PrefixedControllerName, overrides) + err = setOverrides(fedObject, overrides) if err != nil { keyedLogger.Error(err, "Failed to set overrides") return worker.StatusError diff --git a/pkg/controllers/override/util.go b/pkg/controllers/override/util.go 
index 3652f153..519d41f7 100644 --- a/pkg/controllers/override/util.go +++ b/pkg/controllers/override/util.go @@ -26,10 +26,11 @@ import ( fedcorev1a1 "github.com/kubewharf/kubeadmiral/pkg/apis/core/v1alpha1" fedcorev1a1listers "github.com/kubewharf/kubeadmiral/pkg/client/listers/core/v1alpha1" - "github.com/kubewharf/kubeadmiral/pkg/controllers/util" "github.com/kubewharf/kubeadmiral/pkg/util/clusterselector" ) +type overridesMap map[string]fedcorev1a1.OverridePatches + /* lookForMatchedPolicies looks for OverridePolicy and/or ClusterOverridePolicy that match the obj in the stores. @@ -90,8 +91,8 @@ func lookForMatchedPolicies( func parseOverrides( policy fedcorev1a1.GenericOverridePolicy, clusters []*fedcorev1a1.FederatedCluster, -) (util.OverridesMap, error) { - overridesMap := make(util.OverridesMap) +) (overridesMap, error) { + overridesMap := make(overridesMap) for _, cluster := range clusters { patches := make(fedcorev1a1.OverridePatches, 0) @@ -130,9 +131,9 @@ func parseOverrides( return overridesMap, nil } -func mergeOverrides(dest, src util.OverridesMap) util.OverridesMap { +func mergeOverrides(dest, src overridesMap) overridesMap { if dest == nil { - dest = make(util.OverridesMap) + dest = make(overridesMap) } for clusterName, srcOverrides := range src { @@ -229,3 +230,29 @@ func policyJsonPatchOverriderToOverridePatch( return overridePatch, nil } + +func setOverrides(federatedObj fedcorev1a1.GenericFederatedObject, overridesMap overridesMap) error { + for clusterName, clusterOverrides := range overridesMap { + if len(clusterOverrides) == 0 { + delete(overridesMap, clusterName) + } + } + + if len(overridesMap) == 0 { + federatedObj.GetSpec().DeleteControllerOverrides(PrefixedControllerName) + return nil + } + + overrides := []fedcorev1a1.ClusterReferenceWithPatches{} + + for clusterName, clusterOverrides := range overridesMap { + overrides = append(overrides, fedcorev1a1.ClusterReferenceWithPatches{ + Cluster: clusterName, + Patches: clusterOverrides, + }) + } + + federatedObj.GetSpec().SetControllerOverrides(PrefixedControllerName, overrides) + + return nil +} diff --git a/pkg/controllers/override/util_test.go b/pkg/controllers/override/util_test.go index 75d836b4..d2d6f4d6 100644 --- a/pkg/controllers/override/util_test.go +++ b/pkg/controllers/override/util_test.go @@ -27,7 +27,6 @@ import ( "k8s.io/client-go/tools/cache" fedcorev1a1 "github.com/kubewharf/kubeadmiral/pkg/apis/core/v1alpha1" - fedtypesv1a1 "github.com/kubewharf/kubeadmiral/pkg/apis/types/v1alpha1" "github.com/kubewharf/kubeadmiral/pkg/controllers/util" ) diff --git a/pkg/controllers/util/overrides.go b/pkg/controllers/util/overrides.go deleted file mode 100644 index 76a2cbc4..00000000 --- a/pkg/controllers/util/overrides.go +++ /dev/null @@ -1,203 +0,0 @@ -/* -Copyright 2018 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -This file may have been modified by The KubeAdmiral Authors -("KubeAdmiral Modifications"). All KubeAdmiral Modifications -are Copyright 2023 The KubeAdmiral Authors. 
-*/ - -package util - -import ( - "encoding/json" - "sort" - - jsonpatch "github.com/evanphx/json-patch" - "github.com/pkg/errors" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/util/sets" - - fedcorev1a1 "github.com/kubewharf/kubeadmiral/pkg/apis/core/v1alpha1" -) - -// Namespace and name may not be overridden since these fields are the -// primary mechanism of association between a federated resource in -// the host cluster and the target resources in the member clusters. -// -// Kind should always be sourced from the FTC and not vary across -// member clusters. -// -// apiVersion can be overridden to support managing resources like -// Ingress which can exist in different groups at different -// versions. Users will need to take care not to abuse this -// capability. -var invalidPaths = sets.NewString( - "/metadata/namespace", - "/metadata/name", - "/metadata/generateName", - "/kind", -) - -// Mapping of clusterName to overrides for the cluster -type OverridesMap map[string]fedcorev1a1.OverridePatches - -// GetOverrides returns a map of overrides populated from the given -// unstructured object. -func GetOverrides(federatedObj fedcorev1a1.GenericFederatedObject, controller string) (OverridesMap, error) { - overridesMap := make(OverridesMap) - - if federatedObj == nil || federatedObj.GetSpec().Overrides == nil { - // No overrides defined for the federated type - return overridesMap, nil - } - - overrides := federatedObj.GetSpec().Overrides - var clusterOverrides []fedcorev1a1.ClusterReferenceWithPatches - for i := range overrides { - if overrides[i].Controller == controller { - clusterOverrides = overrides[i].Override - break - } - } - - if clusterOverrides == nil { - return overridesMap, nil - } - - for _, overrideItem := range clusterOverrides { - clusterName := overrideItem.Cluster - if _, ok := overridesMap[clusterName]; ok { - return nil, errors.Errorf("cluster %q appears more than once", clusterName) - } - - for i, pathEntry := range overrideItem.Patches { - path := pathEntry.Path - if invalidPaths.Has(path) { - return nil, errors.Errorf("override[%d] for cluster %q has an invalid path: %s", i, clusterName, path) - } - } - overridesMap[clusterName] = overrideItem.Patches - } - - return overridesMap, nil -} - -// SetOverrides sets the spec.overrides field of the unstructured -// object from the provided overrides map. -// -// This function takes ownership of the `overridesMap` and may mutate it arbitrarily. -func SetOverrides(federatedObj fedcorev1a1.GenericFederatedObject, controller string, overridesMap OverridesMap) error { - for clusterName, clusterOverrides := range overridesMap { - if len(clusterOverrides) == 0 { - delete(overridesMap, clusterName) - } - } - - index := -1 - for i, overrides := range federatedObj.GetSpec().Overrides { - if overrides.Controller == controller { - index = i - break - } - } - - if len(overridesMap) == 0 { - // delete index - if index != -1 { - federatedObj.GetSpec().Overrides = append(federatedObj.GetSpec().Overrides[:index], federatedObj.GetSpec().Overrides[(index+1):]...) 
- } - } else { - if index == -1 { - index = len(federatedObj.GetSpec().Overrides) - federatedObj.GetSpec().Overrides = append(federatedObj.GetSpec().Overrides, fedcorev1a1.OverrideWithController{ - Controller: controller, - }) - } - - overrides := &federatedObj.GetSpec().Overrides[index] - overrides.Override = nil - - // Write in ascending order of cluster names for better readability - clusterNames := make([]string, 0, len(overridesMap)) - for clusterName := range overridesMap { - clusterNames = append(clusterNames, clusterName) - } - sort.Strings(clusterNames) - for _, clusterName := range clusterNames { - clusterOverrides := overridesMap[clusterName] - overrides.Override = append(overrides.Override, fedcorev1a1.ClusterReferenceWithPatches{ - Cluster: clusterName, - Patches: clusterOverrides, - }) - } - } - - return nil -} - -// UnstructuredToInterface converts an unstructured object to the -// provided interface by json marshalling/unmarshalling. -func UnstructuredToInterface(rawObj *unstructured.Unstructured, obj interface{}) error { - content, err := rawObj.MarshalJSON() - if err != nil { - return err - } - return json.Unmarshal(content, obj) -} - -// InterfaceToUnstructured converts the provided object to an -// unstructured by json marshalling/unmarshalling. -func InterfaceToUnstructured(obj interface{}) (ret interface{}, err error) { - var buf []byte - buf, err = json.Marshal(obj) - if err != nil { - return - } - - err = json.Unmarshal(buf, &ret) - return -} - -// ApplyJsonPatch applies the override on to the given unstructured object. -func ApplyJsonPatch(obj *unstructured.Unstructured, overrides fedcorev1a1.OverridePatches) error { - // TODO: Do the defaulting of "op" field to "replace" in API defaulting - for i, overrideItem := range overrides { - if overrideItem.Op == "" { - overrides[i].Op = "replace" - } - } - jsonPatchBytes, err := json.Marshal(overrides) - if err != nil { - return err - } - - patch, err := jsonpatch.DecodePatch(jsonPatchBytes) - if err != nil { - return err - } - - ObjectJSONBytes, err := obj.MarshalJSON() - if err != nil { - return err - } - - patchedObjectJSONBytes, err := patch.Apply(ObjectJSONBytes) - if err != nil { - return err - } - - err = obj.UnmarshalJSON(patchedObjectJSONBytes) - return err -} From d4038e83310dbacf263c5e4f84e678466ef4a540 Mon Sep 17 00:00:00 2001 From: "hawjia.lim" Date: Fri, 28 Jul 2023 13:45:56 +0800 Subject: [PATCH 10/22] fix(policyrc): fix imports --- pkg/controllers/common/constants.go | 3 --- pkg/controllers/federate/util.go | 4 ++-- pkg/controllers/policyrc/controller.go | 3 ++- pkg/controllers/scheduler/scheduler.go | 4 +++- pkg/controllers/scheduler/util.go | 4 ++-- pkg/util/fedobjectadapters/adapters.go | 14 -------------- 6 files changed, 9 insertions(+), 23 deletions(-) diff --git a/pkg/controllers/common/constants.go b/pkg/controllers/common/constants.go index 41d9c783..810817e5 100644 --- a/pkg/controllers/common/constants.go +++ b/pkg/controllers/common/constants.go @@ -140,9 +140,6 @@ const ( // TemplateGeneratorMergePatchAnnotation indicates the merge patch document capable of converting // the source object to the template object. 
TemplateGeneratorMergePatchAnnotation = FederateControllerPrefix + "template-generator-merge-patch" - - PropagationPolicyNameLabel = DefaultPrefix + "propagation-policy-name" - ClusterPropagationPolicyNameLabel = DefaultPrefix + "cluster-propagation-policy-name" ) // PropagatedAnnotationKeys and PropagatedLabelKeys are used to store the keys of annotations and labels that are present diff --git a/pkg/controllers/federate/util.go b/pkg/controllers/federate/util.go index 104831de..96802afd 100644 --- a/pkg/controllers/federate/util.go +++ b/pkg/controllers/federate/util.go @@ -270,8 +270,8 @@ var ( ) federatedLabelSet = sets.New[string]( - common.PropagationPolicyNameLabel, - common.ClusterPropagationPolicyNameLabel, + scheduler.PropagationPolicyNameLabel, + scheduler.ClusterPropagationPolicyNameLabel, override.OverridePolicyNameLabel, override.ClusterOverridePolicyNameLabel, ) diff --git a/pkg/controllers/policyrc/controller.go b/pkg/controllers/policyrc/controller.go index 04571969..2041af21 100644 --- a/pkg/controllers/policyrc/controller.go +++ b/pkg/controllers/policyrc/controller.go @@ -32,6 +32,7 @@ import ( fedcorev1a1informers "github.com/kubewharf/kubeadmiral/pkg/client/informers/externalversions/core/v1alpha1" "github.com/kubewharf/kubeadmiral/pkg/controllers/common" "github.com/kubewharf/kubeadmiral/pkg/controllers/override" + "github.com/kubewharf/kubeadmiral/pkg/controllers/scheduler" "github.com/kubewharf/kubeadmiral/pkg/stats" "github.com/kubewharf/kubeadmiral/pkg/util/eventhandlers" "github.com/kubewharf/kubeadmiral/pkg/util/fedobjectadapters" @@ -236,7 +237,7 @@ func (c *Controller) reconcileCount(ctx context.Context, qualifiedName common.Qu var newPps []PolicyKey if fedObj != nil { - newPolicy, newHasPolicy := fedobjectadapters.MatchedPolicyKey(fedObj, fedObj.GetNamespace() != "") + newPolicy, newHasPolicy := scheduler.GetMatchedPolicyKey(fedObj) if newHasPolicy { newPps = []PolicyKey{PolicyKey(newPolicy)} } diff --git a/pkg/controllers/scheduler/scheduler.go b/pkg/controllers/scheduler/scheduler.go index b7076747..b20fda85 100644 --- a/pkg/controllers/scheduler/scheduler.go +++ b/pkg/controllers/scheduler/scheduler.go @@ -56,7 +56,9 @@ import ( ) const ( - SchedulerName = "scheduler" + SchedulerName = "scheduler" + PropagationPolicyNameLabel = common.DefaultPrefix + "propagation-policy-name" + ClusterPropagationPolicyNameLabel = common.DefaultPrefix + "cluster-propagation-policy-name" ) type ClusterWeight struct { diff --git a/pkg/controllers/scheduler/util.go b/pkg/controllers/scheduler/util.go index 0bd63424..dd82630d 100644 --- a/pkg/controllers/scheduler/util.go +++ b/pkg/controllers/scheduler/util.go @@ -36,11 +36,11 @@ func GetMatchedPolicyKey(obj metav1.Object) (result common.QualifiedName, ok boo labels := obj.GetLabels() isNamespaced := len(obj.GetNamespace()) > 0 - if policyName, exists := labels[common.PropagationPolicyNameLabel]; exists && isNamespaced { + if policyName, exists := labels[PropagationPolicyNameLabel]; exists && isNamespaced { return common.QualifiedName{Namespace: obj.GetNamespace(), Name: policyName}, true } - if policyName, exists := labels[common.ClusterPropagationPolicyNameLabel]; exists { + if policyName, exists := labels[ClusterPropagationPolicyNameLabel]; exists { return common.QualifiedName{Namespace: "", Name: policyName}, true } diff --git a/pkg/util/fedobjectadapters/adapters.go b/pkg/util/fedobjectadapters/adapters.go index d4c7c945..b30f52ff 100644 --- a/pkg/util/fedobjectadapters/adapters.go +++ 
b/pkg/util/fedobjectadapters/adapters.go @@ -9,7 +9,6 @@ import ( fedcorev1a1 "github.com/kubewharf/kubeadmiral/pkg/apis/core/v1alpha1" fedcorev1a1client "github.com/kubewharf/kubeadmiral/pkg/client/clientset/versioned/typed/core/v1alpha1" fedcorev1a1listers "github.com/kubewharf/kubeadmiral/pkg/client/listers/core/v1alpha1" - "github.com/kubewharf/kubeadmiral/pkg/controllers/common" ) func ensureNilInterface( @@ -123,16 +122,3 @@ func Delete( } } -func MatchedPolicyKey(obj fedcorev1a1.GenericFederatedObject, isNamespaced bool) (result common.QualifiedName, ok bool) { - labels := obj.GetLabels() - - if policyName, exists := labels[common.PropagationPolicyNameLabel]; exists && isNamespaced { - return common.QualifiedName{Namespace: obj.GetNamespace(), Name: policyName}, true - } - - if policyName, exists := labels[common.ClusterPropagationPolicyNameLabel]; exists { - return common.QualifiedName{Namespace: "", Name: policyName}, true - } - - return common.QualifiedName{}, false -} From ed50baf8c7769b9577dc1eb2a667c92c6372988c Mon Sep 17 00:00:00 2001 From: "hawjia.lim" Date: Fri, 28 Jul 2023 14:07:46 +0800 Subject: [PATCH 11/22] fix(cluster-controller): status update on cache not synced --- .../federatedcluster/clusterstatus.go | 28 +++++++++++-------- test/e2e/federatedcluster/clusterstatus.go | 20 ++++++++++--- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/pkg/controllers/federatedcluster/clusterstatus.go b/pkg/controllers/federatedcluster/clusterstatus.go index 80c6fa8f..94608bf8 100644 --- a/pkg/controllers/federatedcluster/clusterstatus.go +++ b/pkg/controllers/federatedcluster/clusterstatus.go @@ -33,6 +33,7 @@ import ( "k8s.io/apimachinery/pkg/labels" "k8s.io/client-go/discovery" corev1listers "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/retry" "k8s.io/klog/v2" @@ -68,24 +69,14 @@ func (c *FederatedClusterController) collectIndividualClusterStatus( if !exists { return 0, fmt.Errorf("failed to get cluster client: FederatedInformerManager not yet up-to-date") } - podLister, podsSynced, exists := c.federatedInformerManager.GetPodLister(cluster.Name) if !exists { return 0, fmt.Errorf("failed to get pod lister: FederatedInformerManager not yet up-to-date") } - if !podsSynced() { - logger.V(3).Info("Pod informer not synced, will reenqueue") - return 100 * time.Millisecond, nil - } - nodeLister, nodesSynced, exists := c.federatedInformerManager.GetNodeLister(cluster.Name) if !exists { return 0, fmt.Errorf("failed to get node lister: FederatedInformerManager not yet up-to-date") } - if !nodesSynced() { - logger.V(3).Info("Pod informer not synced, will reenqueue") - return 100 * time.Millisecond, nil - } discoveryClient := clusterKubeClient.Discovery() @@ -108,7 +99,14 @@ func (c *FederatedClusterController) collectIndividualClusterStatus( // We skip updating cluster resources and api resources if cluster is not ready if readyStatus == corev1.ConditionTrue { - if err := updateClusterResources(ctx, &cluster.Status, podLister, nodeLister); err != nil { + if err := updateClusterResources( + ctx, + &cluster.Status, + podLister, + podsSynced, + nodeLister, + nodesSynced, + ); err != nil { logger.Error(err, "Failed to update cluster resources") readyStatus = corev1.ConditionFalse readyReason = ClusterResourceCollectionFailedReason @@ -174,8 +172,16 @@ func updateClusterResources( ctx context.Context, clusterStatus *fedcorev1a1.FederatedClusterStatus, podLister corev1listers.PodLister, + podsSynced cache.InformerSynced, nodeLister 
corev1listers.NodeLister, + nodesSynced cache.InformerSynced, ) error { + ctx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + if !cache.WaitForCacheSync(ctx.Done(), podsSynced, nodesSynced) { + return fmt.Errorf("timeout waiting for node and pod informer sync") + } + nodes, err := nodeLister.List(labels.Everything()) if err != nil { return fmt.Errorf("failed to list nodes: %w", err) diff --git a/test/e2e/federatedcluster/clusterstatus.go b/test/e2e/federatedcluster/clusterstatus.go index 98ceedc3..e74dc7e3 100644 --- a/test/e2e/federatedcluster/clusterstatus.go +++ b/test/e2e/federatedcluster/clusterstatus.go @@ -39,7 +39,10 @@ var _ = ginkgo.Describe("Cluster Status", federatedClusterTestLabels, func() { waitForClusterJoin := func(ctx context.Context) { gomega.Eventually(func(g gomega.Gomega, ctx context.Context) { - cluster, err := f.HostFedClient().CoreV1alpha1().FederatedClusters().Get(ctx, cluster.Name, metav1.GetOptions{}) + cluster, err := f.HostFedClient(). + CoreV1alpha1(). + FederatedClusters(). + Get(ctx, cluster.Name, metav1.GetOptions{}) gomega.Expect(err).ToNot(gomega.HaveOccurred(), framework.MessageUnexpectedError) g.Expect(cluster).To(gomega.Satisfy(clusterfwk.ClusterJoined)) }).WithTimeout(clusterJoinTimeout).WithContext(ctx).Should(gomega.Succeed(), "Timed out waiting for cluster join") @@ -48,7 +51,10 @@ var _ = ginkgo.Describe("Cluster Status", federatedClusterTestLabels, func() { waitForFirstStatusUpdate := func(ctx context.Context) { // check initial status update gomega.Eventually(func(g gomega.Gomega, ctx context.Context) { - cluster, err := f.HostFedClient().CoreV1alpha1().FederatedClusters().Get(ctx, cluster.Name, metav1.GetOptions{}) + cluster, err := f.HostFedClient(). + CoreV1alpha1(). + FederatedClusters(). + Get(ctx, cluster.Name, metav1.GetOptions{}) gomega.Expect(err).ToNot(gomega.HaveOccurred(), framework.MessageUnexpectedError) g.Expect(cluster).To(gomega.Satisfy(statusCollected)) }).WithTimeout(clusterStatusCollectTimeout).WithContext(ctx).Should(gomega.Succeed(), "Timed out waiting for first status update") @@ -58,7 +64,10 @@ var _ = ginkgo.Describe("Cluster Status", federatedClusterTestLabels, func() { waitForStatusConvergence := func(ctx context.Context) { gomega.Eventually(func(g gomega.Gomega, ctx context.Context) { var err error - cluster, err := f.HostFedClient().CoreV1alpha1().FederatedClusters().Get(ctx, cluster.Name, metav1.GetOptions{}) + cluster, err := f.HostFedClient(). + CoreV1alpha1(). + FederatedClusters(). + Get(ctx, cluster.Name, metav1.GetOptions{}) gomega.Expect(err).ToNot(gomega.HaveOccurred(), framework.MessageUnexpectedError) g.Expect(cluster).To(gomega.Satisfy(clusterfwk.ClusterReachable)) g.Expect(cluster.Status.APIResourceTypes).ToNot(gomega.BeEmpty()) @@ -118,7 +127,10 @@ var _ = ginkgo.Describe("Cluster Status", federatedClusterTestLabels, func() { waitForStatusConvergence := func(ctx context.Context) { gomega.Eventually(func(g gomega.Gomega, ctx context.Context) { var err error - cluster, err := f.HostFedClient().CoreV1alpha1().FederatedClusters().Get(ctx, cluster.Name, metav1.GetOptions{}) + cluster, err := f.HostFedClient(). + CoreV1alpha1(). + FederatedClusters(). 
+ Get(ctx, cluster.Name, metav1.GetOptions{}) gomega.Expect(err).ToNot(gomega.HaveOccurred(), framework.MessageUnexpectedError) g.Expect(cluster).To(gomega.Satisfy(clusterfwk.ClusterUnreachable)) }).WithTimeout(clusterStatusUpdateInterval*2).WithContext(ctx).Should(gomega.Succeed(), "Timed out waiting for status convergence") From cfcaea9cc0877b924bc0f61c95493deac6a68d7e Mon Sep 17 00:00:00 2001 From: "hawjia.lim" Date: Fri, 28 Jul 2023 14:20:12 +0800 Subject: [PATCH 12/22] fix(cluster-controller): cluster controller synced --- pkg/controllers/federatedcluster/controller.go | 6 +++--- pkg/util/informermanager/federatedinformermanager.go | 7 +++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/pkg/controllers/federatedcluster/controller.go b/pkg/controllers/federatedcluster/controller.go index 78abd27b..b491c0d0 100644 --- a/pkg/controllers/federatedcluster/controller.go +++ b/pkg/controllers/federatedcluster/controller.go @@ -96,14 +96,14 @@ func NewFederatedClusterController( logger klog.Logger, clusterJoinTimeout time.Duration, workerCount int, - fedsystemNamespace string, + fedSystemNamespace string, ) (*FederatedClusterController, error) { c := &FederatedClusterController{ clusterInformer: clusterInformer, federatedInformerManager: federatedInformerManager, kubeClient: kubeClient, fedClient: fedClient, - fedSystemNamespace: fedsystemNamespace, + fedSystemNamespace: fedSystemNamespace, clusterHealthCheckConfig: &ClusterHealthCheckConfig{ Period: time.Minute, }, @@ -164,7 +164,7 @@ func NewFederatedClusterController( } func (c *FederatedClusterController) HasSynced() bool { - return c.clusterInformer.Informer().HasSynced() + return c.clusterInformer.Informer().HasSynced() && c.federatedInformerManager.HasSynced() } func (c *FederatedClusterController) IsControllerReady() bool { diff --git a/pkg/util/informermanager/federatedinformermanager.go b/pkg/util/informermanager/federatedinformermanager.go index 0f433d23..593105dd 100644 --- a/pkg/util/informermanager/federatedinformermanager.go +++ b/pkg/util/informermanager/federatedinformermanager.go @@ -436,8 +436,11 @@ func (m *federatedInformerManager) Start(ctx context.Context) { // Populate the initial snapshot of clusters clusters := m.clusterInformer.Informer().GetStore().List() - for _, cluster := range clusters { - m.initialClusters.Insert(cluster.(*fedcorev1a1.FederatedCluster).GetName()) + for _, clusterObj := range clusters { + cluster := clusterObj.(*fedcorev1a1.FederatedCluster) + if clusterutil.IsClusterJoined(&cluster.Status) { + m.initialClusters.Insert(cluster.GetName()) + } } for _, handler := range m.clusterEventHandlers { From 4c9895a021a1531733e5840ad5c2e86d9e7a08fb Mon Sep 17 00:00:00 2001 From: "hawjia.lim" Date: Fri, 28 Jul 2023 14:24:42 +0800 Subject: [PATCH 13/22] fix(scheduler): typos and unit test --- pkg/controllers/scheduler/scheduler.go | 4 ++-- pkg/controllers/scheduler/util_test.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/controllers/scheduler/scheduler.go b/pkg/controllers/scheduler/scheduler.go index b20fda85..7fbc7cbd 100644 --- a/pkg/controllers/scheduler/scheduler.go +++ b/pkg/controllers/scheduler/scheduler.go @@ -773,7 +773,7 @@ func (s *Scheduler) enqueueFederatedObjectsForCluster(cluster *fedcorev1a1.Feder fedObjects, err := s.fedObjectInformer.Lister().List(labels.Everything()) if err != nil { - s.logger.Error(err, "Failed to enquue FederatedObjects for policy") + s.logger.Error(err, "Failed to enqueue FederatedObjects for policy") return } for _, obj 
:= range fedObjects { @@ -781,7 +781,7 @@ func (s *Scheduler) enqueueFederatedObjectsForCluster(cluster *fedcorev1a1.Feder } clusterFedObjects, err := s.clusterFedObjectInformer.Lister().List(labels.Everything()) if err != nil { - s.logger.Error(err, "Failed to enquue ClusterFederatedObjects for policy") + s.logger.Error(err, "Failed to enqueue ClusterFederatedObjects for policy") return } for _, obj := range clusterFedObjects { diff --git a/pkg/controllers/scheduler/util_test.go b/pkg/controllers/scheduler/util_test.go index 88f68b9b..26e862dd 100644 --- a/pkg/controllers/scheduler/util_test.go +++ b/pkg/controllers/scheduler/util_test.go @@ -94,7 +94,7 @@ func TestMatchedPolicyKey(t *testing.T) { } object.SetLabels(labels) - policy, found := GetMatchedPolicyKey(object, object.GetNamespace() != "") + policy, found := GetMatchedPolicyKey(object) if found != testCase.expectedPolicyFound { t.Fatalf("found = %v, but expectedPolicyFound = %v", found, testCase.expectedPolicyFound) } From abe213fe9028a94fe58917f752a7164a768ad804 Mon Sep 17 00:00:00 2001 From: "hawjia.lim" Date: Fri, 28 Jul 2023 14:32:24 +0800 Subject: [PATCH 14/22] fix(policyrc): nil pointer on controller initialization --- pkg/controllers/policyrc/controller.go | 29 +++++++++++++------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/pkg/controllers/policyrc/controller.go b/pkg/controllers/policyrc/controller.go index 2041af21..083a93df 100644 --- a/pkg/controllers/policyrc/controller.go +++ b/pkg/controllers/policyrc/controller.go @@ -83,20 +83,6 @@ func NewPolicyRCController( logger: logger.WithValues("controller", ControllerName), } - if _, err := c.fedObjectInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChangesWithTransform( - common.NewQualifiedName, - c.countWorker.Enqueue, - )); err != nil { - return nil, err - } - - if _, err := c.clusterFedObjectInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChangesWithTransform( - common.NewQualifiedName, - c.countWorker.Enqueue, - )); err != nil { - return nil, err - } - c.countWorker = worker.NewReconcileWorker[common.QualifiedName]( "policyrc-controller-count-worker", nil, @@ -142,6 +128,21 @@ func NewPolicyRCController( metrics, ) + if _, err := c.fedObjectInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChangesWithTransform( + common.NewQualifiedName, + c.countWorker.Enqueue, + )); err != nil { + return nil, err + } + + if _, err := c.clusterFedObjectInformer.Informer().AddEventHandler(eventhandlers.NewTriggerOnAllChangesWithTransform( + common.NewQualifiedName, + c.countWorker.Enqueue, + )); err != nil { + return nil, err + } + + if _, err := c.propagationPolicyInformer.Informer().AddEventHandler( eventhandlers.NewTriggerOnAllChangesWithTransform(common.NewQualifiedName, c.persistPpWorker.Enqueue), ); err != nil { From 5285e18e5b6dd331831eba29dddb1fde179bdb58 Mon Sep 17 00:00:00 2001 From: "hawjia.lim" Date: Fri, 28 Jul 2023 14:35:18 +0800 Subject: [PATCH 15/22] fix(scheduler): nil pointer in cluster join --- pkg/controllers/federatedcluster/controller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controllers/federatedcluster/controller.go b/pkg/controllers/federatedcluster/controller.go index b491c0d0..779b8a01 100644 --- a/pkg/controllers/federatedcluster/controller.go +++ b/pkg/controllers/federatedcluster/controller.go @@ -292,7 +292,7 @@ func (c *FederatedClusterController) reconcile( // Trigger initial status collection if successfully joined if joined, alreadyFailed := 
isClusterJoined(&cluster.Status); joined && !alreadyFailed { - c.statusCollectWorker.EnqueueObject(cluster) + c.statusCollectWorker.Enqueue(common.NewQualifiedName(cluster)) } return worker.StatusAllOK From 44367f0b3af39e8ddc73756b01524cb3692de6a3 Mon Sep 17 00:00:00 2001 From: "hawjia.lim" Date: Fri, 28 Jul 2023 14:37:13 +0800 Subject: [PATCH 16/22] fix(scheduler): nil pointer in enqueue --- pkg/controllers/federatedcluster/controller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controllers/federatedcluster/controller.go b/pkg/controllers/federatedcluster/controller.go index 779b8a01..adddc24b 100644 --- a/pkg/controllers/federatedcluster/controller.go +++ b/pkg/controllers/federatedcluster/controller.go @@ -492,7 +492,7 @@ func (c *FederatedClusterController) enqueueAllJoinedClusters() { for _, cluster := range clusters { if clusterutil.IsClusterJoined(&cluster.Status) { - c.statusCollectWorker.EnqueueObject(cluster) + c.statusCollectWorker.Enqueue(common.NewQualifiedName(cluster)) } } } From 8c75b1c0072b5ed02c8ddf4c3fe49eee86a2e504 Mon Sep 17 00:00:00 2001 From: "hawjia.lim" Date: Fri, 28 Jul 2023 14:41:48 +0800 Subject: [PATCH 17/22] fix(worker): remove enqueueObject and keyFunc --- pkg/controllers/federate/controller.go | 1 - pkg/controllers/federatedcluster/controller.go | 2 -- pkg/controllers/nsautoprop/controller.go | 1 - pkg/controllers/override/overridepolicy_controller.go | 1 - pkg/controllers/policyrc/controller.go | 3 --- pkg/controllers/scheduler/scheduler.go | 9 ++++----- pkg/controllers/status/controller.go | 1 - pkg/util/worker/worker.go | 10 ---------- 8 files changed, 4 insertions(+), 24 deletions(-) diff --git a/pkg/controllers/federate/controller.go b/pkg/controllers/federate/controller.go index b965ea92..c81eeac7 100644 --- a/pkg/controllers/federate/controller.go +++ b/pkg/controllers/federate/controller.go @@ -111,7 +111,6 @@ func NewFederateController( c.eventRecorder = eventsink.NewDefederatingRecorderMux(kubeClient, FederateControllerName, 6) c.worker = worker.NewReconcileWorker[workerKey]( FederateControllerName, - nil, c.reconcile, worker.RateLimiterOptions{}, workerCount, diff --git a/pkg/controllers/federatedcluster/controller.go b/pkg/controllers/federatedcluster/controller.go index adddc24b..9a8264db 100644 --- a/pkg/controllers/federatedcluster/controller.go +++ b/pkg/controllers/federatedcluster/controller.go @@ -124,7 +124,6 @@ func NewFederatedClusterController( c.worker = worker.NewReconcileWorker[common.QualifiedName]( FederatedClusterControllerName, - nil, c.reconcile, worker.RateLimiterOptions{}, workerCount, @@ -133,7 +132,6 @@ func NewFederatedClusterController( c.statusCollectWorker = worker.NewReconcileWorker[common.QualifiedName]( FederatedClusterControllerName, - nil, c.collectClusterStatus, worker.RateLimiterOptions{ InitialDelay: 50 * time.Millisecond, diff --git a/pkg/controllers/nsautoprop/controller.go b/pkg/controllers/nsautoprop/controller.go index cc929855..a0066869 100644 --- a/pkg/controllers/nsautoprop/controller.go +++ b/pkg/controllers/nsautoprop/controller.go @@ -124,7 +124,6 @@ func NewNamespaceAutoPropagationController( c.worker = worker.NewReconcileWorker[common.QualifiedName]( NamespaceAutoPropagationControllerName, - nil, c.reconcile, worker.RateLimiterOptions{}, workerCount, diff --git a/pkg/controllers/override/overridepolicy_controller.go b/pkg/controllers/override/overridepolicy_controller.go index 0ab9db33..9d23c041 100644 --- a/pkg/controllers/override/overridepolicy_controller.go +++ 
b/pkg/controllers/override/overridepolicy_controller.go @@ -99,7 +99,6 @@ func NewOverridePolicyController( c.eventRecorder = eventsink.NewDefederatingRecorderMux(kubeClient, ControllerName, 4) c.worker = worker.NewReconcileWorker[common.QualifiedName]( ControllerName, - nil, c.reconcile, worker.RateLimiterOptions{}, workerCount, diff --git a/pkg/controllers/policyrc/controller.go b/pkg/controllers/policyrc/controller.go index 083a93df..3d80877e 100644 --- a/pkg/controllers/policyrc/controller.go +++ b/pkg/controllers/policyrc/controller.go @@ -85,7 +85,6 @@ func NewPolicyRCController( c.countWorker = worker.NewReconcileWorker[common.QualifiedName]( "policyrc-controller-count-worker", - nil, c.reconcileCount, worker.RateLimiterOptions{}, 1, // currently only one worker is meaningful due to the global mutex @@ -94,7 +93,6 @@ func NewPolicyRCController( c.persistPpWorker = worker.NewReconcileWorker[common.QualifiedName]( "policyrc-controller-persist-worker", - nil, func(ctx context.Context, qualifiedName common.QualifiedName) worker.Result { return c.reconcilePersist( ctx, @@ -112,7 +110,6 @@ func NewPolicyRCController( c.persistOpWorker = worker.NewReconcileWorker[common.QualifiedName]( "policyrc-controller-persist-worker", - nil, func(ctx context.Context, qualifiedName common.QualifiedName) worker.Result { return c.reconcilePersist( ctx, diff --git a/pkg/controllers/scheduler/scheduler.go b/pkg/controllers/scheduler/scheduler.go index 7fbc7cbd..330626d7 100644 --- a/pkg/controllers/scheduler/scheduler.go +++ b/pkg/controllers/scheduler/scheduler.go @@ -130,7 +130,6 @@ func NewScheduler( s.eventRecorder = eventsink.NewDefederatingRecorderMux(kubeClient, SchedulerName, 6) s.worker = worker.NewReconcileWorker[common.QualifiedName]( SchedulerName, - nil, s.reconcile, worker.RateLimiterOptions{}, workerCount, @@ -754,7 +753,7 @@ func (s *Scheduler) enqueueFederatedObjectsForPolicy(policy metav1.Object) { if policyKey, found := GetMatchedPolicyKey(obj); !found { continue } else if policyKey.Name == policyAccessor.GetName() && policyKey.Namespace == policyAccessor.GetNamespace() { - s.worker.EnqueueObject(obj) + s.worker.Enqueue(common.NewQualifiedName(obj)) } } } @@ -777,7 +776,7 @@ func (s *Scheduler) enqueueFederatedObjectsForCluster(cluster *fedcorev1a1.Feder return } for _, obj := range fedObjects { - s.worker.EnqueueObject(obj) + s.worker.Enqueue(common.NewQualifiedName(obj)) } clusterFedObjects, err := s.clusterFedObjectInformer.Lister().List(labels.Everything()) if err != nil { @@ -785,7 +784,7 @@ func (s *Scheduler) enqueueFederatedObjectsForCluster(cluster *fedcorev1a1.Feder return } for _, obj := range clusterFedObjects { - s.worker.EnqueueObject(obj) + s.worker.Enqueue(common.NewQualifiedName(obj)) } } @@ -819,7 +818,7 @@ func (s *Scheduler) enqueueFederatedObjectsForFTC(ftc *fedcorev1a1.FederatedType continue } if sourceGVK == ftc.GetSourceTypeGVK() { - s.worker.EnqueueObject(obj) + s.worker.Enqueue(common.NewQualifiedName(obj)) } } } diff --git a/pkg/controllers/status/controller.go b/pkg/controllers/status/controller.go index 6f3a7838..ed94f7c2 100644 --- a/pkg/controllers/status/controller.go +++ b/pkg/controllers/status/controller.go @@ -142,7 +142,6 @@ func NewStatusController( s.worker = worker.NewReconcileWorker( StatusControllerName, - nil, s.reconcile, worker.RateLimiterOptions{}, workerCount, diff --git a/pkg/util/worker/worker.go b/pkg/util/worker/worker.go index 2cb24766..96b6b1de 100644 --- a/pkg/util/worker/worker.go +++ b/pkg/util/worker/worker.go @@ -38,7 +38,6 @@ 
type KeyFunc[Key any] func(metav1.Object) Key type ReconcileWorker[Key any] interface { Enqueue(key Key) - EnqueueObject(obj metav1.Object) EnqueueWithBackoff(key Key) EnqueueWithDelay(key Key, delay time.Duration) Run(ctx context.Context) @@ -59,9 +58,6 @@ type asyncWorker[Key any] struct { // Name of this reconcile worker. name string - // Function to extract queue key from a metav1.Object - keyFunc KeyFunc[Key] - // Work queue holding keys to be processed. queue workqueue.RateLimitingInterface @@ -78,7 +74,6 @@ type asyncWorker[Key any] struct { func NewReconcileWorker[Key any]( name string, - keyFunc KeyFunc[Key], reconcile ReconcileFunc[Key], timing RateLimiterOptions, workerCount int, @@ -109,7 +104,6 @@ func NewReconcileWorker[Key any]( return &asyncWorker[Key]{ name: name, - keyFunc: keyFunc, reconcile: reconcile, queue: queue, workerCount: workerCount, @@ -121,10 +115,6 @@ func (w *asyncWorker[Key]) Enqueue(key Key) { w.queue.Add(key) } -func (w *asyncWorker[Key]) EnqueueObject(obj metav1.Object) { - w.Enqueue(w.keyFunc(obj)) -} - func (w *asyncWorker[Key]) EnqueueWithBackoff(key Key) { w.queue.AddRateLimited(key) } From fb44559d797550b45ee967d68fe1941f6838c802 Mon Sep 17 00:00:00 2001 From: "hawjia.lim" Date: Fri, 28 Jul 2023 15:00:19 +0800 Subject: [PATCH 18/22] chore(cluster-controller): adjust healthcheck period --- pkg/controllers/federatedcluster/controller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controllers/federatedcluster/controller.go b/pkg/controllers/federatedcluster/controller.go index 9a8264db..3436c481 100644 --- a/pkg/controllers/federatedcluster/controller.go +++ b/pkg/controllers/federatedcluster/controller.go @@ -105,7 +105,7 @@ func NewFederatedClusterController( fedClient: fedClient, fedSystemNamespace: fedSystemNamespace, clusterHealthCheckConfig: &ClusterHealthCheckConfig{ - Period: time.Minute, + Period: time.Second*30, }, clusterJoinTimeout: clusterJoinTimeout, metrics: metrics, From af546b360805b18a099648ce93247ad488f6a96e Mon Sep 17 00:00:00 2001 From: "hawjia.lim" Date: Fri, 28 Jul 2023 17:47:52 +0800 Subject: [PATCH 19/22] feat(scheduler): use label selectors for listing --- pkg/controllers/nsautoprop/controller.go | 2 +- pkg/controllers/scheduler/scheduler.go | 63 ++++++++++++++++-------- 2 files changed, 44 insertions(+), 21 deletions(-) diff --git a/pkg/controllers/nsautoprop/controller.go b/pkg/controllers/nsautoprop/controller.go index a0066869..8744e23a 100644 --- a/pkg/controllers/nsautoprop/controller.go +++ b/pkg/controllers/nsautoprop/controller.go @@ -135,7 +135,7 @@ func NewNamespaceAutoPropagationController( func(obj *fedcorev1a1.ClusterFederatedObject) { srcMeta, err := obj.Spec.GetTemplateAsUnstructured() if err != nil { - logger.Error( + c.logger.Error( err, "Failed to get source object's metadata from ClusterFederatedObject", "object", diff --git a/pkg/controllers/scheduler/scheduler.go b/pkg/controllers/scheduler/scheduler.go index 330626d7..1326fab2 100644 --- a/pkg/controllers/scheduler/scheduler.go +++ b/pkg/controllers/scheduler/scheduler.go @@ -30,6 +30,7 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/selection" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/dynamic" "k8s.io/client-go/kubernetes" @@ -293,7 +294,7 @@ func (s *Scheduler) reconcile(ctx context.Context, key common.QualifiedName) (st if schedulingProfile != nil { ctx, logger = logging.InjectLoggerValues( ctx, - 
"schedulingProfile", + "scheduling-profile", common.NewQualifiedName(schedulingProfile).String(), ) } @@ -314,13 +315,13 @@ func (s *Scheduler) reconcile(ctx context.Context, key common.QualifiedName) (st spec := policy.GetSpec() auxInfo.enableFollowerScheduling = !spec.DisableFollowerScheduling - ctx, logger = logging.InjectLoggerValues(ctx, "enableFollowerScheduling", auxInfo.enableFollowerScheduling) + ctx, logger = logging.InjectLoggerValues(ctx, "enable-follower-scheduling", auxInfo.enableFollowerScheduling) if autoMigration := spec.AutoMigration; autoMigration != nil { auxInfo.unschedulableThreshold = pointer.Duration(autoMigration.Trigger.PodUnschedulableDuration.Duration) ctx, logger = logging.InjectLoggerValues( ctx, - "unschedulableThreshold", + "unschedulable-threshold", auxInfo.unschedulableThreshold.String(), ) } @@ -716,13 +717,22 @@ func (s *Scheduler) enqueueFederatedObjectsForPolicy(policy metav1.Object) { logger := s.logger.WithValues("policy", policyKey.String()) logger.V(2).Info("Enqueue FederatedObjects and ClusterFederatedObjects for policy") + isPolicyNamespaced := len(policyKey.Namespace) > 0 + allObjects := []metav1.Object{} - if len(policyKey.Namespace) > 0 { + var labelSelector labels.Selector + if isPolicyNamespaced { + labelSelector = labels.Set{PropagationPolicyNameLabel: policyKey.Name}.AsSelector() + } else { + labelSelector = labels.Set{ClusterPropagationPolicyNameLabel: policyKey.Name}.AsSelector() + } + + if isPolicyNamespaced { // If the policy is namespaced, we only need to scan FederatedObjects in the same namespace. - fedObjects, err := s.fedObjectInformer.Lister().FederatedObjects(policyKey.Namespace).List(labels.Everything()) + fedObjects, err := s.fedObjectInformer.Lister().FederatedObjects(policyKey.Namespace).List(labelSelector) if err != nil { - s.logger.Error(err, "Failed to enqueue FederatedObjects for policy") + logger.Error(err, "Failed to enqueue FederatedObjects for policy") return } for _, obj := range fedObjects { @@ -730,18 +740,18 @@ func (s *Scheduler) enqueueFederatedObjectsForPolicy(policy metav1.Object) { } } else { // If the policy is cluster-scoped, we need to scan all FederatedObjects and ClusterFederatedObjects - fedObjects, err := s.fedObjectInformer.Lister().List(labels.Everything()) + fedObjects, err := s.fedObjectInformer.Lister().List(labelSelector) if err != nil { - s.logger.Error(err, "Failed to enqueue FederatedObjects for policy") + logger.Error(err, "Failed to enqueue FederatedObjects for policy") return } for _, obj := range fedObjects { allObjects = append(allObjects, obj) } - clusterFedObjects, err := s.clusterFedObjectInformer.Lister().List(labels.Everything()) + clusterFedObjects, err := s.clusterFedObjectInformer.Lister().List(labelSelector) if err != nil { - s.logger.Error(err, "Failed to enqueue ClusterFederatedObjects for policy") + logger.Error(err, "Failed to enqueue ClusterFederatedObjects for policy") return } for _, obj := range clusterFedObjects { @@ -762,25 +772,38 @@ func (s *Scheduler) enqueueFederatedObjectsForCluster(cluster *fedcorev1a1.Feder logger := s.logger.WithValues("cluster", cluster.GetName()) if !clusterutil.IsClusterJoined(&cluster.Status) { - s.logger.WithValues("cluster", cluster.Name). - V(3). 
- Info("Skip enqueue federated objects for cluster, cluster not joined") + logger.V(3).Info("Skip enqueue federated objects for cluster, cluster not joined") return } logger.V(2).Info("Enqueue federated objects for cluster") - fedObjects, err := s.fedObjectInformer.Lister().List(labels.Everything()) + hasPropagationPolicy, err := labels.NewRequirement(PropagationPolicyNameLabel, selection.Exists, []string{}) if err != nil { - s.logger.Error(err, "Failed to enqueue FederatedObjects for policy") + logger.Error(err, "Failed to generate label selector for federated objects") + return + } + hasClusterPropagationPolicy, err := labels.NewRequirement( + ClusterPropagationPolicyNameLabel, + selection.Exists, + []string{}, + ) + if err != nil { + logger.Error(err, "Failed to generate label selector for federated objects") + } + labelSelector := labels.NewSelector().Add(*hasPropagationPolicy).Add(*hasClusterPropagationPolicy) + + fedObjects, err := s.fedObjectInformer.Lister().List(labelSelector) + if err != nil { + logger.Error(err, "Failed to enqueue FederatedObjects for policy") return } for _, obj := range fedObjects { s.worker.Enqueue(common.NewQualifiedName(obj)) } - clusterFedObjects, err := s.clusterFedObjectInformer.Lister().List(labels.Everything()) + clusterFedObjects, err := s.clusterFedObjectInformer.Lister().List(labelSelector) if err != nil { - s.logger.Error(err, "Failed to enqueue ClusterFederatedObjects for policy") + logger.Error(err, "Failed to enqueue ClusterFederatedObjects for policy") return } for _, obj := range clusterFedObjects { @@ -796,7 +819,7 @@ func (s *Scheduler) enqueueFederatedObjectsForFTC(ftc *fedcorev1a1.FederatedType allObjects := []fedcorev1a1.GenericFederatedObject{} fedObjects, err := s.fedObjectInformer.Lister().List(labels.Everything()) if err != nil { - s.logger.Error(err, "Failed to enquue FederatedObjects for policy") + logger.Error(err, "Failed to enqueue FederatedObjects for policy") return } for _, obj := range fedObjects { @@ -804,7 +827,7 @@ func (s *Scheduler) enqueueFederatedObjectsForFTC(ftc *fedcorev1a1.FederatedType } clusterFedObjects, err := s.clusterFedObjectInformer.Lister().List(labels.Everything()) if err != nil { - s.logger.Error(err, "Failed to enquue ClusterFederatedObjects for policy") + logger.Error(err, "Failed to enqueue ClusterFederatedObjects for policy") return } for _, obj := range clusterFedObjects { @@ -814,7 +837,7 @@ func (s *Scheduler) enqueueFederatedObjectsForFTC(ftc *fedcorev1a1.FederatedType for _, obj := range allObjects { sourceGVK, err := obj.GetSpec().GetTemplateGVK() if err != nil { - s.logger.Error(err, "Failed to get source GVK from FederatedObject, will not enqueue") + logger.Error(err, "Failed to get source GVK from FederatedObject, will not enqueue") continue } if sourceGVK == ftc.GetSourceTypeGVK() { From 1b642dbccea7707625d5648a77a9f16e2d6e6757 Mon Sep 17 00:00:00 2001 From: "hawjia.lim" Date: Fri, 28 Jul 2023 18:03:54 +0800 Subject: [PATCH 20/22] fix(scheduler): use label selectors for listing --- .../override/overridepolicy_controller.go | 2 +- pkg/controllers/scheduler/scheduler.go | 57 +++++++++---------- 2 files changed, 28 insertions(+), 31 deletions(-) diff --git a/pkg/controllers/override/overridepolicy_controller.go b/pkg/controllers/override/overridepolicy_controller.go index 9d23c041..4c744196 100644 --- a/pkg/controllers/override/overridepolicy_controller.go +++ b/pkg/controllers/override/overridepolicy_controller.go @@ -234,7 +234,7 @@ func (c *Controller) enqueueFedObjectsUsingPolicy(policy 
fedcorev1a1.GenericOver func (c *Controller) reconcileOnClusterChange(cluster *fedcorev1a1.FederatedCluster) { logger := c.logger.WithValues("federated-cluster", cluster.GetName()) - logger.V(2).Info("observed a cluster change") + logger.V(2).Info("Observed a cluster change") opRequirement, _ := labels.NewRequirement(OverridePolicyNameLabel, selection.Exists, nil) copRequirement, _ := labels.NewRequirement(ClusterOverridePolicyNameLabel, selection.Exists, nil) diff --git a/pkg/controllers/scheduler/scheduler.go b/pkg/controllers/scheduler/scheduler.go index 1326fab2..a9093636 100644 --- a/pkg/controllers/scheduler/scheduler.go +++ b/pkg/controllers/scheduler/scheduler.go @@ -715,7 +715,7 @@ func (s *Scheduler) enqueueFederatedObjectsForPolicy(policy metav1.Object) { policyKey := common.NewQualifiedName(policyAccessor) logger := s.logger.WithValues("policy", policyKey.String()) - logger.V(2).Info("Enqueue FederatedObjects and ClusterFederatedObjects for policy") + logger.V(2).Info("Enqueue federated objects for policy") isPolicyNamespaced := len(policyKey.Namespace) > 0 @@ -778,36 +778,33 @@ func (s *Scheduler) enqueueFederatedObjectsForCluster(cluster *fedcorev1a1.Feder logger.V(2).Info("Enqueue federated objects for cluster") - hasPropagationPolicy, err := labels.NewRequirement(PropagationPolicyNameLabel, selection.Exists, []string{}) - if err != nil { - logger.Error(err, "Failed to generate label selector for federated objects") - return - } - hasClusterPropagationPolicy, err := labels.NewRequirement( - ClusterPropagationPolicyNameLabel, - selection.Exists, - []string{}, - ) - if err != nil { - logger.Error(err, "Failed to generate label selector for federated objects") - } - labelSelector := labels.NewSelector().Add(*hasPropagationPolicy).Add(*hasClusterPropagationPolicy) + for _, policyLabel := range []string{PropagationPolicyNameLabel, ClusterPropagationPolicyNameLabel} { + hasPolicy, err := labels.NewRequirement(policyLabel, selection.Exists, []string{}) + if err != nil { + logger.Error(err, "Failed to generate label selector for federated objects") + return + } + if err != nil { + logger.Error(err, "Failed to generate label selector for federated objects") + } + labelSelector := labels.NewSelector().Add(*hasPolicy) - fedObjects, err := s.fedObjectInformer.Lister().List(labelSelector) - if err != nil { - logger.Error(err, "Failed to enqueue FederatedObjects for policy") - return - } - for _, obj := range fedObjects { - s.worker.Enqueue(common.NewQualifiedName(obj)) - } - clusterFedObjects, err := s.clusterFedObjectInformer.Lister().List(labelSelector) - if err != nil { - logger.Error(err, "Failed to enqueue ClusterFederatedObjects for policy") - return - } - for _, obj := range clusterFedObjects { - s.worker.Enqueue(common.NewQualifiedName(obj)) + fedObjects, err := s.fedObjectInformer.Lister().List(labelSelector) + if err != nil { + logger.Error(err, "Failed to enqueue FederatedObjects for cluster") + return + } + for _, obj := range fedObjects { + s.worker.Enqueue(common.NewQualifiedName(obj)) + } + clusterFedObjects, err := s.clusterFedObjectInformer.Lister().List(labelSelector) + if err != nil { + logger.Error(err, "Failed to enqueue ClusterFederatedObjects for cluster") + return + } + for _, obj := range clusterFedObjects { + s.worker.Enqueue(common.NewQualifiedName(obj)) + } } } From 76e5baa0607f379798cf24fe41e3f9539dd7e791 Mon Sep 17 00:00:00 2001 From: "hawjia.lim" Date: Fri, 28 Jul 2023 20:11:35 +0800 Subject: [PATCH 21/22] fix(scheduler): fix extra error check and double 
reenque --- pkg/controllers/scheduler/scheduler.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pkg/controllers/scheduler/scheduler.go b/pkg/controllers/scheduler/scheduler.go index a9093636..46c5f96d 100644 --- a/pkg/controllers/scheduler/scheduler.go +++ b/pkg/controllers/scheduler/scheduler.go @@ -778,15 +778,13 @@ func (s *Scheduler) enqueueFederatedObjectsForCluster(cluster *fedcorev1a1.Feder logger.V(2).Info("Enqueue federated objects for cluster") + allObjects := sets.New[common.QualifiedName]() for _, policyLabel := range []string{PropagationPolicyNameLabel, ClusterPropagationPolicyNameLabel} { hasPolicy, err := labels.NewRequirement(policyLabel, selection.Exists, []string{}) if err != nil { logger.Error(err, "Failed to generate label selector for federated objects") return } - if err != nil { - logger.Error(err, "Failed to generate label selector for federated objects") - } labelSelector := labels.NewSelector().Add(*hasPolicy) fedObjects, err := s.fedObjectInformer.Lister().List(labelSelector) @@ -795,7 +793,7 @@ func (s *Scheduler) enqueueFederatedObjectsForCluster(cluster *fedcorev1a1.Feder return } for _, obj := range fedObjects { - s.worker.Enqueue(common.NewQualifiedName(obj)) + allObjects.Insert(common.NewQualifiedName(obj)) } clusterFedObjects, err := s.clusterFedObjectInformer.Lister().List(labelSelector) if err != nil { @@ -803,9 +801,13 @@ func (s *Scheduler) enqueueFederatedObjectsForCluster(cluster *fedcorev1a1.Feder return } for _, obj := range clusterFedObjects { - s.worker.Enqueue(common.NewQualifiedName(obj)) + allObjects.Insert(common.NewQualifiedName(obj)) } } + + for obj := range allObjects { + s.worker.Enqueue(obj) + } } func (s *Scheduler) enqueueFederatedObjectsForFTC(ftc *fedcorev1a1.FederatedTypeConfig) { From ab72f36093ac96854facc09a7ab191ad4e3cb7b9 Mon Sep 17 00:00:00 2001 From: "hawjia.lim" Date: Fri, 28 Jul 2023 20:36:05 +0800 Subject: [PATCH 22/22] chore(scheduler): fix formatting --- pkg/controllers/scheduler/scheduler.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controllers/scheduler/scheduler.go b/pkg/controllers/scheduler/scheduler.go index 46c5f96d..4e180b53 100644 --- a/pkg/controllers/scheduler/scheduler.go +++ b/pkg/controllers/scheduler/scheduler.go @@ -805,7 +805,7 @@ func (s *Scheduler) enqueueFederatedObjectsForCluster(cluster *fedcorev1a1.Feder } } - for obj := range allObjects { + for obj := range allObjects { s.worker.Enqueue(obj) } }
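
Patches 19 through 21 of this series converge on one pattern: instead of listing every federated object with labels.Everything(), the scheduler builds an Exists requirement per policy label, lists once per selector, and deduplicates before enqueueing. Splitting the selectors matters because adding both requirements to a single selector (as the first attempt did) ANDs them and matches only objects carrying both policy labels, while listing per label can return the same object twice. The sketch below is a minimal, self-contained illustration of that select-then-dedupe pattern using only apimachinery types; the label-key constants and the enqueueKeysForCluster helper are illustrative stand-ins for the scheduler's lister-backed code, not KubeAdmiral APIs.

package main

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/selection"
	"k8s.io/apimachinery/pkg/util/sets"
)

// Illustrative label keys; the real constants live in the scheduler package.
const (
	propagationPolicyNameLabel        = "kubeadmiral.io/propagation-policy-name"
	clusterPropagationPolicyNameLabel = "kubeadmiral.io/cluster-propagation-policy-name"
)

// enqueueKeysForCluster collects namespace/name keys of objects carrying either
// policy label, deduplicating objects that would otherwise be selected twice.
func enqueueKeysForCluster(objs []metav1.Object) ([]string, error) {
	keys := sets.New[string]()

	for _, labelKey := range []string{propagationPolicyNameLabel, clusterPropagationPolicyNameLabel} {
		// One Exists requirement per label; ANDing both into a single
		// selector would only match objects that carry both labels.
		hasPolicy, err := labels.NewRequirement(labelKey, selection.Exists, nil)
		if err != nil {
			return nil, fmt.Errorf("failed to build label requirement: %w", err)
		}
		selector := labels.NewSelector().Add(*hasPolicy)

		for _, obj := range objs {
			if selector.Matches(labels.Set(obj.GetLabels())) {
				keys.Insert(obj.GetNamespace() + "/" + obj.GetName())
			}
		}
	}

	return sets.List(keys), nil
}

func main() {
	objs := []metav1.Object{
		&metav1.PartialObjectMetadata{ObjectMeta: metav1.ObjectMeta{
			Name: "a", Namespace: "default",
			Labels: map[string]string{propagationPolicyNameLabel: "pp-1"},
		}},
		&metav1.PartialObjectMetadata{ObjectMeta: metav1.ObjectMeta{
			Name:   "b",
			Labels: map[string]string{clusterPropagationPolicyNameLabel: "cpp-1"},
		}},
	}

	keys, _ := enqueueKeysForCluster(objs)
	fmt.Println(keys) // [/b default/a]
}

In the controller itself the deduplicated keys are fed to the reconcile worker's queue rather than returned, but the per-label selector construction and the sets-based deduplication are the parts the later fixes in this series settle on.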