diff --git a/cluster-autoscaler/cloudprovider/mcm/mcm_cloud_provider.go b/cluster-autoscaler/cloudprovider/mcm/mcm_cloud_provider.go index e6ff41de6279..b9b5d6c32607 100644 --- a/cluster-autoscaler/cloudprovider/mcm/mcm_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/mcm/mcm_cloud_provider.go @@ -24,17 +24,21 @@ package mcm import ( "context" "fmt" + "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/eligibility" + "slices" "strconv" "strings" + "sync" "time" + "github.com/gardener/machine-controller-manager/pkg/util/provider/machineutils" apiv1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" - "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" "k8s.io/autoscaler/cluster-autoscaler/config" - "k8s.io/autoscaler/cluster-autoscaler/config/dynamic" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" "k8s.io/klog/v2" schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" @@ -67,15 +71,16 @@ const ( // MCMCloudProvider implements the cloud provider interface for machine-controller-manager // Reference: https://github.com/gardener/machine-controller-manager type mcmCloudProvider struct { - mcmManager *McmManager - machinedeployments map[types.NamespacedName]*MachineDeployment - resourceLimiter *cloudprovider.ResourceLimiter + mcmManager *McmManager + resourceLimiter *cloudprovider.ResourceLimiter } +var _ cloudprovider.CloudProvider = (*mcmCloudProvider)(nil) + // BuildMcmCloudProvider builds CloudProvider implementation for machine-controller-manager. 
func BuildMcmCloudProvider(mcmManager *McmManager, resourceLimiter *cloudprovider.ResourceLimiter) (cloudprovider.CloudProvider, error) { if mcmManager.discoveryOpts.StaticDiscoverySpecified() { - return buildStaticallyDiscoveringProvider(mcmManager, mcmManager.discoveryOpts.NodeGroupSpecs, resourceLimiter) + return buildStaticallyDiscoveringProvider(mcmManager, resourceLimiter) } return nil, fmt.Errorf("Failed to build an mcm cloud provider: Either node group specs or node group auto discovery spec must be specified") } @@ -96,55 +101,32 @@ func BuildMCM(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscover return provider } -func buildStaticallyDiscoveringProvider(mcmManager *McmManager, specs []string, resourceLimiter *cloudprovider.ResourceLimiter) (*mcmCloudProvider, error) { +func buildStaticallyDiscoveringProvider(mcmManager *McmManager, resourceLimiter *cloudprovider.ResourceLimiter) (*mcmCloudProvider, error) { mcm := &mcmCloudProvider{ - mcmManager: mcmManager, - machinedeployments: make(map[types.NamespacedName]*MachineDeployment), - resourceLimiter: resourceLimiter, - } - for _, spec := range specs { - if err := mcm.addNodeGroup(spec); err != nil { - return nil, err - } + mcmManager: mcmManager, + resourceLimiter: resourceLimiter, } return mcm, nil } -// Cleanup stops the go routine that is handling the current view of the MachineDeployment in the form of a cache +// Cleanup stops the go routine that is handling the current view of the nodeGroup in the form of a cache func (mcm *mcmCloudProvider) Cleanup() error { mcm.mcmManager.Cleanup() return nil } -// addNodeGroup adds node group defined in string spec. 
Format: -// minNodes:maxNodes:namespace.machineDeploymentName -func (mcm *mcmCloudProvider) addNodeGroup(spec string) error { - machinedeployment, err := buildMachineDeploymentFromSpec(spec, mcm.mcmManager) - if err != nil { - return err - } - mcm.addMachineDeployment(machinedeployment) - return nil -} - -func (mcm *mcmCloudProvider) addMachineDeployment(machinedeployment *MachineDeployment) { - key := types.NamespacedName{Namespace: machinedeployment.Namespace, Name: machinedeployment.Name} - mcm.machinedeployments[key] = machinedeployment - return -} - func (mcm *mcmCloudProvider) Name() string { return "machine-controller-manager" } // NodeGroups returns all node groups configured for this cloud provider. func (mcm *mcmCloudProvider) NodeGroups() []cloudprovider.NodeGroup { - result := make([]cloudprovider.NodeGroup, 0, len(mcm.machinedeployments)) - for _, machinedeployment := range mcm.machinedeployments { - if machinedeployment.maxSize == 0 { + result := make([]cloudprovider.NodeGroup, 0, len(mcm.mcmManager.nodeGroups)) + for _, ng := range mcm.mcmManager.nodeGroups { + if ng.maxSize == 0 { continue } - result = append(result, machinedeployment) + result = append(result, ng) } return result } @@ -156,29 +138,29 @@ func (mcm *mcmCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.N return nil, nil } - ref, err := ReferenceFromProviderID(mcm.mcmManager, node.Spec.ProviderID) + mInfo, err := mcm.mcmManager.getMachineInfo(node) if err != nil { return nil, err } - if ref == nil { + if mInfo == nil { klog.V(4).Infof("Skipped node %v, it's either been removed or it's not managed by this controller", node.Spec.ProviderID) return nil, nil } - md, err := mcm.mcmManager.GetMachineDeploymentForMachine(ref) + ng, err := mcm.mcmManager.getNodeGroup(mInfo.Key) if err != nil { return nil, err } - key := types.NamespacedName{Namespace: md.Namespace, Name: md.Name} - _, isManaged := mcm.machinedeployments[key] + key := types.NamespacedName{Namespace: ng.Namespace, 
Name: ng.Name} + _, isManaged := mcm.mcmManager.nodeGroups[key] if !isManaged { klog.V(4).Infof("Skipped node %v, it's not managed by this controller", node.Spec.ProviderID) return nil, nil } - return md, nil + return ng, nil } // HasInstance returns whether a given node has a corresponding instance in this cloud provider @@ -248,112 +230,78 @@ func (mcm *mcmCloudProvider) GetNodeGpuConfig(*apiv1.Node) *cloudprovider.GpuCon return nil } -// Ref contains a reference to the name of the machine-deployment. -type Ref struct { - Name string - Namespace string -} - -// ReferenceFromProviderID extracts the Ref from providerId. It returns corresponding machine-name to providerid. -func ReferenceFromProviderID(m *McmManager, id string) (*Ref, error) { - machines, err := m.machineLister.Machines(m.namespace).List(labels.Everything()) - if err != nil { - return nil, fmt.Errorf("Could not list machines due to error: %s", err) - } - - var Name, Namespace string - for _, machine := range machines { - machineID := strings.Split(machine.Spec.ProviderID, "/") - nodeID := strings.Split(id, "/") - // If registered, the ID will match the cloudprovider instance ID. - // If unregistered, the ID will match the machine name. - if machineID[len(machineID)-1] == nodeID[len(nodeID)-1] || - nodeID[len(nodeID)-1] == machine.Name { - - Name = machine.Name - Namespace = machine.Namespace - break - } - } - - if Name == "" { - // Could not find any machine corresponds to node %+v", id - klog.V(4).Infof("No machine found for node ID %q", id) - return nil, nil - } - return &Ref{ - Name: Name, - Namespace: Namespace, - }, nil -} - -// MachineDeployment implements NodeGroup interface. -type MachineDeployment struct { - Ref +// nodeGroup implements NodeGroup interface. 
+type nodeGroup struct { + types.NamespacedName mcmManager *McmManager - minSize int - maxSize int + scalingMutex sync.Mutex + minSize int + maxSize int } +var _ cloudprovider.NodeGroup = (*nodeGroup)(nil) + // MaxSize returns maximum size of the node group. -func (machinedeployment *MachineDeployment) MaxSize() int { - return machinedeployment.maxSize +func (ngImpl *nodeGroup) MaxSize() int { + return ngImpl.maxSize } // MinSize returns minimum size of the node group. -func (machinedeployment *MachineDeployment) MinSize() int { - return machinedeployment.minSize +func (ngImpl *nodeGroup) MinSize() int { + return ngImpl.minSize } // TargetSize returns the current TARGET size of the node group. It is possible that the // number is different from the number of nodes registered in Kubernetes. -func (machinedeployment *MachineDeployment) TargetSize() (int, error) { - size, err := machinedeployment.mcmManager.GetMachineDeploymentSize(machinedeployment) +func (ngImpl *nodeGroup) TargetSize() (int, error) { + size, err := ngImpl.mcmManager.GetMachineDeploymentSize(ngImpl.Name) return int(size), err } // Exist checks if the node group really exists on the cloud provider side. Allows to tell the // theoretical node group from the real one. // TODO: Implement this to check if machine-deployment really exists. -func (machinedeployment *MachineDeployment) Exist() bool { +func (ngImpl *nodeGroup) Exist() bool { return true } // Create creates the node group on the cloud provider side. -func (machinedeployment *MachineDeployment) Create() (cloudprovider.NodeGroup, error) { +func (ngImpl *nodeGroup) Create() (cloudprovider.NodeGroup, error) { return nil, cloudprovider.ErrAlreadyExist } // Autoprovisioned returns true if the node group is autoprovisioned. -func (machinedeployment *MachineDeployment) Autoprovisioned() bool { +func (ngImpl *nodeGroup) Autoprovisioned() bool { return false } // Delete deletes the node group on the cloud provider side. 
// This will be executed only for autoprovisioned node groups, once their size drops to 0. -func (machinedeployment *MachineDeployment) Delete() error { +func (ngImpl *nodeGroup) Delete() error { return cloudprovider.ErrNotImplemented } // IncreaseSize of the Machinedeployment. -func (machinedeployment *MachineDeployment) IncreaseSize(delta int) error { - klog.V(0).Infof("Received request to increase size of machine deployment %s by %d", machinedeployment.Name, delta) +func (ngImpl *nodeGroup) IncreaseSize(delta int) error { + klog.V(0).Infof("Received request to increase size of machine deployment %s by %d", ngImpl.Name, delta) if delta <= 0 { return fmt.Errorf("size increase must be positive") } - size, err := machinedeployment.mcmManager.GetMachineDeploymentSize(machinedeployment) + release := ngImpl.AcquireScalingMutex(fmt.Sprintf("IncreaseSize by #%d", delta)) + defer release() + size, err := ngImpl.mcmManager.GetMachineDeploymentSize(ngImpl.Name) if err != nil { return err } targetSize := int(size) + delta - if targetSize > machinedeployment.MaxSize() { - return fmt.Errorf("size increase too large - desired:%d max:%d", targetSize, machinedeployment.MaxSize()) + if targetSize > ngImpl.MaxSize() { + return fmt.Errorf("size increase too large - desired:%d max:%d", targetSize, ngImpl.MaxSize()) } - return machinedeployment.mcmManager.retry(func(ctx context.Context) (bool, error) { - return machinedeployment.mcmManager.SetMachineDeploymentSize(ctx, machinedeployment, int64(targetSize)) - }, "MachineDeployment", "update", machinedeployment.Name) + return ngImpl.mcmManager.retry(func(ctx context.Context) (bool, error) { + return ngImpl.mcmManager.SetMachineDeploymentSize(ctx, ngImpl, int64(targetSize)) + }, "MachineDeployment", "update", ngImpl.Name) } // DecreaseTargetSize decreases the target size of the node group. 
This function @@ -361,75 +309,154 @@ func (machinedeployment *MachineDeployment) IncreaseSize(delta int) error { // request for new nodes that have not been yet fulfilled. Delta should be negative. // It is assumed that cloud provider will not delete the existing nodes if the size // when there is an option to just decrease the target. -func (machinedeployment *MachineDeployment) DecreaseTargetSize(delta int) error { - klog.V(0).Infof("Received request to decrease target size of machine deployment %s by %d", machinedeployment.Name, delta) +func (ngImpl *nodeGroup) DecreaseTargetSize(delta int) error { + klog.V(0).Infof("Received request to decrease target size of machine deployment %s by %d", ngImpl.Name, delta) if delta >= 0 { return fmt.Errorf("size decrease size must be negative") } - size, err := machinedeployment.mcmManager.GetMachineDeploymentSize(machinedeployment) + release := ngImpl.AcquireScalingMutex(fmt.Sprintf("DecreaseTargetSize by #%d", delta)) + defer release() + size, err := ngImpl.mcmManager.GetMachineDeploymentSize(ngImpl.Name) if err != nil { return err } decreaseAmount := int(size) + delta - if decreaseAmount < machinedeployment.minSize { - klog.Warningf("Cannot go below min size= %d for machineDeployment %s, requested target size= %d . Setting target size to min size", machinedeployment.minSize, machinedeployment.Name, size+int64(delta)) - decreaseAmount = machinedeployment.minSize + if decreaseAmount < ngImpl.minSize { + klog.Warningf("Cannot go below min size= %d for NodeGroup %s, requested target size= %d . 
Setting target size to min size", ngImpl.minSize, ngImpl.Name, size+int64(delta)) + decreaseAmount = ngImpl.minSize } - return machinedeployment.mcmManager.retry(func(ctx context.Context) (bool, error) { - return machinedeployment.mcmManager.SetMachineDeploymentSize(ctx, machinedeployment, int64(decreaseAmount)) - }, "MachineDeployment", "update", machinedeployment.Name) + return ngImpl.mcmManager.retry(func(ctx context.Context) (bool, error) { + return ngImpl.mcmManager.SetMachineDeploymentSize(ctx, ngImpl, int64(decreaseAmount)) + }, "MachineDeployment", "update", ngImpl.Name) } -// Belongs returns true if the given node belongs to the NodeGroup. -// TODO: Implement this to iterate over machines under machinedeployment, and return true if node exists in list. -func (machinedeployment *MachineDeployment) Belongs(node *apiv1.Node) (bool, error) { - ref, err := ReferenceFromProviderID(machinedeployment.mcmManager, node.Spec.ProviderID) +// Refresh cordons the Nodes corresponding to the machines that have been marked for deletion in the TriggerDeletionByMCM annotation on the MachineDeployment +func (ngImpl *nodeGroup) Refresh() error { + mcd, err := ngImpl.mcmManager.GetMachineDeploymentObject(ngImpl.Name) if err != nil { - return false, err + return err } - targetMd, err := machinedeployment.mcmManager.GetMachineDeploymentForMachine(ref) + toBeDeletedMachineNames := getMachineNamesTriggeredForDeletion(mcd) + if len(toBeDeletedMachineNames) == 0 { + return nil + } + machinesOfNodeGroup, err := ngImpl.mcmManager.getMachinesForMachineDeployment(ngImpl.Name) if err != nil { - return false, err + klog.Warningf("NodeGroup.Refresh() of %q failed to get machines for MachineDeployment due to: %v", ngImpl.Name, err) + return nil + } + toBeDeletedMachines := filterMachinesMatchingNames(machinesOfNodeGroup, sets.New(toBeDeletedMachineNames...)) + if len(toBeDeletedMachines) == 0 { + klog.Warningf("NodeGroup.Refresh() of %q could not find Machine objects for 
toBeDeletedMachineNames %q", ngImpl.Name, toBeDeletedMachineNames) + return nil + } - if targetMd == nil { - return false, fmt.Errorf("%s doesn't belong to a known MachinDeployment", node.Name) + toBeDeletedNodeNames := getNodeNamesFromMachines(toBeDeletedMachines) + if len(toBeDeletedNodeNames) == 0 { + klog.Warningf("NodeGroup.Refresh() of %q could not find toBeDeletedNodeNames for toBeDeletedMachineNames %q of MachineDeployment", ngImpl.Name, toBeDeletedMachineNames) + return nil } - if targetMd.Id() != machinedeployment.Id() { - return false, nil + err = ngImpl.mcmManager.cordonNodes(toBeDeletedNodeNames) + if err != nil { + // we do not return error since we don't want this to block CA operation. This is best-effort. + klog.Warningf("NodeGroup.Refresh() of %q ran into error cordoning nodes: %v", ngImpl.Name, err) } - return true, nil + return nil +} + +// belongs checks if the given node belongs to this NodeGroup and also returns its MachineInfo for its corresponding Machine +func (ngImpl *nodeGroup) belongs(node *apiv1.Node) (belongs bool, mInfo *machineInfo, err error) { + mInfo, err = ngImpl.mcmManager.getMachineInfo(node) + if err != nil || mInfo == nil { + return + } + targetNg, err := ngImpl.mcmManager.getNodeGroup(mInfo.Key) + if err != nil { + return + } + if targetNg == nil { + err = fmt.Errorf("%s doesn't belong to a known MachineDeployment", node.Name) + return + } + if targetNg.Id() == ngImpl.Id() { + belongs = true + } + return } // DeleteNodes deletes the nodes from the group. It is expected that this method will not be called // for nodes which are not part of ANY machine deployment. 
-func (machinedeployment *MachineDeployment) DeleteNodes(nodes []*apiv1.Node) error { - nodeNames := getNodeNames(nodes) - klog.V(0).Infof("Received request to delete nodes:- %v", nodeNames) - size, err := machinedeployment.mcmManager.GetMachineDeploymentSize(machinedeployment) +func (ngImpl *nodeGroup) DeleteNodes(nodes []*apiv1.Node) error { + klog.V(0).Infof("for NodeGroup %q, Received request to delete nodes:- %v", ngImpl.Name, getNodeNames(nodes)) + size, err := ngImpl.mcmManager.GetMachineDeploymentSize(ngImpl.Name) if err != nil { return err } - if int(size) <= machinedeployment.MinSize() { + if int(size) <= ngImpl.MinSize() { return fmt.Errorf("min size reached, nodes will not be deleted") } - machines := make([]*Ref, 0, len(nodes)) + var toBeDeletedMachineInfos = make([]machineInfo, 0, len(nodes)) for _, node := range nodes { - belongs, err := machinedeployment.Belongs(node) + belongs, mInfo, err := ngImpl.belongs(node) if err != nil { return err } else if !belongs { - return fmt.Errorf("%s belongs to a different machinedeployment than %s", node.Name, machinedeployment.Id()) + return fmt.Errorf("%s belongs to a different MachineDeployment than %q", node.Name, ngImpl.Name) } - ref, err := ReferenceFromProviderID(machinedeployment.mcmManager, node.Spec.ProviderID) - if err != nil { - return fmt.Errorf("couldn't find the machine-name from provider-id %s", node.Spec.ProviderID) + if mInfo.FailedOrTerminating { + klog.V(3).Infof("for NodeGroup %q, Machine %q is already marked as terminating - skipping deletion", ngImpl.Name, mInfo.Key.Name) + continue + } + if eligibility.HasNoScaleDownAnnotation(node) { + klog.V(4).Infof("for NodeGroup %q, Node %q corresponding to Machine %q is marked with ScaleDownDisabledAnnotation %q - skipping deletion", ngImpl.Name, node.Name, mInfo.Key.Name, eligibility.ScaleDownDisabledKey) + continue } - machines = append(machines, ref) + toBeDeletedMachineInfos = append(toBeDeletedMachineInfos, *mInfo) + } + return 
ngImpl.deleteMachines(toBeDeletedMachineInfos) +} + +// deleteMachines annotates the corresponding MachineDeployment with machine names of toBeDeletedMachineInfos, reduces the desired replicas of the corresponding MachineDeployment and cordons corresponding nodes belonging to toBeDeletedMachineInfos +func (ngImpl *nodeGroup) deleteMachines(toBeDeletedMachineInfos []machineInfo) error { + numDeletionCandidates := len(toBeDeletedMachineInfos) + if numDeletionCandidates == 0 { + return nil + } + + release := ngImpl.AcquireScalingMutex(fmt.Sprintf("deleteMachines for %s", toBeDeletedMachineInfos)) + defer release() + + // get the machine deployment and return if rolling update is not finished + md, err := ngImpl.mcmManager.GetMachineDeploymentObject(ngImpl.Name) + if err != nil { + return err + } + if !isRollingUpdateFinished(md) { + return fmt.Errorf("MachineDeployment %s is under rolling update , cannot reduce replica count", ngImpl.Name) } - return machinedeployment.mcmManager.DeleteMachines(machines) + + // Trying to update the machineDeployment till the deadline + err = ngImpl.mcmManager.retry(func(ctx context.Context) (bool, error) { + return ngImpl.mcmManager.scaleDownMachineDeployment(ctx, ngImpl.Name, toBeDeletedMachineInfos) + }, "MachineDeployment", "update", ngImpl.Name) + if err != nil { + klog.Errorf("Unable to scale down MachineDeployment %s by %d and delete machines %q due to: %v", ngImpl.Name, numDeletionCandidates, toBeDeletedMachineInfos, err) + return fmt.Errorf("for NodeGroup %q, cannot scale down due to: %w", ngImpl.Name, err) + } + return nil } -func getNodeNames(nodes []*apiv1.Node) interface{} { +// AcquireScalingMutex acquires the scalingMutex associated with this NodeGroup and returns a function that releases the scalingMutex that is expected to be deferred by the caller. 
+func (ngImpl *nodeGroup) AcquireScalingMutex(operation string) (releaseFn func()) { + ngImpl.scalingMutex.Lock() + klog.V(3).Infof("%s has acquired scalingMutex of NodeGroup %q", operation, ngImpl.Name) + releaseFn = func() { + klog.V(3).Infof("%s is releasing scalingMutex of NodeGroup %q", operation, ngImpl.Name) + ngImpl.scalingMutex.Unlock() + } + return +} + +func getNodeNames(nodes []*apiv1.Node) []string { nodeNames := make([]string, 0, len(nodes)) for _, node := range nodes { nodeNames = append(nodeNames, node.Name) @@ -437,21 +464,32 @@ func getNodeNames(nodes []*apiv1.Node) interface{} { return nodeNames } -// Id returns machinedeployment id. -func (machinedeployment *MachineDeployment) Id() string { - return machinedeployment.Name +func getNodeNamesFromMachines(machines []*v1alpha1.Machine) []string { + var nodeNames []string + for _, m := range machines { + nodeName := m.Labels["node"] + if nodeName != "" { + nodeNames = append(nodeNames, nodeName) + } + } + return nodeNames +} + +// Id returns NodeGroup name +func (ngImpl *nodeGroup) Id() string { + return ngImpl.Name } // Debug returns a debug string for the Asg. -func (machinedeployment *MachineDeployment) Debug() string { - return fmt.Sprintf("%s (%d:%d)", machinedeployment.Id(), machinedeployment.MinSize(), machinedeployment.MaxSize()) +func (ngImpl *nodeGroup) Debug() string { + return fmt.Sprintf("%s (%d:%d)", ngImpl.Id(), ngImpl.MinSize(), ngImpl.MaxSize()) } // Nodes returns a list of all nodes that belong to this node group. 
-func (machinedeployment *MachineDeployment) Nodes() ([]cloudprovider.Instance, error) { - instances, err := machinedeployment.mcmManager.GetInstancesForMachineDeployment(machinedeployment) +func (ngImpl *nodeGroup) Nodes() ([]cloudprovider.Instance, error) { + instances, err := ngImpl.mcmManager.GetInstancesForMachineDeployment(ngImpl.Name) if err != nil { - return nil, fmt.Errorf("failed to get the cloudprovider.Instance for machines backed by the machinedeployment %q, error: %v", machinedeployment.Name, err) + return nil, fmt.Errorf("failed to get the cloudprovider.Instance for machines backed by the MachineDeployment %q, error: %v", ngImpl.Name, err) } erroneousInstanceInfos := make([]string, 0, len(instances)) for _, instance := range instances { @@ -468,9 +506,9 @@ func (machinedeployment *MachineDeployment) Nodes() ([]cloudprovider.Instance, e // GetOptions returns NodeGroupAutoscalingOptions that should be used for this particular // NodeGroup. Returning a nil will result in using default options. // Implementation optional. -func (machinedeployment *MachineDeployment) GetOptions(defaults config.NodeGroupAutoscalingOptions) (*config.NodeGroupAutoscalingOptions, error) { +func (ngImpl *nodeGroup) GetOptions(defaults config.NodeGroupAutoscalingOptions) (*config.NodeGroupAutoscalingOptions, error) { options := defaults - mcdAnnotations, err := machinedeployment.mcmManager.GetMachineDeploymentAnnotations(machinedeployment.Name) + mcdAnnotations, err := ngImpl.mcmManager.GetMachineDeploymentAnnotations(ngImpl.Name) if err != nil { return nil, err } @@ -504,49 +542,39 @@ func (machinedeployment *MachineDeployment) GetOptions(defaults config.NodeGroup } // TemplateNodeInfo returns a node template for this node group. 
-func (machinedeployment *MachineDeployment) TemplateNodeInfo() (*schedulerframework.NodeInfo, error) { +func (ngImpl *nodeGroup) TemplateNodeInfo() (*schedulerframework.NodeInfo, error) { - nodeTemplate, err := machinedeployment.mcmManager.GetMachineDeploymentNodeTemplate(machinedeployment) + nodeTemplate, err := ngImpl.mcmManager.GetMachineDeploymentNodeTemplate(ngImpl.Name) if err != nil { return nil, err } - node, err := machinedeployment.mcmManager.buildNodeFromTemplate(machinedeployment.Name, nodeTemplate) + node, err := ngImpl.mcmManager.buildNodeFromTemplate(ngImpl.Name, nodeTemplate) if err != nil { return nil, err } - nodeInfo := schedulerframework.NewNodeInfo(cloudprovider.BuildKubeProxy(machinedeployment.Name)) + nodeInfo := schedulerframework.NewNodeInfo(cloudprovider.BuildKubeProxy(ngImpl.Name)) nodeInfo.SetNode(node) return nodeInfo, nil } // AtomicIncreaseSize is not implemented. -func (machinedeployment *MachineDeployment) AtomicIncreaseSize(delta int) error { +func (ngImpl *nodeGroup) AtomicIncreaseSize(delta int) error { return cloudprovider.ErrNotImplemented } -func buildMachineDeploymentFromSpec(value string, mcmManager *McmManager) (*MachineDeployment, error) { - spec, err := dynamic.SpecFromString(value, true) - - if err != nil { - return nil, fmt.Errorf("failed to parse node group spec: %v", err) +// getMachineNamesTriggeredForDeletion returns the set of machine names contained within the machineutils.TriggerDeletionByMCM annotation on the given MachineDeployment +// TODO: Move to using MCM annotations.GetMachineNamesTriggeredForDeletion after MCM release. 
+func getMachineNamesTriggeredForDeletion(mcd *v1alpha1.MachineDeployment) []string { + if mcd.Annotations == nil || mcd.Annotations[machineutils.TriggerDeletionByMCM] == "" { + return nil } - s := strings.Split(spec.Name, ".") - Namespace, Name := s[0], s[1] - - machinedeployment := buildMachineDeployment(mcmManager, spec.MinSize, spec.MaxSize, Namespace, Name) - return machinedeployment, nil + return strings.Split(mcd.Annotations[machineutils.TriggerDeletionByMCM], ",") } -func buildMachineDeployment(mcmManager *McmManager, minSize int, maxSize int, namespace string, name string) *MachineDeployment { - return &MachineDeployment{ - mcmManager: mcmManager, - minSize: minSize, - maxSize: maxSize, - Ref: Ref{ - Name: name, - Namespace: namespace, - }, - } +// TODO: Move to using MCM annotations.CreateMachinesTriggeredForDeletionAnnotValue after MCM release +func createMachinesTriggeredForDeletionAnnotValue(machineNames []string) string { + slices.Sort(machineNames) + return strings.Join(machineNames, ",") } diff --git a/cluster-autoscaler/cloudprovider/mcm/mcm_cloud_provider_test.go b/cluster-autoscaler/cloudprovider/mcm/mcm_cloud_provider_test.go index 6752c2497852..e19c20a748fd 100644 --- a/cluster-autoscaler/cloudprovider/mcm/mcm_cloud_provider_test.go +++ b/cluster-autoscaler/cloudprovider/mcm/mcm_cloud_provider_test.go @@ -8,6 +8,7 @@ import ( "context" "errors" "fmt" + "github.com/gardener/machine-controller-manager/pkg/util/provider/machineutils" "math" "strings" "testing" @@ -85,10 +86,11 @@ func TestDeleteNodes(t *testing.T) { node *corev1.Node } type expect struct { - machines []*v1alpha1.Machine - mdName string - mdReplicas int32 - err error + prio1Machines []*v1alpha1.Machine + mdName string + mdReplicas int32 + machinesTriggerDeletionAnnotationValue string + err error } type data struct { name string @@ -100,42 +102,44 @@ func TestDeleteNodes(t *testing.T) { { "should scale down machine deployment to remove a node", setup{ - nodes: newNodes(2, "fakeID", 
[]bool{true, false}), - machines: newMachines(2, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"3", "3"}, []bool{false, false}), + nodes: newNodes(2, "fakeID"), + machines: newMachines(2, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"3", "3"}), machineSets: newMachineSets(1, "machinedeployment-1"), machineDeployments: newMachineDeployments(1, 2, nil, nil, nil), nodeGroups: []string{nodeGroup1}, }, - action{node: newNodes(1, "fakeID", []bool{true})[0]}, + action{node: newNodes(1, "fakeID")[0]}, expect{ - machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"1"}, []bool{false}), - mdName: "machinedeployment-1", - mdReplicas: 1, - err: nil, + prio1Machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"1"}), + mdName: "machinedeployment-1", + machinesTriggerDeletionAnnotationValue: createMachinesTriggeredForDeletionAnnotValue(generateNames("machine", 1)), + mdReplicas: 1, + err: nil, }, }, { "should scale down machine deployment to remove a placeholder node", setup{ nodes: nil, - machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"3"}, []bool{false}), + machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"3"}), machineSets: newMachineSets(1, "machinedeployment-1"), machineDeployments: newMachineDeployments(1, 1, nil, nil, nil), nodeGroups: []string{nodeGroup2}, }, - action{node: newNode("node-1", "requested://machine-1", true)}, + action{node: newNode("node-1", "requested://machine-1")}, expect{ - machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"1"}, []bool{false}), - mdName: "machinedeployment-1", - mdReplicas: 0, - err: nil, + prio1Machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"1"}), + machinesTriggerDeletionAnnotationValue: createMachinesTriggeredForDeletionAnnotValue(generateNames("machine", 1)), + mdName: 
"machinedeployment-1", + mdReplicas: 0, + err: nil, }, }, { "should not scale down a machine deployment when it is under rolling update", setup{ - nodes: newNodes(2, "fakeID", []bool{true, false}), - machines: newMachines(2, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"3", "3"}, []bool{false, false}), + nodes: newNodes(2, "fakeID"), + machines: newMachines(2, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"3", "3"}), machineSets: newMachineSets(2, "machinedeployment-1"), machineDeployments: newMachineDeployments(1, 2, &v1alpha1.MachineDeploymentStatus{ Conditions: []v1alpha1.MachineDeploymentCondition{ @@ -144,19 +148,19 @@ func TestDeleteNodes(t *testing.T) { }, nil, nil), nodeGroups: []string{nodeGroup1}, }, - action{node: newNodes(1, "fakeID", []bool{true})[0]}, + action{node: newNodes(1, "fakeID")[0]}, expect{ - machines: nil, - mdName: "machinedeployment-1", - mdReplicas: 2, - err: fmt.Errorf("MachineDeployment machinedeployment-1 is under rolling update , cannot reduce replica count"), + prio1Machines: nil, + mdName: "machinedeployment-1", + mdReplicas: 2, + err: fmt.Errorf("MachineDeployment machinedeployment-1 is under rolling update , cannot reduce replica count"), }, }, { - "should not scale down when machine deployment update call times out and should reset priority of the corresponding machine", + "should not scale down when machine deployment update call times out", setup{ - nodes: newNodes(2, "fakeID", []bool{true, false}), - machines: newMachines(2, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"3", "3"}, []bool{false, false}), + nodes: newNodes(2, "fakeID"), + machines: newMachines(2, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"3", "3"}), machineSets: newMachineSets(1, "machinedeployment-1"), machineDeployments: newMachineDeployments(1, 2, nil, nil, nil), nodeGroups: []string{nodeGroup1}, @@ -166,19 +170,18 @@ func TestDeleteNodes(t *testing.T) { }, }, }, - action{node: 
newNodes(1, "fakeID", []bool{true})[0]}, + action{node: newNodes(1, "fakeID")[0]}, expect{ - machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"3"}, []bool{false}), mdName: "machinedeployment-1", mdReplicas: 2, - err: errors.Join(nil, fmt.Errorf("unable to scale in machine deployment machinedeployment-1, Error: %w", errors.New(mdUpdateErrorMsg))), + err: fmt.Errorf("for NodeGroup %q, cannot scale down due to: %w", "machinedeployment-1", errors.New(mdUpdateErrorMsg)), }, }, { "should scale down when machine deployment update call fails but passes within the timeout period", setup{ - nodes: newNodes(2, "fakeID", []bool{true, false}), - machines: newMachines(2, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"3", "3"}, []bool{false, false}), + nodes: newNodes(2, "fakeID"), + machines: newMachines(2, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"3", "3"}), machineSets: newMachineSets(1, "machinedeployment-1"), machineDeployments: newMachineDeployments(1, 2, nil, nil, nil), nodeGroups: []string{nodeGroup1}, @@ -188,26 +191,26 @@ func TestDeleteNodes(t *testing.T) { }, }, }, - action{node: newNodes(1, "fakeID", []bool{true})[0]}, + action{node: newNodes(1, "fakeID")[0]}, expect{ - machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"1"}, []bool{false}), - mdName: "machinedeployment-1", - mdReplicas: 1, - err: nil, + prio1Machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"1"}), + machinesTriggerDeletionAnnotationValue: createMachinesTriggeredForDeletionAnnotValue(generateNames("machine", 1)), + mdName: "machinedeployment-1", + mdReplicas: 1, + err: nil, }, }, { "should not scale down a machine deployment when the corresponding machine is already in terminating state", setup{ - nodes: newNodes(2, "fakeID", []bool{true, false}), - machines: newMachines(2, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"3", "3"}, 
[]bool{true, false}), + nodes: newNodes(2, "fakeID"), + machines: newMachines(2, "fakeID", &v1alpha1.MachineStatus{CurrentStatus: v1alpha1.CurrentStatus{Phase: v1alpha1.MachineTerminating}}, "machinedeployment-1", "machineset-1", []string{"3", "3"}), machineSets: newMachineSets(1, "machinedeployment-1"), machineDeployments: newMachineDeployments(1, 2, nil, nil, nil), nodeGroups: []string{nodeGroup1}, }, - action{node: newNodes(1, "fakeID", []bool{true})[0]}, + action{node: newNodes(1, "fakeID")[0]}, expect{ - machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"3"}, []bool{true}), mdName: "machinedeployment-1", mdReplicas: 2, err: nil, @@ -216,15 +219,14 @@ func TestDeleteNodes(t *testing.T) { { "should not scale down a machine deployment when the corresponding machine is already in failed state", setup{ - nodes: newNodes(2, "fakeID", []bool{true, false}), - machines: newMachines(2, "fakeID", &v1alpha1.MachineStatus{CurrentStatus: v1alpha1.CurrentStatus{Phase: v1alpha1.MachineFailed}}, "machinedeployment-1", "machineset-1", []string{"3", "3"}, []bool{false, false}), + nodes: newNodes(2, "fakeID"), + machines: newMachines(2, "fakeID", &v1alpha1.MachineStatus{CurrentStatus: v1alpha1.CurrentStatus{Phase: v1alpha1.MachineFailed}}, "machinedeployment-1", "machineset-1", []string{"3", "3"}), machineSets: newMachineSets(1, "machinedeployment-1"), machineDeployments: newMachineDeployments(1, 2, nil, nil, nil), nodeGroups: []string{nodeGroup1}, }, - action{node: newNodes(1, "fakeID", []bool{false})[0]}, + action{node: newNodes(1, "fakeID")[0]}, expect{ - machines: newMachines(2, "fakeID", &v1alpha1.MachineStatus{CurrentStatus: v1alpha1.CurrentStatus{Phase: v1alpha1.MachineFailed}}, "machinedeployment-1", "machineset-1", []string{"3", "3"}, []bool{false, false}), mdName: "machinedeployment-1", mdReplicas: 2, err: nil, @@ -233,57 +235,35 @@ func TestDeleteNodes(t *testing.T) { { "should not scale down a machine deployment below the 
minimum", setup{ - nodes: newNodes(1, "fakeID", []bool{true}), - machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"3"}, []bool{false}), + nodes: newNodes(1, "fakeID"), + machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"3"}), machineSets: newMachineSets(1, "machinedeployment-1"), machineDeployments: newMachineDeployments(1, 1, nil, nil, nil), nodeGroups: []string{nodeGroup1}, }, - action{node: newNodes(1, "fakeID", []bool{true})[0]}, + action{node: newNodes(1, "fakeID")[0]}, expect{ - machines: nil, - mdName: "machinedeployment-1", - mdReplicas: 1, - err: fmt.Errorf("min size reached, nodes will not be deleted"), - }, - }, - { - "no scale down of machine deployment if priority of the targeted machine cannot be updated to 1", - setup{ - nodes: newNodes(2, "fakeID", []bool{true, false}), - machines: newMachines(2, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"3", "3"}, []bool{false, false}), - machineSets: newMachineSets(1, "machinedeployment-1"), - machineDeployments: newMachineDeployments(1, 2, nil, nil, nil), - nodeGroups: []string{nodeGroup1}, - controlMachineFakeResourceActions: &customfake.ResourceActions{ - Machine: customfake.Actions{ - Update: customfake.CreateFakeResponse(math.MaxInt32, mcUpdateErrorMsg, 0), - }, - }, - }, - action{node: newNodes(1, "fakeID", []bool{true})[0]}, - expect{ - machines: nil, - mdName: "machinedeployment-1", - mdReplicas: 2, - err: fmt.Errorf("could not prioritize machine machine-1 for deletion, aborting scale in of machine deployment, Error: %s", mcUpdateErrorMsg), + prio1Machines: nil, + mdName: "machinedeployment-1", + mdReplicas: 1, + err: fmt.Errorf("min size reached, nodes will not be deleted"), }, }, { "should not scale down machine deployment if the node belongs to another machine deployment", setup{ - nodes: newNodes(2, "fakeID", []bool{true, false}), - machines: newMachines(2, "fakeID", nil, "machinedeployment-2", 
"machineset-1", []string{"3", "3"}, []bool{false, false}), + nodes: newNodes(2, "fakeID"), + machines: newMachines(2, "fakeID", nil, "machinedeployment-2", "machineset-1", []string{"3", "3"}), machineSets: newMachineSets(1, "machinedeployment-2"), machineDeployments: newMachineDeployments(2, 2, nil, nil, nil), nodeGroups: []string{nodeGroup2, nodeGroup3}, }, - action{node: newNodes(1, "fakeID", []bool{true})[0]}, + action{node: newNodes(1, "fakeID")[0]}, expect{ - machines: nil, - mdName: "machinedeployment-2", - mdReplicas: 2, - err: fmt.Errorf("node-1 belongs to a different machinedeployment than machinedeployment-1"), + prio1Machines: nil, + mdName: "machinedeployment-2", + mdReplicas: 2, + err: fmt.Errorf("node-1 belongs to a different MachineDeployment than %q", "machinedeployment-1"), }, }, } @@ -296,7 +276,7 @@ func TestDeleteNodes(t *testing.T) { stop := make(chan struct{}) defer close(stop) controlMachineObjects, targetCoreObjects, _ := setupEnv(&entry.setup) - m, trackers, hasSyncedCacheFns := createMcmManager(t, stop, testNamespace, nil, controlMachineObjects, targetCoreObjects, nil) + m, trackers, hasSyncedCacheFns := createMcmManager(t, stop, testNamespace, entry.setup.nodeGroups, controlMachineObjects, targetCoreObjects, nil) defer trackers.Stop() waitForCacheSync(t, stop, hasSyncedCacheFns) @@ -307,7 +287,7 @@ func TestDeleteNodes(t *testing.T) { trackers.ControlMachine.SetFailAtFakeResourceActions(entry.setup.controlMachineFakeResourceActions) } - md, err := buildMachineDeploymentFromSpec(entry.setup.nodeGroups[0], m) + md, err := buildNodeGroupFromSpec(entry.setup.nodeGroups[0], m) g.Expect(err).To(BeNil()) err = md.DeleteNodes([]*corev1.Node{entry.action.node}) @@ -321,34 +301,46 @@ func TestDeleteNodes(t *testing.T) { machineDeployment, err := m.machineClient.MachineDeployments(m.namespace).Get(context.TODO(), entry.expect.mdName, metav1.GetOptions{}) g.Expect(err).ToNot(HaveOccurred()) 
g.Expect(machineDeployment.Spec.Replicas).To(BeNumerically("==", entry.expect.mdReplicas)) + g.Expect(machineDeployment.Annotations[machineutils.TriggerDeletionByMCM]).To(Equal(entry.expect.machinesTriggerDeletionAnnotationValue)) - machines, err := m.machineClient.Machines(m.namespace).List(context.TODO(), metav1.ListOptions{ - LabelSelector: metav1.FormatLabelSelector(&metav1.LabelSelector{ - MatchLabels: map[string]string{"name": md.Name}, - }), - }) - - for _, machine := range machines.Items { - flag := false - for _, entryMachineItem := range entry.expect.machines { - if entryMachineItem.Name == machine.Name { - g.Expect(machine.Annotations[machinePriorityAnnotation]).To(Equal(entryMachineItem.Annotations[machinePriorityAnnotation])) - flag = true - break - } - } - if !flag { - g.Expect(machine.Annotations[machinePriorityAnnotation]).To(Equal("3")) - } - } }) } } +func TestIdempotencyOfDeleteNodes(t *testing.T) { + setupObj := setup{ + nodes: newNodes(3, "fakeID"), + machines: newMachines(3, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"3", "3", "3"}), + machineSets: newMachineSets(1, "machinedeployment-1"), + machineDeployments: newMachineDeployments(1, 3, nil, nil, nil), + nodeGroups: []string{nodeGroup1}, + } + g := NewWithT(t) + stop := make(chan struct{}) + defer close(stop) + controlMachineObjects, targetCoreObjects, _ := setupEnv(&setupObj) + m, trackers, hasSyncedCacheFns := createMcmManager(t, stop, testNamespace, setupObj.nodeGroups, controlMachineObjects, targetCoreObjects, nil) + defer trackers.Stop() + waitForCacheSync(t, stop, hasSyncedCacheFns) + md, err := buildNodeGroupFromSpec(setupObj.nodeGroups[0], m) + g.Expect(err).To(BeNil()) + + err = md.DeleteNodes(newNodes(1, "fakeID")) + g.Expect(err).To(BeNil()) + err = md.DeleteNodes(newNodes(1, "fakeID")) + g.Expect(err).To(BeNil()) + + machineDeployment, err := m.machineClient.MachineDeployments(m.namespace).Get(context.TODO(), setupObj.machineDeployments[0].Name, 
metav1.GetOptions{}) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(machineDeployment.Spec.Replicas).To(BeNumerically("==", 2)) + g.Expect(machineDeployment.Annotations[machineutils.TriggerDeletionByMCM]).To(Equal(createMachinesTriggeredForDeletionAnnotValue(generateNames("machine", 1)))) +} + func TestRefresh(t *testing.T) { type expect struct { - machines []*v1alpha1.Machine - err error + prio3Machines []string + machinesTriggerDeletionAnnotationValue string + err error } type data struct { name string @@ -359,8 +351,8 @@ func TestRefresh(t *testing.T) { { "should return an error if MCM has zero available replicas", setup{ - nodes: newNodes(1, "fakeID", []bool{false}), - machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"1"}, []bool{false}), + nodes: newNodes(1, "fakeID"), + machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"1"}), machineDeployments: newMachineDeployments(1, 1, nil, nil, nil), nodeGroups: []string{nodeGroup2}, mcmDeployment: newMCMDeployment(0), @@ -372,8 +364,8 @@ func TestRefresh(t *testing.T) { { "should return an error if MCM deployment is not found", setup{ - nodes: newNodes(1, "fakeID", []bool{false}), - machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"1"}, []bool{false}), + nodes: newNodes(1, "fakeID"), + machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"1"}), machineDeployments: newMachineDeployments(1, 1, nil, nil, nil), nodeGroups: []string{nodeGroup2}, }, @@ -382,100 +374,31 @@ func TestRefresh(t *testing.T) { }, }, { - "should reset priority of a machine to 3 if machine deployment is not scaled in", + "should reset priority of a machine if it is not present in trigger deletion annotation on machine deployment", setup{ - nodes: newNodes(1, "fakeID", []bool{false}), - machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"1"}, []bool{false}), + 
nodes: newNodes(1, "fakeID"), + machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"1"}), machineDeployments: newMachineDeployments(1, 1, nil, nil, nil), nodeGroups: []string{nodeGroup2}, mcmDeployment: newMCMDeployment(1), }, expect{ - machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"3"}, []bool{false}), - err: nil, + prio3Machines: generateNames("machine", 1), + err: nil, }, }, { - "should reset priority of a machine to 3 if machine deployment is not scaled in even if ToBeDeletedTaint is present on the corresponding node", + "should update the trigger deletion annotation and remove non-existing machines", setup{ - nodes: newNodes(1, "fakeID", []bool{true}), - machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"1"}, []bool{false}), - machineDeployments: newMachineDeployments(1, 1, nil, nil, nil), + nodes: newNodes(1, "fakeID"), + machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"1"}), + machineDeployments: newMachineDeployments(1, 0, nil, map[string]string{machineutils.TriggerDeletionByMCM: "machine-1,machine-2"}, nil), nodeGroups: []string{nodeGroup2}, mcmDeployment: newMCMDeployment(1), }, expect{ - machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"3"}, []bool{false}), - err: nil, - }, - }, - { - "should NOT skip paused machine deployment", - setup{ - nodes: newNodes(1, "fakeID", []bool{false}), - machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"1"}, []bool{false}), - machineDeployments: newMachineDeployments(1, 1, &v1alpha1.MachineDeploymentStatus{ - Conditions: []v1alpha1.MachineDeploymentCondition{ - {Type: v1alpha1.MachineDeploymentProgressing, Status: v1alpha1.ConditionUnknown, Reason: machineDeploymentPausedReason}, - }, - }, nil, nil), - nodeGroups: []string{nodeGroup2}, - mcmDeployment: newMCMDeployment(1), - }, - expect{ - 
machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"3"}, []bool{false}), - err: nil, - }, - }, - { - "should ignore terminating/failed machines in checking if number of annotated machines is more than desired", - setup{ - nodes: newNodes(1, "fakeID", []bool{true}), - machines: newMachines(1, "fakeID", &v1alpha1.MachineStatus{ - CurrentStatus: v1alpha1.CurrentStatus{Phase: v1alpha1.MachineFailed}, - }, "machinedeployment-1", "machineset-1", []string{"1"}, []bool{false}), - machineDeployments: newMachineDeployments(1, 1, nil, nil, nil), - nodeGroups: []string{nodeGroup2}, - mcmDeployment: newMCMDeployment(1), - }, - expect{ - machines: newMachines(1, "fakeID", &v1alpha1.MachineStatus{ - CurrentStatus: v1alpha1.CurrentStatus{Phase: v1alpha1.MachineFailed}, - }, "machinedeployment-1", "machineset-1", []string{"1"}, []bool{false}), - err: nil, - }, - }, - { - "should not reset priority of a machine to 3 if machine deployment is scaled in", - setup{ - nodes: newNodes(1, "fakeID", []bool{true}), - machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"1"}, []bool{false}), - machineDeployments: newMachineDeployments(1, 0, nil, nil, nil), - nodeGroups: []string{nodeGroup2}, - mcmDeployment: newMCMDeployment(1), - }, - expect{ - machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"1"}, []bool{false}), - err: nil, - }, - }, - { - "priority reset of machine fails", - setup{ - nodes: newNodes(1, "fakeID", []bool{false}), - machines: newMachines(1, "fakeID", nil, "machinedeployment-1", "machineset-1", []string{"1"}, []bool{false}), - machineDeployments: newMachineDeployments(1, 1, nil, nil, nil), - controlMachineFakeResourceActions: &customfake.ResourceActions{ - Machine: customfake.Actions{ - Update: customfake.CreateFakeResponse(math.MaxInt32, mcUpdateErrorMsg, 0), - }, - }, - nodeGroups: []string{nodeGroup2}, - mcmDeployment: newMCMDeployment(1), - }, - expect{ - 
machines: []*v1alpha1.Machine{newMachine("machine-1", "fakeID-1", nil, "machinedeployment-1", "machineset-1", "1", false, true)}, - err: errors.Join(nil, errors.Join(fmt.Errorf("could not reset priority annotation on machine machine-1, Error: %v", mcUpdateErrorMsg))), + machinesTriggerDeletionAnnotationValue: createMachinesTriggeredForDeletionAnnotValue(generateNames("machine", 1)), + err: nil, }, }, } @@ -505,11 +428,6 @@ func TestRefresh(t *testing.T) { } else { g.Expect(err).To(BeNil()) } - for _, mc := range entry.expect.machines { - machine, err := m.machineClient.Machines(m.namespace).Get(context.TODO(), mc.Name, metav1.GetOptions{}) - g.Expect(err).To(BeNil()) - g.Expect(mc.Annotations[machinePriorityAnnotation]).To(Equal(machine.Annotations[machinePriorityAnnotation])) - } }) } } @@ -554,14 +472,14 @@ func TestNodes(t *testing.T) { { "Correct instances should be returned for machine objects under the machinedeployment", setup{ - nodes: []*corev1.Node{newNode("node-1", "fakeID-1", false)}, + nodes: []*corev1.Node{newNode("node-1", "fakeID-1")}, machines: func() []*v1alpha1.Machine { allMachines := make([]*v1alpha1.Machine, 0, 5) - allMachines = append(allMachines, newMachine("machine-with-registered-node", "fakeID-1", nil, "machinedeployment-1", "", "", false, true)) - allMachines = append(allMachines, newMachine("machine-with-vm-but-no-node", "fakeID-2", nil, "machinedeployment-1", "", "", false, false)) - allMachines = append(allMachines, newMachine("machine-with-vm-creating", "", nil, "machinedeployment-1", "", "", false, false)) - allMachines = append(allMachines, newMachine("machine-with-vm-create-error-out-of-quota", "", &v1alpha1.MachineStatus{LastOperation: v1alpha1.LastOperation{Type: v1alpha1.MachineOperationCreate, State: v1alpha1.MachineStateFailed, ErrorCode: machinecodes.ResourceExhausted.String(), Description: outOfQuotaMachineStatusErrorDescription}}, "machinedeployment-1", "", "", false, false)) - allMachines = append(allMachines, 
newMachine("machine-with-vm-create-error-invalid-credentials", "", &v1alpha1.MachineStatus{LastOperation: v1alpha1.LastOperation{Type: v1alpha1.MachineOperationCreate, State: v1alpha1.MachineStateFailed, ErrorCode: machinecodes.Internal.String(), Description: invalidCredentialsMachineStatusErrorDescription}}, "machinedeployment-1", "", "", false, false)) + allMachines = append(allMachines, newMachine("machine-with-registered-node", "fakeID-1", nil, "machinedeployment-1", "", "", true)) + allMachines = append(allMachines, newMachine("machine-with-vm-but-no-node", "fakeID-2", nil, "machinedeployment-1", "", "", false)) + allMachines = append(allMachines, newMachine("machine-with-vm-creating", "", nil, "machinedeployment-1", "", "", false)) + allMachines = append(allMachines, newMachine("machine-with-vm-create-error-out-of-quota", "", &v1alpha1.MachineStatus{LastOperation: v1alpha1.LastOperation{Type: v1alpha1.MachineOperationCreate, State: v1alpha1.MachineStateFailed, ErrorCode: machinecodes.ResourceExhausted.String(), Description: outOfQuotaMachineStatusErrorDescription}}, "machinedeployment-1", "", "", false)) + allMachines = append(allMachines, newMachine("machine-with-vm-create-error-invalid-credentials", "", &v1alpha1.MachineStatus{LastOperation: v1alpha1.LastOperation{Type: v1alpha1.MachineOperationCreate, State: v1alpha1.MachineStateFailed, ErrorCode: machinecodes.Internal.String(), Description: invalidCredentialsMachineStatusErrorDescription}}, "machinedeployment-1", "", "", false)) return allMachines }(), machineDeployments: newMachineDeployments(1, 2, nil, nil, nil), @@ -599,7 +517,7 @@ func TestNodes(t *testing.T) { trackers.ControlMachine.SetFailAtFakeResourceActions(entry.setup.controlMachineFakeResourceActions) } - md, err := buildMachineDeploymentFromSpec(entry.setup.nodeGroups[0], m) + md, err := buildNodeGroupFromSpec(entry.setup.nodeGroups[0], m) g.Expect(err).To(BeNil()) returnedInstances, err := md.Nodes() @@ -663,7 +581,7 @@ func TestGetOptions(t 
*testing.T) { nodeGroups: []string{nodeGroup1}, }, expect{ - err: fmt.Errorf("unable to fetch MachineDeployment object machinedeployment-1, Error: machinedeployment.machine.sapcloud.io \"machinedeployment-1\" not found"), + err: fmt.Errorf("unable to fetch MachineDeployment object \"machinedeployment-1\", Error: machinedeployment.machine.sapcloud.io \"machinedeployment-1\" not found"), }, }, { @@ -751,7 +669,7 @@ func TestGetOptions(t *testing.T) { defer trackers.Stop() waitForCacheSync(t, stop, hasSyncedCacheFns) - md, err := buildMachineDeploymentFromSpec(entry.setup.nodeGroups[0], m) + md, err := buildNodeGroupFromSpec(entry.setup.nodeGroups[0], m) g.Expect(err).To(BeNil()) options, err := md.GetOptions(ngAutoScalingOpDefaults) diff --git a/cluster-autoscaler/cloudprovider/mcm/mcm_manager.go b/cluster-autoscaler/cloudprovider/mcm/mcm_manager.go index 399862c9e383..00b021352e12 100644 --- a/cluster-autoscaler/cloudprovider/mcm/mcm_manager.go +++ b/cluster-autoscaler/cloudprovider/mcm/mcm_manager.go @@ -27,6 +27,12 @@ import ( "errors" "flag" "fmt" + "github.com/gardener/machine-controller-manager/pkg/util/provider/machineutils" + "k8s.io/apimachinery/pkg/types" + utilerrors "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/autoscaler/cluster-autoscaler/config/dynamic" + "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/eligibility" v1appslister "k8s.io/client-go/listers/apps/v1" "k8s.io/utils/pointer" "maps" @@ -36,6 +42,7 @@ import ( "slices" "strconv" "strings" + "sync" "time" awsapis "github.com/gardener/machine-controller-manager-provider-aws/pkg/aws/apis" @@ -56,18 +63,19 @@ import ( utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" - "k8s.io/autoscaler/cluster-autoscaler/config/dynamic" "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset" "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" "k8s.io/client-go/discovery" 
appsinformers "k8s.io/client-go/informers" coreinformers "k8s.io/client-go/informers" + corev1 "k8s.io/client-go/kubernetes/typed/core/v1" corelisters "k8s.io/client-go/listers/core/v1" "k8s.io/client-go/rest" "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/clientcmd" "k8s.io/klog/v2" kubeletapis "k8s.io/kubelet/pkg/apis" + // "github.com/gardener/machine-controller-manager/pkg/util/provider/" ) const ( @@ -77,11 +85,9 @@ const ( defaultResetAnnotationTimeout = 10 * time.Second // defaultPriorityValue is the default value for the priority annotation used by CA. It is set to 3 because MCM defaults the priority of machine it creates to 3. defaultPriorityValue = "3" - // priorityValueForCandidateMachines is the priority annotation value set on machines that the CA wants to be deleted. Its value is set to 1. - priorityValueForCandidateMachines = "1" - minResyncPeriodDefault = 1 * time.Hour - // machinePriorityAnnotation is the annotation to set machine priority while deletion - machinePriorityAnnotation = "machinepriority.machine.sapcloud.io" + // priorityValueForDeletionCandidateMachines is the priority annotation value set on machines that the CA wants to be deleted. Its value is set to 1. 
+ priorityValueForDeletionCandidateMachines = "1" + minResyncPeriodDefault = 1 * time.Hour // kindMachineClass is the kind for generic machine class used by the OOT providers kindMachineClass = "MachineClass" // providerAWS is the provider type for AWS machine class objects @@ -98,6 +104,8 @@ const ( machineDeploymentPausedReason = "DeploymentPaused" // machineDeploymentNameLabel key for Machine Deployment name in machine labels machineDeploymentNameLabel = "name" + // poolNameLabel is the name of the label for gardener worker pool + poolNameLabel = "worker.gardener.cloud/pool" ) var ( @@ -124,6 +132,7 @@ type McmManager struct { namespace string interrupt chan struct{} discoveryOpts cloudprovider.NodeGroupDiscoveryOptions + nodeGroups map[types.NamespacedName]*nodeGroup deploymentLister v1appslister.DeploymentLister machineClient machineapi.MachineV1alpha1Interface machineDeploymentLister machinelisters.MachineDeploymentLister @@ -131,6 +140,7 @@ type McmManager struct { machineLister machinelisters.MachineLister machineClassLister machinelisters.MachineClassLister nodeLister corelisters.NodeLister + nodeInterface corev1.NodeInterface maxRetryTimeout time.Duration retryInterval time.Duration } @@ -154,6 +164,22 @@ type nodeTemplate struct { Taints []apiv1.Taint } +type machineInfo struct { + Key types.NamespacedName + NodeName string + FailedOrTerminating bool +} + +func (m machineInfo) String() string { + return fmt.Sprintf("(%s|%s)", m.Key, m.NodeName) +} + +type scaleDownData struct { + RevisedToBeDeletedMachineNames sets.Set[string] + RevisedScaledownAmount int + RevisedMachineDeployment *v1alpha1.MachineDeployment +} + func init() { controlBurst = flag.Int("control-apiserver-burst", rest.DefaultBurst, "Throttling burst configuration for the client to control cluster's apiserver.") controlQPS = flag.Float64("control-apiserver-qps", float64(rest.DefaultQPS), "Throttling QPS configuration for the client to control cluster's apiserver.") @@ -237,8 +263,9 @@ func 
createMCMManagerInternal(discoveryOpts cloudprovider.NodeGroupDiscoveryOpti targetCoreClientBuilder := ClientBuilder{ ClientConfig: targetKubeconfig, } + targetCoreClient := targetCoreClientBuilder.ClientOrDie("target-core-shared-informers") targetCoreInformerFactory := coreinformers.NewSharedInformerFactory( - targetCoreClientBuilder.ClientOrDie("target-core-shared-informers"), + targetCoreClient, *minResyncPeriod, ) @@ -249,6 +276,7 @@ func createMCMManagerInternal(discoveryOpts cloudprovider.NodeGroupDiscoveryOpti m := &McmManager{ namespace: namespace, interrupt: make(chan struct{}), + nodeGroups: make(map[types.NamespacedName]*nodeGroup), deploymentLister: deploymentLister, machineClient: controlMachineClient, machineClassLister: machineClassLister, @@ -256,11 +284,15 @@ func createMCMManagerInternal(discoveryOpts cloudprovider.NodeGroupDiscoveryOpti machineSetLister: machineSharedInformers.MachineSets().Lister(), machineDeploymentLister: machineSharedInformers.MachineDeployments().Lister(), nodeLister: coreSharedInformers.Nodes().Lister(), + nodeInterface: targetCoreClient.CoreV1().Nodes(), discoveryOpts: discoveryOpts, maxRetryTimeout: maxRetryTimeout, retryInterval: retryInterval, } - + err = m.generateMachineDeploymentMap() + if err != nil { + return nil, err + } targetCoreInformerFactory.Start(m.interrupt) controlMachineInformerFactory.Start(m.interrupt) appsInformerFactory.Start(m.interrupt) @@ -283,6 +315,27 @@ func createMCMManagerInternal(discoveryOpts cloudprovider.NodeGroupDiscoveryOpti return nil, fmt.Errorf("Unable to start cloud provider MCM for cluster autoscaler: API GroupVersion %q or %q or %q is not available; \nFound: %#v", machineGVR, machineSetGVR, machineDeploymentGVR, availableResources) } +func (m *McmManager) generateMachineDeploymentMap() error { + for _, spec := range m.discoveryOpts.NodeGroupSpecs { + if err := m.addNodeGroup(spec); err != nil { + return err + } + } + return nil +} + +// addNodeGroup adds node group defined in 
string spec. Format: +// minNodes:maxNodes:namespace.machineDeploymentName +func (m *McmManager) addNodeGroup(spec string) error { + ng, err := buildNodeGroupFromSpec(spec, m) + if err != nil { + return err + } + key := types.NamespacedName{Namespace: ng.Namespace, Name: ng.Name} + m.nodeGroups[key] = ng + return nil +} + // TODO: In general, any controller checking this needs to be dynamic so // users don't have to restart their controller manager if they change the apiserver. // Until we get there, the structure here needs to be exposed for the construction of a proper ControllerContext. @@ -342,119 +395,56 @@ func CreateMcmManager(discoveryOpts cloudprovider.NodeGroupDiscoveryOptions) (*M return createMCMManagerInternal(discoveryOpts, defaultRetryInterval, defaultMaxRetryTimeout) } -// GetMachineDeploymentForMachine returns the MachineDeployment for the Machine object. -func (m *McmManager) GetMachineDeploymentForMachine(machine *Ref) (*MachineDeployment, error) { - if machine.Name == "" { +// getNodeGroup returns the NodeGroup for the given fully-qualified machine name. +func (m *McmManager) getNodeGroup(machineKey types.NamespacedName) (*nodeGroup, error) { + if machineKey.Name == "" { // Considering the possibility when Machine has been deleted but due to cached Node object it appears here. - return nil, fmt.Errorf("Node does not Exists") + return nil, fmt.Errorf("node does not Exists") } - machineObject, err := m.machineLister.Machines(m.namespace).Get(machine.Name) + machineObject, err := m.machineLister.Machines(m.namespace).Get(machineKey.Name) if err != nil { if kube_errors.IsNotFound(err) { // Machine has been removed. 
klog.V(4).Infof("Machine was removed before it could be retrieved: %v", err) return nil, nil } - return nil, fmt.Errorf("Unable to fetch Machine object %s %v", machine.Name, err) + return nil, fmt.Errorf("unable to fetch Machine object for given Machine name %q due to %w", machineKey.Name, err) } var machineSetName, machineDeploymentName string if len(machineObject.OwnerReferences) > 0 { machineSetName = machineObject.OwnerReferences[0].Name } else { - return nil, fmt.Errorf("Unable to find parent MachineSet of given Machine object %s %v", machine.Name, err) + return nil, fmt.Errorf("unable to find parent MachineSet for given Machine name %q due to: %w", machineKey.Name, err) } machineSetObject, err := m.machineSetLister.MachineSets(m.namespace).Get(machineSetName) if err != nil { - return nil, fmt.Errorf("Unable to fetch MachineSet object %s %v", machineSetName, err) + return nil, fmt.Errorf("unable to fetch MachineSet object for name %q due to: %w", machineSetName, err) } if len(machineSetObject.OwnerReferences) > 0 { machineDeploymentName = machineSetObject.OwnerReferences[0].Name } else { - return nil, fmt.Errorf("unable to find parent MachineDeployment of given MachineSet object %s %v", machineSetName, err) - } - - mcmRef := Ref{ - Name: machineDeploymentName, - Namespace: m.namespace, + return nil, fmt.Errorf("unable to find parent MachineDeployment of given MachineSet name %q due to: %w", machineSetName, err) } - discoveryOpts := m.discoveryOpts - specs := discoveryOpts.NodeGroupSpecs - var min, max int - for _, spec := range specs { - s, err := dynamic.SpecFromString(spec, true) - if err != nil { - return nil, fmt.Errorf("Error occurred while parsing the spec") - } - - str := strings.Split(s.Name, ".") - _, Name := str[0], str[1] - - if Name == machineDeploymentName { - min = s.MinSize - max = s.MaxSize - break - } + lookupKey := types.NamespacedName{Namespace: m.namespace, Name: machineDeploymentName} + ng, ok := m.nodeGroups[lookupKey] + if !ok { + return 
nil, fmt.Errorf("could not find NodeGroup for MachineDeployment %q in the managed nodeGroups", machineDeploymentName) } - - return &MachineDeployment{ - mcmRef, - m, - min, - max, - }, nil + return ng, nil } -// Refresh method, for each machine deployment, will reset the priority of the machines if the number of annotated machines is more than desired. -// It will select the machines to reset the priority based on the descending order of creation timestamp. +// Refresh method for the McmManager that will invoke NodeGroup.Refresh for each node gorup and return collected errors. func (m *McmManager) Refresh() error { - machineDeployments, err := m.machineDeploymentLister.MachineDeployments(m.namespace).List(labels.Everything()) - if err != nil { - klog.Errorf("[Refresh] unable to list machine deployments") - return err - } - var collectiveError error - for _, machineDeployment := range machineDeployments { - // ignore the machine deployment if it is in rolling update - if !isRollingUpdateFinished(machineDeployment) { - klog.Infof("[Refresh] machine deployment %s is under rolling update, skipping", machineDeployment.Name) - continue - } - replicas := machineDeployment.Spec.Replicas - // check if number of annotated machine objects is more than desired and correspondingly reset the priority annotation value if needed. - machines, err := m.getMachinesForMachineDeployment(machineDeployment.Name) - if err != nil { - klog.Errorf("[Refresh] failed to get machines for machine deployment %s, hence skipping it. 
Err: %v", machineDeployment.Name, err.Error()) - collectiveError = errors.Join(collectiveError, err) - continue - } - var machinesMarkedForDeletion []*v1alpha1.Machine - for _, machine := range machines { - // no need to reset priority for machines already in termination or failed phase - if machine.Status.CurrentStatus.Phase == v1alpha1.MachineTerminating || machine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed { - continue - } - if annotValue, ok := machine.Annotations[machinePriorityAnnotation]; ok && annotValue == priorityValueForCandidateMachines { - machinesMarkedForDeletion = append(machinesMarkedForDeletion, machine) - } - } - if int(replicas) > len(machines)-len(machinesMarkedForDeletion) { - slices.SortStableFunc(machinesMarkedForDeletion, func(m1, m2 *v1alpha1.Machine) int { - return -m1.CreationTimestamp.Compare(m2.CreationTimestamp.Time) - }) - diff := int(replicas) - len(machines) + len(machinesMarkedForDeletion) - targetRefs := make([]*Ref, 0, diff) - for i := 0; i < min(diff, len(machinesMarkedForDeletion)); i++ { - targetRefs = append(targetRefs, &Ref{Name: machinesMarkedForDeletion[i].Name, Namespace: machinesMarkedForDeletion[i].Namespace}) - } - collectiveError = errors.Join(collectiveError, m.resetPriorityForMachines(targetRefs)) - } + var collectiveError []error + for _, ng := range m.nodeGroups { + collectiveError = append(collectiveError, ng.Refresh()) } - return collectiveError + return errors.Join(collectiveError...) } // Cleanup does nothing at the moment. @@ -463,20 +453,19 @@ func (m *McmManager) Cleanup() { return } -// GetMachineDeploymentSize returns the replicas field of the MachineDeployment -func (m *McmManager) GetMachineDeploymentSize(machinedeployment *MachineDeployment) (int64, error) { - md, err := m.machineDeploymentLister.MachineDeployments(m.namespace).Get(machinedeployment.Name) +// GetMachineDeploymentSize returns the replicas field of the MachineDeployment corresponding to the given node group. 
+func (m *McmManager) GetMachineDeploymentSize(nodeGroupName string) (int64, error) { + md, err := m.GetMachineDeploymentObject(nodeGroupName) if err != nil { - return 0, fmt.Errorf("Unable to fetch MachineDeployment object %s %v", machinedeployment.Name, err) + return 0, err } return int64(md.Spec.Replicas), nil } -// SetMachineDeploymentSize sets the desired size for the Machinedeployment. -func (m *McmManager) SetMachineDeploymentSize(ctx context.Context, machinedeployment *MachineDeployment, size int64) (bool, error) { - md, err := m.machineDeploymentLister.MachineDeployments(m.namespace).Get(machinedeployment.Name) +// SetMachineDeploymentSize sets the desired size for the backing MachineDeployment of the given nodeGroup. +func (m *McmManager) SetMachineDeploymentSize(ctx context.Context, nodeGroup *nodeGroup, size int64) (bool, error) { + md, err := m.GetMachineDeploymentObject(nodeGroup.Name) if err != nil { - klog.Errorf("Unable to fetch MachineDeployment object %s, Error: %v", machinedeployment.Name, err) return true, err } // don't scale down during rolling update, as that could remove ready node with workload @@ -486,100 +475,10 @@ func (m *McmManager) SetMachineDeploymentSize(ctx context.Context, machinedeploy clone := md.DeepCopy() clone.Spec.Replicas = int32(size) - _, err = m.machineClient.MachineDeployments(machinedeployment.Namespace).Update(ctx, clone, metav1.UpdateOptions{}) + _, err = m.machineClient.MachineDeployments(nodeGroup.Namespace).Update(ctx, clone, metav1.UpdateOptions{}) return true, err } -// DeleteMachines annotates the target machines and also reduces the desired replicas of the MachineDeployment. 
-func (m *McmManager) DeleteMachines(targetMachineRefs []*Ref) error { - if len(targetMachineRefs) == 0 { - return nil - } - commonMachineDeployment, err := m.GetMachineDeploymentForMachine(targetMachineRefs[0]) - if err != nil { - return err - } - // get the machine deployment and return if rolling update is not finished - md, err := m.machineDeploymentLister.MachineDeployments(m.namespace).Get(commonMachineDeployment.Name) - if err != nil { - klog.Errorf("Unable to fetch MachineDeployment object %s, Error: %v", commonMachineDeployment.Name, err) - return err - } - if !isRollingUpdateFinished(md) { - return fmt.Errorf("MachineDeployment %s is under rolling update , cannot reduce replica count", commonMachineDeployment.Name) - } - // update priorities of machines to be deleted except the ones already in termination to 1 - scaleDownAmount, err := m.prioritizeMachinesForDeletion(targetMachineRefs) - if err != nil { - return err - } - // Trying to update the machineDeployment till the deadline - err = m.retry(func(ctx context.Context) (bool, error) { - return m.scaleDownMachineDeployment(ctx, commonMachineDeployment.Name, scaleDownAmount) - }, "MachineDeployment", "update", commonMachineDeployment.Name) - if err != nil { - klog.Errorf("unable to scale in machine deployment %s, will reset priority of target machines, Error: %v", commonMachineDeployment.Name, err) - return errors.Join(err, m.resetPriorityForMachines(targetMachineRefs)) - } - return nil -} - -// resetPriorityForMachines resets the priority of machines passed in the argument to defaultPriorityValue -func (m *McmManager) resetPriorityForMachines(mcRefs []*Ref) error { - var collectiveError error - for _, mcRef := range mcRefs { - machine, err := m.machineLister.Machines(m.namespace).Get(mcRef.Name) - if err != nil { - collectiveError = errors.Join(collectiveError, fmt.Errorf("unable to get Machine object %s, Error: %v", mcRef, err)) - continue - } - ctx, cancelFn := 
context.WithDeadline(context.Background(), time.Now().Add(defaultResetAnnotationTimeout)) - err = func() error { - defer cancelFn() - val, ok := machine.Annotations[machinePriorityAnnotation] - if ok && val != defaultPriorityValue { - _, err = m.updateAnnotationOnMachine(ctx, machine.Name, machinePriorityAnnotation, defaultPriorityValue) - return err - } - return nil - }() - if err != nil { - collectiveError = errors.Join(collectiveError, fmt.Errorf("could not reset priority annotation on machine %s, Error: %v", machine.Name, err)) - continue - } - } - return collectiveError -} - -// prioritizeMachinesForDeletion prioritizes the targeted machines by updating their priority annotation to 1 -func (m *McmManager) prioritizeMachinesForDeletion(targetMachineRefs []*Ref) (int, error) { - var expectedToTerminateMachineNodePairs = make(map[string]string) - for _, machineRef := range targetMachineRefs { - // Trying to update the priority of machineRef till m.maxRetryTimeout - if err := m.retry(func(ctx context.Context) (bool, error) { - mc, err := m.machineLister.Machines(m.namespace).Get(machineRef.Name) - if err != nil { - if kube_errors.IsNotFound(err) { - klog.Warningf("Machine %s not found, skipping prioritizing it for deletion", machineRef.Name) - return false, nil - } - klog.Errorf("Unable to fetch Machine object %s, Error: %v", machineRef.Name, err) - return true, err - } - if isMachineFailedOrTerminating(mc) { - return false, nil - } - expectedToTerminateMachineNodePairs[mc.Name] = mc.Labels["node"] - return m.updateAnnotationOnMachine(ctx, mc.Name, machinePriorityAnnotation, priorityValueForCandidateMachines) - }, "Machine", "update", machineRef.Name); err != nil { - klog.Errorf("could not prioritize machine %s for deletion, aborting scale in of machine deployment, Error: %v", machineRef.Name, err) - return 0, fmt.Errorf("could not prioritize machine %s for deletion, aborting scale in of machine deployment, Error: %v", machineRef.Name, err) - } - } - 
klog.V(2).Infof("Expected to remove following {machineRef: corresponding node} pairs %s", expectedToTerminateMachineNodePairs) - return len(expectedToTerminateMachineNodePairs), nil -} - // updateAnnotationOnMachine returns error only when updating the annotations on machine has been failing consequently and deadline is crossed func (m *McmManager) updateAnnotationOnMachine(ctx context.Context, mcName string, key, val string) (bool, error) { machine, err := m.machineLister.Machines(m.namespace).Get(mcName) @@ -592,16 +491,10 @@ func (m *McmManager) updateAnnotationOnMachine(ctx context.Context, mcName strin return true, err } clone := machine.DeepCopy() - if clone.Annotations != nil { - if clone.Annotations[key] == val { - klog.Infof("Machine %q priority is already set to 1, hence skipping the update", machine.Name) - return false, nil - } - clone.Annotations[key] = val - } else { + if clone.Annotations == nil { clone.Annotations = make(map[string]string) - clone.Annotations[key] = val } + clone.Annotations[key] = val _, err = m.machineClient.Machines(machine.Namespace).Update(ctx, clone, metav1.UpdateOptions{}) if err == nil { klog.Infof("Machine %s marked with priority %s successfully", mcName, val) @@ -609,28 +502,51 @@ func (m *McmManager) updateAnnotationOnMachine(ctx context.Context, mcName strin return true, err } -// scaleDownMachineDeployment scales down the machine deployment by the provided scaleDownAmount and returns the updated spec.Replicas after scale down. -func (m *McmManager) scaleDownMachineDeployment(ctx context.Context, mdName string, scaleDownAmount int) (bool, error) { - md, err := m.machineDeploymentLister.MachineDeployments(m.namespace).Get(mdName) +// scaleDownMachineDeployment scales down the MachineDeployment for given name by the length of toDeleteMachineNames after removing machine names that +// are already marked for deletion in the machineutils.TriggerDeletionByMCM of the MachineDeployment. 
+// It then updates the machineutils.TriggerDeletionByMCM annotation with revised toBeDeletedMachineNames along with the replica count as a atomic operation. +// NOTE: Callers MUST take the NodeGroup scalingMutex before invoking this method. +func (m *McmManager) scaleDownMachineDeployment(ctx context.Context, mdName string, toBeDeletedMachineInfos []machineInfo) (bool, error) { + md, err := m.GetMachineDeploymentObject(mdName) if err != nil { - klog.Errorf("Unable to fetch MachineDeployment object %s, Error: %v", mdName, err) return true, err } - mdclone := md.DeepCopy() - expectedReplicas := mdclone.Spec.Replicas - int32(scaleDownAmount) - if expectedReplicas == mdclone.Spec.Replicas { - klog.Infof("MachineDeployment %q is already set to %d, skipping the update", mdclone.Name, expectedReplicas) + + numDeletionCandidates := len(toBeDeletedMachineInfos) + toBeDeletedMachineNames := make([]string, 0, numDeletionCandidates) + + for _, mInfo := range toBeDeletedMachineInfos { + toBeDeletedMachineNames = append(toBeDeletedMachineNames, mInfo.Key.Name) + } + + data := computeScaleDownData(md, toBeDeletedMachineNames) + if data.RevisedScaledownAmount == 0 { + klog.V(3).Infof("Skipping scaledown since MachineDeployment %q has already marked %v for deletion by MCM, skipping the scale-down", md.Name, toBeDeletedMachineNames) return false, nil - } else if expectedReplicas < 0 { - klog.Errorf("Cannot delete machines in machine deployment %s, expected decrease in replicas %d is more than current replicas %d", mdName, scaleDownAmount, mdclone.Spec.Replicas) - return false, fmt.Errorf("cannot delete machines in machine deployment %s, expected decrease in replicas %d is more than current replicas %d", mdName, scaleDownAmount, mdclone.Spec.Replicas) } - mdclone.Spec.Replicas = expectedReplicas - _, err = m.machineClient.MachineDeployments(mdclone.Namespace).Update(ctx, mdclone, metav1.UpdateOptions{}) + + if data.RevisedMachineDeployment == nil { + klog.V(3).Infof("Skipping 
scaledown for MachineDeployment %q for toBeDeletedMachineNames: %v", md.Name, toBeDeletedMachineNames) + return false, nil + } + updatedMd, err := m.machineClient.MachineDeployments(data.RevisedMachineDeployment.Namespace).Update(ctx, data.RevisedMachineDeployment, metav1.UpdateOptions{}) + if err != nil { + return true, err + } + klog.V(2).Infof("MachineDeployment %q size decreased from %d to %d, TriggerDeletionByMCM Annotation Value: %q", md.Name, md.Spec.Replicas, updatedMd.Spec.Replicas, updatedMd.Annotations[machineutils.TriggerDeletionByMCM]) + + toBeCordonedNodeNames := make([]string, 0, len(data.RevisedToBeDeletedMachineNames)) + for _, mInfo := range toBeDeletedMachineInfos { + if data.RevisedToBeDeletedMachineNames.Has(mInfo.Key.Name) { + toBeCordonedNodeNames = append(toBeCordonedNodeNames, mInfo.NodeName) + klog.V(2).Infof("For MachineDeployment %q, will cordon node: %q corresponding to machine %q", md.Name, mInfo.NodeName, mInfo.Key.Name) + } + } + err = m.cordonNodes(toBeCordonedNodeNames) if err != nil { - return true, fmt.Errorf("unable to scale in machine deployment %s, Error: %w", mdName, err) + // Do not return error as cordoning is best-effort + klog.Warningf("NodeGroup.deleteMachines() of %q ran into error cordoning nodes: %v", md.Name, err) } - klog.V(2).Infof("MachineDeployment %s size decreased to %d ", mdclone.Name, mdclone.Spec.Replicas) return false, nil } @@ -659,10 +575,10 @@ func (m *McmManager) retry(fn func(ctx context.Context) (bool, error), resourceT } } -// GetInstancesForMachineDeployment returns list of cloudprovider.Instance for machines which belongs to the MachineDeployment. -func (m *McmManager) GetInstancesForMachineDeployment(machinedeployment *MachineDeployment) ([]cloudprovider.Instance, error) { +// GetInstancesForMachineDeployment returns list of cloudprovider.Instance for machines with the given nodeGroupName. 
+func (m *McmManager) GetInstancesForMachineDeployment(nodeGroupName string) ([]cloudprovider.Instance, error) { var ( - list = []string{machinedeployment.Name} + list = []string{nodeGroupName} selector = labels.NewSelector() req, _ = labels.NewRequirement("name", selection.Equals, list) ) @@ -670,7 +586,7 @@ func (m *McmManager) GetInstancesForMachineDeployment(machinedeployment *Machine selector = selector.Add(*req) machineList, err := m.machineLister.Machines(m.namespace).List(selector) if err != nil { - return nil, fmt.Errorf("unable to fetch list of Machine objects %v for machinedeployment %q", err, machinedeployment.Name) + return nil, fmt.Errorf("unable to fetch list of Machine objects %v for MachineDeployment %q", err, nodeGroupName) } nodeList, err := m.nodeLister.List(labels.Everything()) @@ -756,21 +672,19 @@ func validateNodeTemplate(nodeTemplateAttributes *v1alpha1.NodeTemplate) error { // GetMachineDeploymentAnnotations returns the annotations present on the machine deployment for the provided machine deployment name func (m *McmManager) GetMachineDeploymentAnnotations(machineDeploymentName string) (map[string]string, error) { - md, err := m.machineDeploymentLister.MachineDeployments(m.namespace).Get(machineDeploymentName) + md, err := m.GetMachineDeploymentObject(machineDeploymentName) if err != nil { - return nil, fmt.Errorf("unable to fetch MachineDeployment object %s, Error: %v", machineDeploymentName, err) + return nil, err } - return md.Annotations, nil } -// GetMachineDeploymentNodeTemplate returns the NodeTemplate of a node belonging to the same worker pool as the machinedeployment +// GetMachineDeploymentNodeTemplate returns the NodeTemplate of a node belonging to the same worker pool as the MachineDeployment // If no node present then it forms the nodeTemplate using the one present in machineClass -func (m *McmManager) GetMachineDeploymentNodeTemplate(machinedeployment *MachineDeployment) (*nodeTemplate, error) { - - md, err := 
m.machineDeploymentLister.MachineDeployments(m.namespace).Get(machinedeployment.Name) +func (m *McmManager) GetMachineDeploymentNodeTemplate(nodeGroupName string) (*nodeTemplate, error) { + md, err := m.GetMachineDeploymentObject(nodeGroupName) if err != nil { - return nil, fmt.Errorf("unable to fetch MachineDeployment object %s, Error: %v", machinedeployment.Name, err) + return nil, err } var ( @@ -846,12 +760,12 @@ func (m *McmManager) GetMachineDeploymentNodeTemplate(machinedeployment *Machine var providerSpec *awsapis.AWSProviderSpec err = json.Unmarshal(mc.ProviderSpec.Raw, &providerSpec) if err != nil { - return nil, fmt.Errorf("Unable to convert from %s to %s for %s, Error: %v", kindMachineClass, providerAWS, machinedeployment.Name, err) + return nil, fmt.Errorf("unable to convert from %s to %s for %s, Error: %v", kindMachineClass, providerAWS, nodeGroupName, err) } awsInstance, exists := AWSInstanceTypes[providerSpec.MachineType] if !exists { - return nil, fmt.Errorf("Unable to fetch details for VM type %s", providerSpec.MachineType) + return nil, fmt.Errorf("unable to fetch details for VM type %s", providerSpec.MachineType) } instance = instanceType{ InstanceType: awsInstance.InstanceType, @@ -868,11 +782,11 @@ func (m *McmManager) GetMachineDeploymentNodeTemplate(machinedeployment *Machine var providerSpec *azureapis.AzureProviderSpec err = json.Unmarshal(mc.ProviderSpec.Raw, &providerSpec) if err != nil { - return nil, fmt.Errorf("Unable to convert from %s to %s for %s, Error: %v", kindMachineClass, providerAzure, machinedeployment.Name, err) + return nil, fmt.Errorf("unable to convert from %s to %s for %s, Error: %v", kindMachineClass, providerAzure, nodeGroupName, err) } azureInstance, exists := AzureInstanceTypes[providerSpec.Properties.HardwareProfile.VMSize] if !exists { - return nil, fmt.Errorf("Unable to fetch details for VM type %s", providerSpec.Properties.HardwareProfile.VMSize) + return nil, fmt.Errorf("unable to fetch details for VM type %s", 
providerSpec.Properties.HardwareProfile.VMSize) } instance = instanceType{ InstanceType: azureInstance.InstanceType, @@ -916,6 +830,16 @@ func (m *McmManager) GetMachineDeploymentNodeTemplate(machinedeployment *Machine return nodeTmpl, nil } +// GetMachineDeploymentObject returns the MachineDeployment object for the provided machine deployment name +func (m *McmManager) GetMachineDeploymentObject(mdName string) (*v1alpha1.MachineDeployment, error) { + md, err := m.machineDeploymentLister.MachineDeployments(m.namespace).Get(mdName) + if err != nil { + klog.Errorf("unable to fetch MachineDeployment object %q, Error: %v", mdName, err) + return nil, fmt.Errorf("unable to fetch MachineDeployment object %q, Error: %v", mdName, err) + } + return md, nil +} + func isRollingUpdateFinished(md *v1alpha1.MachineDeployment) bool { for _, cond := range md.Status.Conditions { switch { @@ -945,10 +869,18 @@ func filterOutNodes(nodes []*v1.Node, instanceType string) []*v1.Node { filteredNodes = append(filteredNodes, node) } } - return filteredNodes } +func filterMachinesMatchingNames(machines []*v1alpha1.Machine, matchingNames sets.Set[string]) (filteredMachines []*v1alpha1.Machine) { + for _, m := range machines { + if matchingNames.Has(m.Name) { + filteredMachines = append(filteredMachines, m) + } + } + return +} + func getInstanceTypeForNode(node *v1.Node) string { var instanceTypeLabelValue string if node.Labels != nil { @@ -1032,6 +964,79 @@ func (m *McmManager) buildNodeFromTemplate(name string, template *nodeTemplate) return &node, nil } +func (m *McmManager) cordonNodes(nodeNames []string) error { + if len(nodeNames) == 0 { + return nil + } + ctx, cancelFn := context.WithDeadline(context.Background(), time.Now().Add(m.maxRetryTimeout)) + defer cancelFn() + var errs []error + for _, nodeName := range nodeNames { + node, err := m.nodeLister.Get(nodeName) + if err != nil { + errs = append(errs, err) + continue + } + if node.Spec.Unschedulable { + klog.V(4).Infof("Node %q is 
already cordoned", nodeName) + continue + } + if eligibility.HasNoScaleDownAnnotation(node) { + klog.V(4).Infof("Node %q is marked with ScaleDownDisabledAnnotation %q", nodeName, eligibility.ScaleDownDisabledKey) + continue + } + adjustNode := node.DeepCopy() + adjustNode.Spec.Unschedulable = true + _, err = m.nodeInterface.Update(ctx, adjustNode, metav1.UpdateOptions{}) + if err != nil { + errs = append(errs, fmt.Errorf("failed to cordon Node %q: %w", nodeName, err)) + } + klog.V(3).Infof("Node %q has been cordoned successfully", nodeName) + } + if len(errs) > 0 { + return utilerrors.NewAggregate(errs) + } + return nil +} + +// getMachineInfo extracts the machine Key from the given node's providerID if found and checks whether it is failed or terminating and returns the MachineInfo or an error +func (m *McmManager) getMachineInfo(node *apiv1.Node) (*machineInfo, error) { + machines, err := m.machineLister.Machines(m.namespace).List(labels.Everything()) + if err != nil { + return nil, fmt.Errorf("cannot list machines in namespace %q due to: %s", m.namespace, err) + } + + providerID := node.Spec.ProviderID + var machineName, machineNamespace string + var isFailedOrTerminating bool + for _, machine := range machines { + machineID := strings.Split(machine.Spec.ProviderID, "/") + nodeID := strings.Split(node.Spec.ProviderID, "/") + // If registered, the ID will match the cloudprovider instance ID. + // If unregistered, the ID will match the machine name. 
+ if machineID[len(machineID)-1] == nodeID[len(nodeID)-1] || + nodeID[len(nodeID)-1] == machine.Name { + machineName = machine.Name + machineNamespace = machine.Namespace + isFailedOrTerminating = isMachineFailedOrTerminating(machine) + break + } + } + + if machineName == "" { + klog.V(3).Infof("No Machine found for node providerID %q", providerID) + return nil, nil + } + return &machineInfo{ + Key: types.NamespacedName{ + Name: machineName, + Namespace: machineNamespace, + }, + NodeName: node.Name, + FailedOrTerminating: isFailedOrTerminating, + }, nil +} + func buildGenericLabels(template *nodeTemplate, nodeName string) map[string]string { result := make(map[string]string) // TODO: extract from MCM @@ -1058,10 +1063,34 @@ func buildGenericLabels(template *nodeTemplate, nodeName string) map[string]stri return result } +func buildNodeGroupFromSpec(value string, mcmManager *McmManager) (*nodeGroup, error) { + spec, err := dynamic.SpecFromString(value, true) + if err != nil { + return nil, fmt.Errorf("failed to parse node group spec: %v", err) + } + s := strings.Split(spec.Name, ".") + Namespace, Name := s[0], s[1] + ng := buildNodeGroup(mcmManager, spec.MinSize, spec.MaxSize, Namespace, Name) + return ng, nil +} + +func buildNodeGroup(mcmManager *McmManager, minSize int, maxSize int, namespace string, name string) *nodeGroup { + return &nodeGroup{ + mcmManager: mcmManager, + minSize: minSize, + maxSize: maxSize, + scalingMutex: sync.Mutex{}, + NamespacedName: types.NamespacedName{ + Name: name, + Namespace: namespace, + }, + } +} + // isMachineFailedOrTerminating returns true if machine is already being terminated or considered for termination by autoscaler. +// TODO: Move to MCM machineutils.IsMachineFailedOrTerminating after MCM release. 
func isMachineFailedOrTerminating(machine *v1alpha1.Machine) bool { if !machine.GetDeletionTimestamp().IsZero() || machine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed { - klog.Infof("Machine %q is already being terminated or in a failed phase, and hence skipping the deletion", machine.Name) return true } return false @@ -1075,3 +1104,36 @@ func filterExtendedResources(allResources v1.ResourceList) (extendedResources v1 }) return } + +// computeScaleDownData computes fresh scaleDownData for the given input MachineDeployment and the machineNamesForDeletion. +// The output scaleDownData encapsulates the scale-down amount and an updated, non-nil MachineDeployment. +func computeScaleDownData(md *v1alpha1.MachineDeployment, machineNamesForDeletion []string) (data scaleDownData) { + forDeletionSet := sets.New(machineNamesForDeletion...) + alreadyMarkedSet := sets.New(getMachineNamesTriggeredForDeletion(md)...) + + uniqueForDeletionSet := forDeletionSet.Difference(alreadyMarkedSet) + toBeMarkedSet := alreadyMarkedSet.Union(forDeletionSet) + + data.RevisedToBeDeletedMachineNames = uniqueForDeletionSet + data.RevisedScaledownAmount = uniqueForDeletionSet.Len() + data.RevisedMachineDeployment = nil + + expectedReplicas := md.Spec.Replicas - int32(data.RevisedScaledownAmount) + if expectedReplicas == md.Spec.Replicas { + klog.Infof("MachineDeployment %q is already set to %d, no need to scale-down", md.Name, expectedReplicas) + } else if expectedReplicas < 0 { + klog.Errorf("Cannot delete machines in MachineDeployment %q, expected decrease in replicas: %d is more than current replicas: %d", md.Name, data.RevisedScaledownAmount, md.Spec.Replicas) + } else { + mdCopy := md.DeepCopy() + if mdCopy.Annotations == nil { + mdCopy.Annotations = make(map[string]string) + } + triggerDeletionAnnotValue := createMachinesTriggeredForDeletionAnnotValue(toBeMarkedSet.UnsortedList()) + if mdCopy.Annotations[machineutils.TriggerDeletionByMCM] != triggerDeletionAnnotValue { + 
mdCopy.Annotations[machineutils.TriggerDeletionByMCM] = triggerDeletionAnnotValue + } + mdCopy.Spec.Replicas = expectedReplicas + data.RevisedMachineDeployment = mdCopy + } + return +} diff --git a/cluster-autoscaler/cloudprovider/mcm/mcm_manager_test.go b/cluster-autoscaler/cloudprovider/mcm/mcm_manager_test.go index eff9df3d7ce7..3e436443da8d 100644 --- a/cluster-autoscaler/cloudprovider/mcm/mcm_manager_test.go +++ b/cluster-autoscaler/cloudprovider/mcm/mcm_manager_test.go @@ -19,6 +19,8 @@ package mcm import ( "errors" "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" + "github.com/gardener/machine-controller-manager/pkg/util/provider/machineutils" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" "k8s.io/utils/ptr" "maps" @@ -222,6 +224,87 @@ func TestFilterExtendedResources(t *testing.T) { assert.Equal(t, customResources, extendedResources) } +func TestComputeScaledownData(t *testing.T) { + t.Run("simple", func(t *testing.T) { + initialReplicas := int32(2) + md := newMachineDeployments(1, initialReplicas, nil, nil, nil)[0] + md.Annotations = map[string]string{} + + machineNamesForDeletion := []string{"n1"} + data := computeScaleDownData(md, machineNamesForDeletion) + assert.Equal(t, createMachinesTriggeredForDeletionAnnotValue(machineNamesForDeletion), data.RevisedMachineDeployment.Annotations[machineutils.TriggerDeletionByMCM]) + assert.Equal(t, len(machineNamesForDeletion), data.RevisedScaledownAmount) + assert.Equal(t, int32(2-len(machineNamesForDeletion)), data.RevisedMachineDeployment.Spec.Replicas) + }) + + t.Run("single-duplicate", func(t *testing.T) { + initialReplicas := 2 + md := newMachineDeployments(1, int32(initialReplicas), nil, nil, nil)[0] + md.Annotations = map[string]string{} + + machineNamesForDeletion := []string{"n1"} + data := computeScaleDownData(md, machineNamesForDeletion) + assert.Equal(t, createMachinesTriggeredForDeletionAnnotValue(machineNamesForDeletion), 
data.RevisedMachineDeployment.Annotations[machineutils.TriggerDeletionByMCM]) + assert.Equal(t, len(machineNamesForDeletion), data.RevisedScaledownAmount) + + expectedReplicas := int32(initialReplicas - len(machineNamesForDeletion)) + assert.Equal(t, expectedReplicas, data.RevisedMachineDeployment.Spec.Replicas) + + md = data.RevisedMachineDeployment + // repeating computeScaleDownData for same machineNamesForDeletion should have 0 RevisedScaledownAmount, empty RevisedToBeDeletedMachineNames, and nil RevisedMachineDeployment + data = computeScaleDownData(md, machineNamesForDeletion) + assert.Equal(t, 0, data.RevisedScaledownAmount) + assert.Empty(t, data.RevisedToBeDeletedMachineNames) + assert.Nil(t, data.RevisedMachineDeployment) + + }) + + t.Run("multi-duplicates", func(t *testing.T) { + initialReplicas := 3 + md := newMachineDeployments(1, int32(initialReplicas), nil, nil, nil)[0] + md.Annotations = map[string]string{} + + machineNamesForDeletion := []string{"n1", "n2"} + data := computeScaleDownData(md, machineNamesForDeletion) + assert.Equal(t, createMachinesTriggeredForDeletionAnnotValue(machineNamesForDeletion), data.RevisedMachineDeployment.Annotations[machineutils.TriggerDeletionByMCM]) + assert.Equal(t, len(machineNamesForDeletion), data.RevisedScaledownAmount) + expectedReplicas := int32(initialReplicas - len(machineNamesForDeletion)) + assert.Equal(t, expectedReplicas, data.RevisedMachineDeployment.Spec.Replicas) + + md = data.RevisedMachineDeployment + // repeating computeScaleDownData for same machineNamesForDeletion should have 0 RevisedScaledownAmount, empty RevisedToBeDeletedMachineNames, and nil RevisedMachineDeployment + data = computeScaleDownData(md, machineNamesForDeletion) + assert.Equal(t, 0, data.RevisedScaledownAmount) + assert.Empty(t, data.RevisedToBeDeletedMachineNames) + assert.Nil(t, data.RevisedMachineDeployment) + + }) + + t.Run("overlapping", func(t *testing.T) { + initialReplicas := 5 + md := newMachineDeployments(1, 
int32(initialReplicas), nil, nil, nil)[0] + md.Annotations = map[string]string{} + + machineNamesForDeletion := sets.New("n1", "n2") + data := computeScaleDownData(md, machineNamesForDeletion.UnsortedList()) + assert.Equal(t, createMachinesTriggeredForDeletionAnnotValue(machineNamesForDeletion.UnsortedList()), data.RevisedMachineDeployment.Annotations[machineutils.TriggerDeletionByMCM]) + assert.Equal(t, len(machineNamesForDeletion), data.RevisedScaledownAmount) + expectedReplicas := int32(initialReplicas - len(machineNamesForDeletion)) + assert.Equal(t, expectedReplicas, data.RevisedMachineDeployment.Spec.Replicas) + + newMachineNamesForDeletion := sets.New("n2", "n3", "n4") + md = data.RevisedMachineDeployment + data = computeScaleDownData(md, newMachineNamesForDeletion.UnsortedList()) + assert.NotNil(t, data.RevisedMachineDeployment) + uniqueMachinesNamesForDeletion := newMachineNamesForDeletion.Difference(machineNamesForDeletion) + assert.Equal(t, uniqueMachinesNamesForDeletion.Len(), data.RevisedScaledownAmount) + assert.Equal(t, uniqueMachinesNamesForDeletion, data.RevisedToBeDeletedMachineNames) + expectedReplicas = int32(initialReplicas - machineNamesForDeletion.Union(newMachineNamesForDeletion).Len()) + assert.Equal(t, expectedReplicas, data.RevisedMachineDeployment.Spec.Replicas) + + }) +} + func createSampleInstanceType(instanceTypeName string, customResourceName apiv1.ResourceName, customResourceQuantity resource.Quantity) *instanceType { awsM5Large := AWSInstanceTypes[instanceTypeName] extendedResources := make(apiv1.ResourceList) diff --git a/cluster-autoscaler/cloudprovider/mcm/test_utils.go b/cluster-autoscaler/cloudprovider/mcm/test_utils.go index 3c3f55e4c696..58c884625990 100644 --- a/cluster-autoscaler/cloudprovider/mcm/test_utils.go +++ b/cluster-autoscaler/cloudprovider/mcm/test_utils.go @@ -7,6 +7,7 @@ package mcm import ( "fmt" appsv1 "k8s.io/api/apps/v1" + types "k8s.io/apimachinery/pkg/types" "k8s.io/utils/pointer" "testing" "time" @@ 
-24,7 +25,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" customfake "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/mcm/fakeclient" - deletetaint "k8s.io/autoscaler/cluster-autoscaler/utils/taints" appsv1informers "k8s.io/client-go/informers" coreinformers "k8s.io/client-go/informers" ) @@ -42,7 +42,7 @@ func newMachineDeployments( labels map[string]string, ) []*v1alpha1.MachineDeployment { machineDeployments := make([]*v1alpha1.MachineDeployment, machineDeploymentCount) - for i := range machineDeployments { + for i := 0; i < machineDeploymentCount; i++ { machineDeployment := &v1alpha1.MachineDeployment{ TypeMeta: metav1.TypeMeta{ APIVersion: "machine.sapcloud.io", @@ -74,7 +74,7 @@ func newMachineSets( ) []*v1alpha1.MachineSet { machineSets := make([]*v1alpha1.MachineSet, machineSetCount) - for i := range machineSets { + for i := 0; i < machineSetCount; i++ { ms := &v1alpha1.MachineSet{ TypeMeta: metav1.TypeMeta{ APIVersion: "machine.sapcloud.io", @@ -97,10 +97,9 @@ func newMachine( statusTemplate *v1alpha1.MachineStatus, mdName, msName string, priorityAnnotationValue string, - setDeletionTimeStamp, setNodeLabel bool, ) *v1alpha1.Machine { - m := newMachines(1, providerId, statusTemplate, mdName, msName, []string{priorityAnnotationValue}, []bool{setDeletionTimeStamp})[0] + m := newMachines(1, providerId, statusTemplate, mdName, msName, []string{priorityAnnotationValue})[0] m.Name = name m.Spec.ProviderID = providerId if !setNodeLabel { @@ -109,32 +108,39 @@ func newMachine( return m } +func generateNames(prefix string, count int) []string { + names := make([]string, count) + for i := 0; i < count; i++ { + names[i] = fmt.Sprintf("%s-%d", prefix, i+1) + } + return names +} + func newMachines( machineCount int, providerIdGenerateName string, statusTemplate *v1alpha1.MachineStatus, mdName, msName string, priorityAnnotationValues []string, - setDeletionTimeStamp []bool, ) []*v1alpha1.Machine { machines := 
make([]*v1alpha1.Machine, machineCount) - + machineNames := generateNames("machine", machineCount) + nodeNames := generateNames("node", machineCount) currentTime := metav1.Now() - for i := range machines { + for i := 0; i < machineCount; i++ { m := &v1alpha1.Machine{ TypeMeta: metav1.TypeMeta{ APIVersion: "machine.sapcloud.io", Kind: "Machine", }, ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("machine-%d", i+1), + Name: machineNames[i], Namespace: testNamespace, OwnerReferences: []metav1.OwnerReference{ {Name: msName}, }, Labels: map[string]string{machineDeploymentNameLabel: mdName}, - Annotations: map[string]string{machinePriorityAnnotation: priorityAnnotationValues[i]}, CreationTimestamp: metav1.Now(), }, } @@ -143,12 +149,12 @@ func newMachines( m.Spec = v1alpha1.MachineSpec{ProviderID: fmt.Sprintf("%s/i%d", providerIdGenerateName, i+1)} } - m.Labels["node"] = fmt.Sprintf("node-%d", i+1) - if setDeletionTimeStamp[i] { - m.ObjectMeta.DeletionTimestamp = ¤tTime - } + m.Labels["node"] = nodeNames[i] if statusTemplate != nil { m.Status = *newMachineStatus(statusTemplate) + if m.Status.CurrentStatus.Phase == v1alpha1.MachineTerminating { + m.DeletionTimestamp = ¤tTime + } } machines[i] = m } @@ -158,9 +164,8 @@ func newMachines( func newNode( nodeName, providerId string, - addToBeDeletedTaint bool, ) *corev1.Node { - node := newNodes(1, providerId, []bool{addToBeDeletedTaint})[0] + node := newNodes(1, providerId)[0] clone := node.DeepCopy() clone.Name = nodeName clone.Spec.ProviderID = providerId @@ -170,30 +175,20 @@ func newNode( func newNodes( nodeCount int, providerIdGenerateName string, - addToBeDeletedTaint []bool, ) []*corev1.Node { - nodes := make([]*corev1.Node, nodeCount) - for i := range nodes { - var taints []corev1.Taint - if addToBeDeletedTaint[i] { - taints = append(taints, corev1.Taint{ - Key: deletetaint.ToBeDeletedTaint, - Value: testTaintValue, - Effect: corev1.TaintEffectNoSchedule, - }) - } + nodeNames := generateNames("node", nodeCount) + 
for i := 0; i < nodeCount; i++ { node := &corev1.Node{ TypeMeta: metav1.TypeMeta{ APIVersion: "appsv1", Kind: "Node", }, ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("node-%d", i+1), + Name: nodeNames[i], }, Spec: corev1.NodeSpec{ ProviderID: fmt.Sprintf("%s/i%d", providerIdGenerateName, i+1), - Taints: taints, }, } @@ -287,6 +282,7 @@ func createMcmManager( discoveryOpts: cloudprovider.NodeGroupDiscoveryOptions{ NodeGroupSpecs: nodeGroups, }, + nodeGroups: make(map[types.NamespacedName]*nodeGroup), deploymentLister: appsControlSharedInformers.Deployments().Lister(), machineClient: fakeTypedMachineClient, machineDeploymentLister: machineDeployments.Lister(), @@ -294,10 +290,11 @@ func createMcmManager( machineLister: machines.Lister(), machineClassLister: machineClasses.Lister(), nodeLister: nodes.Lister(), + nodeInterface: fakeTargetCoreClient.CoreV1().Nodes(), maxRetryTimeout: 5 * time.Second, retryInterval: 1 * time.Second, } - + g.Expect(mcmManager.generateMachineDeploymentMap()).To(gomega.Succeed()) hasSyncedCachesFns := []cache.InformerSynced{ nodes.Informer().HasSynced, machines.Informer().HasSynced, diff --git a/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/machineutils/utils.go b/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/machineutils/utils.go new file mode 100644 index 000000000000..6be7c4cb5419 --- /dev/null +++ b/cluster-autoscaler/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/machineutils/utils.go @@ -0,0 +1,85 @@ +// SPDX-FileCopyrightText: 2024 SAP SE or an SAP affiliate company and Gardener contributors +// +// SPDX-License-Identifier: Apache-2.0 + +// Package machineutils contains the consts and global vaariables for machine operation +package machineutils + +import ( + "time" + + v1 "k8s.io/api/core/v1" +) + +const ( + // GetVMStatus sets machine status to terminating and specifies next step as getting VMs + GetVMStatus = 
"Set machine status to termination. Now, getting VM Status" + + // InstanceInitialization is a step that represents initialization of a VM instance (post-creation). + InstanceInitialization = "Initialize VM Instance" + + // InitiateDrain specifies next step as initiate node drain + InitiateDrain = "Initiate node drain" + + // DelVolumesAttachments specifies next step as deleting volume attachments + DelVolumesAttachments = "Delete Volume Attachments" + + // InitiateVMDeletion specifies next step as initiate VM deletion + InitiateVMDeletion = "Initiate VM deletion" + + // InitiateNodeDeletion specifies next step as node object deletion + InitiateNodeDeletion = "Initiate node object deletion" + + // InitiateFinalizerRemoval specifies next step as machine finalizer removal + InitiateFinalizerRemoval = "Initiate machine object finalizer removal" + + // LastAppliedALTAnnotation contains the last configuration of annotations, labels & taints applied on the node object + LastAppliedALTAnnotation = "node.machine.sapcloud.io/last-applied-anno-labels-taints" + + // MachinePriority is the annotation used to specify priority + // associated with a machine while deleting it. 
The less its + // priority the more likely it is to be deleted first + // Default priority for a machine is set to 3 + MachinePriority = "machinepriority.machine.sapcloud.io" + + // MachineClassKind is used to identify the machineClassKind for generic machineClasses + MachineClassKind = "MachineClass" + + // NotManagedByMCM annotation helps in identifying the nodes which are not handled by MCM + NotManagedByMCM = "node.machine.sapcloud.io/not-managed-by-mcm" + + // TriggerDeletionByMCM annotation on the node would trigger the deletion of the corresponding machine object in the control cluster + TriggerDeletionByMCM = "node.machine.sapcloud.io/trigger-deletion-by-mcm" + + // NodeUnhealthy is a node termination reason for failed machines + NodeUnhealthy = "Unhealthy" + + // NodeScaledDown is a node termination reason for healthy deleted machines + NodeScaledDown = "ScaleDown" + + // NodeTerminationCondition describes nodes that are terminating + NodeTerminationCondition v1.NodeConditionType = "Terminating" + + // TaintNodeCriticalComponentsNotReady is the name of a gardener taint + // indicating that a node is not yet ready to have user workload scheduled + TaintNodeCriticalComponentsNotReady = "node.gardener.cloud/critical-components-not-ready" +) + +// RetryPeriod is an alias for specifying the retry period +type RetryPeriod time.Duration + +// These are the valid values for RetryPeriod +const ( + // ConflictRetry tells the controller to retry quickly - 200 milliseconds + ConflictRetry RetryPeriod = RetryPeriod(200 * time.Millisecond) + // ShortRetry tells the controller to retry after a short duration - 15 seconds + ShortRetry RetryPeriod = RetryPeriod(5 * time.Second) + // MediumRetry tells the controller to retry after a medium duration - 2 minutes + MediumRetry RetryPeriod = RetryPeriod(3 * time.Minute) + // LongRetry tells the controller to retry after a long duration - 10 minutes + LongRetry RetryPeriod = RetryPeriod(10 * time.Minute) +) + +// EssentialTaints 
are taints on node object which if added/removed, require an immediate reconcile by machine controller +// TODO: update this when taints for ALT updation and PostCreate operations is introduced. +var EssentialTaints = []string{TaintNodeCriticalComponentsNotReady} diff --git a/cluster-autoscaler/vendor/modules.txt b/cluster-autoscaler/vendor/modules.txt index 2b8c030a1ad3..ed951d75775e 100644 --- a/cluster-autoscaler/vendor/modules.txt +++ b/cluster-autoscaler/vendor/modules.txt @@ -252,6 +252,7 @@ github.com/gardener/machine-controller-manager/pkg/client/informers/externalvers github.com/gardener/machine-controller-manager/pkg/client/listers/machine/v1alpha1 github.com/gardener/machine-controller-manager/pkg/util/provider/cache github.com/gardener/machine-controller-manager/pkg/util/provider/machinecodes/codes +github.com/gardener/machine-controller-manager/pkg/util/provider/machineutils # github.com/gardener/machine-controller-manager-provider-aws v0.20.0 ## explicit; go 1.21 github.com/gardener/machine-controller-manager-provider-aws/pkg/aws/apis