diff --git a/pkg/descheduler/apis/config/types_loadaware.go b/pkg/descheduler/apis/config/types_loadaware.go index 96f4f7b27..8b0bb7d1f 100644 --- a/pkg/descheduler/apis/config/types_loadaware.go +++ b/pkg/descheduler/apis/config/types_loadaware.go @@ -37,6 +37,7 @@ type LowNodeLoadArgs struct { // NumberOfNodes can be configured to activate the strategy only when the number of under utilized nodes are above the configured value. // This could be helpful in large clusters where a few nodes could go under utilized frequently or for a short period of time. + // This parameter includes the sum of nodes with low node utilization, low prod utilization, and both. // By default, NumberOfNodes is set to zero. NumberOfNodes int32 @@ -65,12 +66,18 @@ type LowNodeLoadArgs struct { // A resource consumption above (resp. below) this window is considered as overutilization (resp. underutilization). UseDeviationThresholds bool - // HighThresholds defines the target usage threshold of resources + // HighThresholds defines the target usage threshold of node resources HighThresholds ResourceThresholds - // LowThresholds defines the low usage threshold of resources + // LowThresholds defines the low usage threshold of node resources LowThresholds ResourceThresholds + // ProdHighThresholds defines the target usage threshold of Prod resources + ProdHighThresholds ResourceThresholds + + // ProdLowThresholds defines the low usage threshold of Prod resources + ProdLowThresholds ResourceThresholds + // ResourceWeights indicates the weights of resources. // The weights of resources are both 1 by default. ResourceWeights map[corev1.ResourceName]int64 @@ -97,12 +104,18 @@ type LowNodeLoadNodePool struct { // A resource consumption above (resp. below) this window is considered as overutilization (resp. underutilization). UseDeviationThresholds bool - // HighThresholds defines the target usage threshold of resources + // HighThresholds defines the target usage threshold of node resources HighThresholds ResourceThresholds - // LowThresholds defines the low usage threshold of resources + // LowThresholds defines the low usage threshold of node resources LowThresholds ResourceThresholds + // ProdHighThresholds defines the target usage threshold of Prod resources + ProdHighThresholds ResourceThresholds + + // ProdLowThresholds defines the low usage threshold of Prod resources + ProdLowThresholds ResourceThresholds + // ResourceWeights indicates the weights of resources. // The weights of resources are both 1 by default. 
ResourceWeights map[corev1.ResourceName]int64 diff --git a/pkg/descheduler/apis/config/v1alpha2/conversion_plugins.go b/pkg/descheduler/apis/config/v1alpha2/conversion_plugins.go index a24fc7003..0ab913d82 100644 --- a/pkg/descheduler/apis/config/v1alpha2/conversion_plugins.go +++ b/pkg/descheduler/apis/config/v1alpha2/conversion_plugins.go @@ -33,6 +33,8 @@ func Convert_v1alpha2_LowNodeLoadArgs_To_config_LowNodeLoadArgs(in *LowNodeLoadA UseDeviationThresholds: out.UseDeviationThresholds, HighThresholds: out.HighThresholds, LowThresholds: out.LowThresholds, + ProdHighThresholds: out.ProdHighThresholds, + ProdLowThresholds: out.ProdLowThresholds, ResourceWeights: out.ResourceWeights, AnomalyCondition: out.AnomalyCondition, } diff --git a/pkg/descheduler/apis/config/v1alpha2/types_loadaware.go b/pkg/descheduler/apis/config/v1alpha2/types_loadaware.go index 8dae93feb..6d00980e5 100644 --- a/pkg/descheduler/apis/config/v1alpha2/types_loadaware.go +++ b/pkg/descheduler/apis/config/v1alpha2/types_loadaware.go @@ -36,6 +36,7 @@ type LowNodeLoadArgs struct { // NumberOfNodes can be configured to activate the strategy only when the number of under utilized nodes are above the configured value. // This could be helpful in large clusters where a few nodes could go under utilized frequently or for a short period of time. + // This parameter includes the sum of nodes with low node utilization, low prod utilization, and both. // By default, NumberOfNodes is set to zero. NumberOfNodes *int32 `json:"numberOfNodes,omitempty"` @@ -64,12 +65,18 @@ type LowNodeLoadArgs struct { // A resource consumption above (resp. below) this window is considered as overutilization (resp. underutilization). UseDeviationThresholds *bool `json:"useDeviationThresholds,omitempty"` - // HighThresholds defines the target usage threshold of resources + // HighThresholds defines the target usage threshold of node resources HighThresholds ResourceThresholds `json:"highThresholds,omitempty"` - // LowThresholds defines the low usage threshold of resources + // LowThresholds defines the low usage threshold of node resources LowThresholds ResourceThresholds `json:"lowThresholds,omitempty"` + // ProdHighThresholds defines the target usage threshold of Prod resources + ProdHighThresholds ResourceThresholds `json:"prodHighThresholds,omitempty"` + + // ProdLowThresholds defines the low usage threshold of Prod resources + ProdLowThresholds ResourceThresholds `json:"prodLowThresholds,omitempty"` + // ResourceWeights indicates the weights of resources. // The weights of CPU and Memory are both 1 by default. ResourceWeights map[corev1.ResourceName]int64 `json:"resourceWeights,omitempty"` @@ -96,12 +103,18 @@ type LowNodeLoadNodePool struct { // A resource consumption above (resp. below) this window is considered as overutilization (resp. underutilization). 
UseDeviationThresholds bool `json:"useDeviationThresholds,omitempty"` - // HighThresholds defines the target usage threshold of resources + // HighThresholds defines the target usage threshold of node resources HighThresholds ResourceThresholds `json:"highThresholds,omitempty"` - // LowThresholds defines the low usage threshold of resources + // LowThresholds defines the low usage threshold of node resources LowThresholds ResourceThresholds `json:"lowThresholds,omitempty"` + // ProdHighThresholds defines the target usage threshold of Prod resources + ProdHighThresholds ResourceThresholds `json:"prodHighThresholds,omitempty"` + + // ProdLowThresholds defines the low usage threshold of Prod resources + ProdLowThresholds ResourceThresholds `json:"prodLowThresholds,omitempty"` + // ResourceWeights indicates the weights of resources. // The weights of resources are both 1 by default. ResourceWeights map[corev1.ResourceName]int64 `json:"resourceWeights,omitempty"` diff --git a/pkg/descheduler/apis/config/v1alpha2/zz_generated.conversion.go b/pkg/descheduler/apis/config/v1alpha2/zz_generated.conversion.go index 55130cef1..36e82704c 100644 --- a/pkg/descheduler/apis/config/v1alpha2/zz_generated.conversion.go +++ b/pkg/descheduler/apis/config/v1alpha2/zz_generated.conversion.go @@ -381,6 +381,8 @@ func autoConvert_v1alpha2_LowNodeLoadArgs_To_config_LowNodeLoadArgs(in *LowNodeL } out.HighThresholds = *(*config.ResourceThresholds)(unsafe.Pointer(&in.HighThresholds)) out.LowThresholds = *(*config.ResourceThresholds)(unsafe.Pointer(&in.LowThresholds)) + out.ProdHighThresholds = *(*config.ResourceThresholds)(unsafe.Pointer(&in.ProdHighThresholds)) + out.ProdLowThresholds = *(*config.ResourceThresholds)(unsafe.Pointer(&in.ProdLowThresholds)) out.ResourceWeights = *(*map[corev1.ResourceName]int64)(unsafe.Pointer(&in.ResourceWeights)) if in.AnomalyCondition != nil { in, out := &in.AnomalyCondition, &out.AnomalyCondition @@ -428,6 +430,8 @@ func autoConvert_config_LowNodeLoadArgs_To_v1alpha2_LowNodeLoadArgs(in *config.L } out.HighThresholds = *(*ResourceThresholds)(unsafe.Pointer(&in.HighThresholds)) out.LowThresholds = *(*ResourceThresholds)(unsafe.Pointer(&in.LowThresholds)) + out.ProdHighThresholds = *(*ResourceThresholds)(unsafe.Pointer(&in.ProdHighThresholds)) + out.ProdLowThresholds = *(*ResourceThresholds)(unsafe.Pointer(&in.ProdLowThresholds)) out.ResourceWeights = *(*map[corev1.ResourceName]int64)(unsafe.Pointer(&in.ResourceWeights)) if in.AnomalyCondition != nil { in, out := &in.AnomalyCondition, &out.AnomalyCondition @@ -464,6 +468,8 @@ func autoConvert_v1alpha2_LowNodeLoadNodePool_To_config_LowNodeLoadNodePool(in * out.UseDeviationThresholds = in.UseDeviationThresholds out.HighThresholds = *(*config.ResourceThresholds)(unsafe.Pointer(&in.HighThresholds)) out.LowThresholds = *(*config.ResourceThresholds)(unsafe.Pointer(&in.LowThresholds)) + out.ProdHighThresholds = *(*config.ResourceThresholds)(unsafe.Pointer(&in.ProdHighThresholds)) + out.ProdLowThresholds = *(*config.ResourceThresholds)(unsafe.Pointer(&in.ProdLowThresholds)) out.ResourceWeights = *(*map[corev1.ResourceName]int64)(unsafe.Pointer(&in.ResourceWeights)) if in.AnomalyCondition != nil { in, out := &in.AnomalyCondition, &out.AnomalyCondition @@ -488,6 +494,8 @@ func autoConvert_config_LowNodeLoadNodePool_To_v1alpha2_LowNodeLoadNodePool(in * out.UseDeviationThresholds = in.UseDeviationThresholds out.HighThresholds = *(*ResourceThresholds)(unsafe.Pointer(&in.HighThresholds)) out.LowThresholds = 
*(*ResourceThresholds)(unsafe.Pointer(&in.LowThresholds)) + out.ProdHighThresholds = *(*ResourceThresholds)(unsafe.Pointer(&in.ProdHighThresholds)) + out.ProdLowThresholds = *(*ResourceThresholds)(unsafe.Pointer(&in.ProdLowThresholds)) out.ResourceWeights = *(*map[corev1.ResourceName]int64)(unsafe.Pointer(&in.ResourceWeights)) if in.AnomalyCondition != nil { in, out := &in.AnomalyCondition, &out.AnomalyCondition diff --git a/pkg/descheduler/apis/config/v1alpha2/zz_generated.deepcopy.go b/pkg/descheduler/apis/config/v1alpha2/zz_generated.deepcopy.go index 883eb2433..da77a5659 100644 --- a/pkg/descheduler/apis/config/v1alpha2/zz_generated.deepcopy.go +++ b/pkg/descheduler/apis/config/v1alpha2/zz_generated.deepcopy.go @@ -230,6 +230,20 @@ func (in *LowNodeLoadArgs) DeepCopyInto(out *LowNodeLoadArgs) { (*out)[key] = val } } + if in.ProdHighThresholds != nil { + in, out := &in.ProdHighThresholds, &out.ProdHighThresholds + *out = make(ResourceThresholds, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.ProdLowThresholds != nil { + in, out := &in.ProdLowThresholds, &out.ProdLowThresholds + *out = make(ResourceThresholds, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } if in.ResourceWeights != nil { in, out := &in.ResourceWeights, &out.ResourceWeights *out = make(map[corev1.ResourceName]int64, len(*in)) @@ -297,6 +311,20 @@ func (in *LowNodeLoadNodePool) DeepCopyInto(out *LowNodeLoadNodePool) { (*out)[key] = val } } + if in.ProdHighThresholds != nil { + in, out := &in.ProdHighThresholds, &out.ProdHighThresholds + *out = make(ResourceThresholds, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.ProdLowThresholds != nil { + in, out := &in.ProdLowThresholds, &out.ProdLowThresholds + *out = make(ResourceThresholds, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } if in.ResourceWeights != nil { in, out := &in.ResourceWeights, &out.ResourceWeights *out = make(map[corev1.ResourceName]int64, len(*in)) diff --git a/pkg/descheduler/apis/config/validation/validation_loadaware.go b/pkg/descheduler/apis/config/validation/validation_loadaware.go index dd4af65a8..5c5e29613 100644 --- a/pkg/descheduler/apis/config/validation/validation_loadaware.go +++ b/pkg/descheduler/apis/config/validation/validation_loadaware.go @@ -68,6 +68,23 @@ func ValidateLowLoadUtilizationArgs(path *field.Path, args *deschedulerconfig.Lo } } + for resourceName, percentage := range nodePool.ProdHighThresholds { + if percentage < 0 { + allErrs = append(allErrs, field.Invalid(nodePoolPath.Child("prodHighThresholds").Key(string(resourceName)), percentage, "percentage must be greater than or equal to 0")) + } + if nodeHighPercentage, ok := nodePool.HighThresholds[resourceName]; ok && percentage > nodeHighPercentage { + allErrs = append(allErrs, field.Invalid(nodePoolPath.Child("prodHighThresholds").Key(string(resourceName)), percentage, "prodHighThresholds must be less than or equal to highThresholds")) + } + } + for resourceName, percentage := range nodePool.ProdLowThresholds { + if percentage < 0 { + allErrs = append(allErrs, field.Invalid(nodePoolPath.Child("prodLowThresholds").Key(string(resourceName)), percentage, "percentage must be greater than or equal to 0")) + } + if highProdPercentage, ok := nodePool.ProdHighThresholds[resourceName]; ok && percentage > highProdPercentage { + allErrs = append(allErrs, field.Invalid(nodePoolPath.Child("prodLowThresholds").Key(string(resourceName)), percentage, "prodLowThresholds must be less than or equal to 
prodHighThresholds")) + } + } + if nodePool.AnomalyCondition.ConsecutiveAbnormalities <= 0 { fieldPath := nodePoolPath.Child("anomalyDetectionThresholds").Child("consecutiveAbnormalities") allErrs = append(allErrs, field.Invalid(fieldPath, nodePool.AnomalyCondition.ConsecutiveAbnormalities, "consecutiveAbnormalities must be greater than 0")) diff --git a/pkg/descheduler/apis/config/zz_generated.deepcopy.go b/pkg/descheduler/apis/config/zz_generated.deepcopy.go index 468257a0c..a0068213c 100644 --- a/pkg/descheduler/apis/config/zz_generated.deepcopy.go +++ b/pkg/descheduler/apis/config/zz_generated.deepcopy.go @@ -206,6 +206,20 @@ func (in *LowNodeLoadArgs) DeepCopyInto(out *LowNodeLoadArgs) { (*out)[key] = val } } + if in.ProdHighThresholds != nil { + in, out := &in.ProdHighThresholds, &out.ProdHighThresholds + *out = make(ResourceThresholds, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.ProdLowThresholds != nil { + in, out := &in.ProdLowThresholds, &out.ProdLowThresholds + *out = make(ResourceThresholds, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } if in.ResourceWeights != nil { in, out := &in.ResourceWeights, &out.ResourceWeights *out = make(map[corev1.ResourceName]int64, len(*in)) @@ -273,6 +287,20 @@ func (in *LowNodeLoadNodePool) DeepCopyInto(out *LowNodeLoadNodePool) { (*out)[key] = val } } + if in.ProdHighThresholds != nil { + in, out := &in.ProdHighThresholds, &out.ProdHighThresholds + *out = make(ResourceThresholds, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.ProdLowThresholds != nil { + in, out := &in.ProdLowThresholds, &out.ProdLowThresholds + *out = make(ResourceThresholds, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } if in.ResourceWeights != nil { in, out := &in.ResourceWeights, &out.ResourceWeights *out = make(map[corev1.ResourceName]int64, len(*in)) diff --git a/pkg/descheduler/framework/plugins/loadaware/low_node_load.go b/pkg/descheduler/framework/plugins/loadaware/low_node_load.go index 7356362da..c999815c2 100644 --- a/pkg/descheduler/framework/plugins/loadaware/low_node_load.go +++ b/pkg/descheduler/framework/plugins/loadaware/low_node_load.go @@ -56,6 +56,7 @@ type LowNodeLoad struct { nodeMetricLister koordslolisters.NodeMetricLister args *deschedulerconfig.LowNodeLoadArgs nodeAnomalyDetectors *gocache.Cache + prodAnomalyDetectors *gocache.Cache } // NewLowNodeLoad builds plugin from its arguments while passing a handle @@ -107,6 +108,7 @@ func NewLowNodeLoad(args runtime.Object, handle framework.Handle) (framework.Plu koordSharedInformerFactory.WaitForCacheSync(context.TODO().Done()) nodeAnomalyDetectors := gocache.New(loadLoadUtilizationArgs.DetectorCacheTimeout.Duration, loadLoadUtilizationArgs.DetectorCacheTimeout.Duration) + prodAnomalyDetectors := gocache.New(loadLoadUtilizationArgs.DetectorCacheTimeout.Duration, loadLoadUtilizationArgs.DetectorCacheTimeout.Duration) return &LowNodeLoad{ handle: handle, @@ -114,6 +116,7 @@ func NewLowNodeLoad(args runtime.Object, handle framework.Handle) (framework.Plu args: loadLoadUtilizationArgs, podFilter: podFilter, nodeAnomalyDetectors: nodeAnomalyDetectors, + prodAnomalyDetectors: prodAnomalyDetectors, }, nil } @@ -161,51 +164,69 @@ func (pl *LowNodeLoad) processOneNodePool(ctx context.Context, nodePool *desched return nil } - lowThresholds, highThresholds := newThresholds(nodePool.UseDeviationThresholds, nodePool.LowThresholds, nodePool.HighThresholds) + lowThresholds, highThresholds, prodLowThresholds, 
prodHighThresholds := newThresholds(nodePool.UseDeviationThresholds, nodePool.LowThresholds, nodePool.HighThresholds, nodePool.ProdLowThresholds, nodePool.ProdHighThresholds) resourceNames := getResourceNames(lowThresholds) nodeUsages := getNodeUsage(nodes, resourceNames, pl.nodeMetricLister, pl.handle.GetPodsAssignedToNodeFunc(), pl.args.NodeMetricExpirationSeconds) - nodeThresholds := getNodeThresholds(nodeUsages, lowThresholds, highThresholds, resourceNames, nodePool.UseDeviationThresholds) - lowNodes, sourceNodes := classifyNodes(nodeUsages, nodeThresholds, lowThresholdFilter, highThresholdFilter) + nodeThresholds := getNodeThresholds(nodeUsages, lowThresholds, highThresholds, prodLowThresholds, prodHighThresholds, resourceNames, nodePool.UseDeviationThresholds) + lowNodes, sourceNodes, prodLowNodes, prodHighNodes, bothLowNodes := classifyNodes(nodeUsages, nodeThresholds, lowThresholdFilter, highThresholdFilter, prodLowThresholdFilter, prodHighThresholdFilter) - logUtilizationCriteria(nodePool.Name, "Criteria for nodes under low thresholds and above high thresholds", lowThresholds, highThresholds, len(lowNodes), len(sourceNodes), len(nodes)) + logUtilizationCriteria(nodePool.Name, "Criteria for nodes under low thresholds and above high thresholds", lowThresholds, highThresholds, + prodLowThresholds, prodHighThresholds, len(lowNodes), len(sourceNodes), len(prodLowNodes), len(prodHighNodes), len(bothLowNodes), len(nodes)) - if len(sourceNodes) == 0 { + if len(sourceNodes) == 0 && len(prodHighNodes) == 0 { klog.V(4).InfoS("All nodes are under target utilization, nothing to do here", "nodePool", nodePool.Name) return nil } abnormalNodes := filterRealAbnormalNodes(sourceNodes, pl.nodeAnomalyDetectors, nodePool.AnomalyCondition) - if len(abnormalNodes) == 0 { + abnormalProdNodes := filterRealAbnormalNodes(prodHighNodes, pl.prodAnomalyDetectors, nodePool.AnomalyCondition) + if len(abnormalNodes) == 0 && len(abnormalProdNodes) == 0 { klog.V(4).InfoS("None of the nodes were detected as anomalous, nothing to do here", "nodePool", nodePool.Name) return nil } - if len(lowNodes) == 0 { + if len(lowNodes) == 0 && len(prodLowNodes) == 0 && len(bothLowNodes) == 0 { klog.V(4).InfoS("No nodes are underutilized, nothing to do here, you might tune your thresholds further", "nodePool", nodePool.Name) return nil } resetNodesAsNormal(lowNodes, pl.nodeAnomalyDetectors) + resetNodesAsNormal(prodLowNodes, pl.prodAnomalyDetectors) + resetNodesAsNormal(bothLowNodes, pl.nodeAnomalyDetectors) - if len(lowNodes) <= int(pl.args.NumberOfNodes) { + allLowNodes := len(lowNodes) + len(prodLowNodes) + len(bothLowNodes) + if allLowNodes <= int(pl.args.NumberOfNodes) { klog.V(4).InfoS("Number of nodes underutilized is less or equal than NumberOfNodes, nothing to do here", - "underutilizedNodes", len(lowNodes), "numberOfNodes", pl.args.NumberOfNodes, "nodePool", nodePool.Name) + "underutilizedNodes", allLowNodes, "numberOfNodes", pl.args.NumberOfNodes, "nodePool", nodePool.Name) return nil } - if len(lowNodes) == len(nodes) { + if allLowNodes == len(nodes) { klog.V(4).InfoS("All nodes are underutilized, nothing to do here", "nodePool", nodePool.Name) return nil } - continueEvictionCond := func(nodeInfo NodeInfo, totalAvailableUsages map[corev1.ResourceName]*resource.Quantity) bool { - if _, overutilized := isNodeOverutilized(nodeInfo.NodeUsage.usage, nodeInfo.thresholds.highResourceThreshold); !overutilized { - resetNodesAsNormal([]NodeInfo{nodeInfo}, pl.nodeAnomalyDetectors) + continueEvictionCond := func(nodeInfo NodeInfo, 
totalAvailableUsages map[corev1.ResourceName]*resource.Quantity, prod bool) bool { + var usage, thresholds map[corev1.ResourceName]*resource.Quantity + if prod { + usage = nodeInfo.NodeUsage.prodUsage + thresholds = nodeInfo.thresholds.prodHighResourceThreshold + } else { + usage = nodeInfo.NodeUsage.usage + thresholds = nodeInfo.thresholds.highResourceThreshold + } + if _, overutilized := isNodeOverutilized(usage, thresholds); !overutilized { + if prod { + resetNodesAsNormal([]NodeInfo{nodeInfo}, pl.prodAnomalyDetectors) + } else { + resetNodesAsNormal([]NodeInfo{nodeInfo}, pl.nodeAnomalyDetectors) + } return false } for _, resourceName := range resourceNames { if quantity, ok := totalAvailableUsages[resourceName]; ok { if quantity.CmpInt64(0) < 1 { + klog.V(4).InfoS("available usage is too low.", "resourceName", resourceName, "prod", prod) return false } } @@ -213,13 +234,17 @@ func (pl *LowNodeLoad) processOneNodePool(ctx context.Context, nodePool *desched return true } - sortNodesByUsage(abnormalNodes, nodePool.ResourceWeights, false) + sortNodesByUsage(abnormalNodes, nodePool.ResourceWeights, false, false) + sortNodesByUsage(abnormalProdNodes, nodePool.ResourceWeights, false, true) evictPodsFromSourceNodes( ctx, nodePool.Name, abnormalNodes, lowNodes, + abnormalProdNodes, + prodLowNodes, + bothLowNodes, pl.args.DryRun, pl.args.NodeFit, nodePool.ResourceWeights, @@ -228,9 +253,10 @@ func (pl *LowNodeLoad) processOneNodePool(ctx context.Context, nodePool *desched pl.handle.GetPodsAssignedToNodeFunc(), resourceNames, continueEvictionCond, - overUtilizedEvictionReason(highThresholds), + overUtilizedEvictionReason(highThresholds, prodHighThresholds), ) tryMarkNodesAsNormal(abnormalNodes, pl.nodeAnomalyDetectors) + tryMarkNodesAsNormal(abnormalProdNodes, pl.prodAnomalyDetectors) for _, v := range sourceNodes { processedNodes.Insert(v.node.Name) } @@ -283,11 +309,13 @@ func filterRealAbnormalNodes(sourceNodes []NodeInfo, nodeAnomalyDetectors *gocac return abnormalNodes } -func newThresholds(useDeviationThresholds bool, low, high deschedulerconfig.ResourceThresholds) (thresholds, highThresholds deschedulerconfig.ResourceThresholds) { +func newThresholds(useDeviationThresholds bool, low, high, lowProd, highProd deschedulerconfig.ResourceThresholds) (thresholds, highThresholds, prodThreshold, highProdThreshold deschedulerconfig.ResourceThresholds) { thresholds = low highThresholds = high + prodThreshold = lowProd + highProdThreshold = highProd resourceNames := getResourceNames(thresholds) - resourceNames = append(resourceNames, getResourceNames(highThresholds)...) + resourceNames = append(append(append(resourceNames, getResourceNames(highThresholds)...), getResourceNames(prodThreshold)...), getResourceNames(highProdThreshold)...) 
resourceNames = append(resourceNames, corev1.ResourceMemory) if thresholds == nil { @@ -296,6 +324,12 @@ func newThresholds(useDeviationThresholds bool, low, high deschedulerconfig.Reso if highThresholds == nil { highThresholds = make(deschedulerconfig.ResourceThresholds) } + if prodThreshold == nil { + prodThreshold = make(deschedulerconfig.ResourceThresholds) + } + if highProdThreshold == nil { + highProdThreshold = make(deschedulerconfig.ResourceThresholds) + } for _, resourceName := range resourceNames { if _, ok := thresholds[resourceName]; !ok { @@ -307,9 +341,18 @@ func newThresholds(useDeviationThresholds bool, low, high deschedulerconfig.Reso highThresholds[resourceName] = MaxResourcePercentage } } + if _, ok := prodThreshold[resourceName]; !ok { + if useDeviationThresholds { + prodThreshold[resourceName] = MinResourcePercentage + highProdThreshold[resourceName] = MinResourcePercentage + } else { + prodThreshold[resourceName] = MaxResourcePercentage + highProdThreshold[resourceName] = MaxResourcePercentage + } + } } - return thresholds, highThresholds + return thresholds, highThresholds, prodThreshold, highProdThreshold } func lowThresholdFilter(usage *NodeUsage, threshold NodeThresholds) bool { @@ -320,11 +363,24 @@ func lowThresholdFilter(usage *NodeUsage, threshold NodeThresholds) bool { return isNodeUnderutilized(usage.usage, threshold.lowResourceThreshold) } +func prodLowThresholdFilter(usage *NodeUsage, threshold NodeThresholds) bool { + if nodeutil.IsNodeUnschedulable(usage.node) { + klog.V(4).InfoS("Node is unschedulable, thus not considered as underutilized", "node", klog.KObj(usage.node)) + return false + } + return isNodeUnderutilized(usage.prodUsage, threshold.prodLowResourceThreshold) +} + func highThresholdFilter(usage *NodeUsage, threshold NodeThresholds) bool { _, overutilized := isNodeOverutilized(usage.usage, threshold.highResourceThreshold) return overutilized } +func prodHighThresholdFilter(usage *NodeUsage, threshold NodeThresholds) bool { + _, overutilized := isNodeOverutilized(usage.prodUsage, threshold.prodHighResourceThreshold) + return overutilized +} + func filterNodes(nodeSelector *metav1.LabelSelector, nodes []*corev1.Node, processedNodes sets.String) ([]*corev1.Node, error) { if nodeSelector == nil { return nodes, nil @@ -370,32 +426,52 @@ func filterPods(podSelectors []deschedulerconfig.LowNodeLoadPodSelector) (framew }, nil } -func logUtilizationCriteria(nodePoolName, message string, lowThresholds, highThresholds deschedulerconfig.ResourceThresholds, totalLowNodesNumber, totalHighNodesNumber, totalNumber int) { +func logUtilizationCriteria(nodePoolName, message string, lowThresholds, highThresholds, prodLowThresholds, prodHighThresholds deschedulerconfig.ResourceThresholds, + totalLowNodesNumber, totalHighNodesNumber, prodLowNodesNumber, prodHighNodesNumber, bothLowNodesNumber, totalNumber int) { utilizationCriteria := []interface{}{ "nodePool", nodePoolName, "nodesUnderLowThresholds", totalLowNodesNumber, "nodesAboveHighThresholds", totalHighNodesNumber, - "nodesAppropriately", totalNumber - totalLowNodesNumber - totalHighNodesNumber, + "prodNodesUnderLowThresholds", prodLowNodesNumber, + "prodNodesAboveHighThresholds", prodHighNodesNumber, + "bothProdNodesLowThresholds", bothLowNodesNumber, + "nodesAppropriately", totalNumber - totalLowNodesNumber - totalHighNodesNumber - prodLowNodesNumber - prodHighNodesNumber - bothLowNodesNumber, "totalNumberOfNodes", totalNumber, } for name := range lowThresholds { - utilizationCriteria = 
append(utilizationCriteria, string(name), fmt.Sprintf("%d%%(low)-%d%%(high)", int64(lowThresholds[name]), int64(highThresholds[name]))) + utilizationCriteria = append(utilizationCriteria, string(name), fmt.Sprintf("%d%%(lowNode)-%d%%(highNode),%d%%(prodLow)-%d%%(prodHigh)", + int64(lowThresholds[name]), int64(highThresholds[name]), int64(prodLowThresholds[name]), int64(prodHighThresholds[name]))) } klog.InfoS(message, utilizationCriteria...) } -func overUtilizedEvictionReason(highThresholds deschedulerconfig.ResourceThresholds) evictionReasonGeneratorFn { +func overUtilizedEvictionReason(highThresholds, prodHighThresholds deschedulerconfig.ResourceThresholds) evictionReasonGeneratorFn { resourceNames := getResourceNames(highThresholds) sort.Slice(resourceNames, func(i, j int) bool { return resourceNames[i] < resourceNames[j] }) - return func(nodeInfo NodeInfo) string { - overutilizedResources, _ := isNodeOverutilized(nodeInfo.usage, nodeInfo.thresholds.highResourceThreshold) - usagePercentages := resourceUsagePercentages(nodeInfo.NodeUsage) + return func(nodeInfo NodeInfo, prod bool) string { + var usage, thresholds map[corev1.ResourceName]*resource.Quantity + var thresholdsPercent deschedulerconfig.ResourceThresholds + var reason string + if prod { + usage = nodeInfo.prodUsage + thresholds = nodeInfo.thresholds.prodHighResourceThreshold + thresholdsPercent = prodHighThresholds + reason = "prod" + } else { + usage = nodeInfo.usage + thresholds = nodeInfo.thresholds.highResourceThreshold + thresholdsPercent = highThresholds + reason = "node" + } + + overutilizedResources, _ := isNodeOverutilized(usage, thresholds) + usagePercentages := resourceUsagePercentages(nodeInfo.NodeUsage, prod) var infos []string for _, resourceName := range resourceNames { if _, ok := overutilizedResources[resourceName]; ok { - infos = append(infos, fmt.Sprintf("%s usage(%.2f%%)>threshold(%.2f%%)", resourceName, usagePercentages[resourceName], highThresholds[resourceName])) + infos = append(infos, fmt.Sprintf("%s %s usage(%.2f%%)>threshold(%.2f%%)", reason, resourceName, usagePercentages[resourceName], thresholdsPercent[resourceName])) } } return fmt.Sprintf("node is overutilized, %s", strings.Join(infos, ", ")) diff --git a/pkg/descheduler/framework/plugins/loadaware/low_node_load_test.go b/pkg/descheduler/framework/plugins/loadaware/low_node_load_test.go index 247985f75..7612b75af 100644 --- a/pkg/descheduler/framework/plugins/loadaware/low_node_load_test.go +++ b/pkg/descheduler/framework/plugins/loadaware/low_node_load_test.go @@ -174,6 +174,8 @@ func TestLowNodeLoad(t *testing.T) { name string useDeviationThresholds bool thresholds, targetThresholds ResourceThresholds + prodLowThresholds ResourceThresholds + prodHighThresholds ResourceThresholds nodes []*corev1.Node pods []*corev1.Pod podMetrics map[types.NamespacedName]*slov1alpha1.ResourceMap @@ -406,7 +408,7 @@ func TestLowNodeLoad(t *testing.T) { expectedPodsEvicted: 2, }, { - name: "without priorities stop when cpu capacity is depleted", + name: "without priorities stop when memory capacity is depleted", thresholds: ResourceThresholds{ corev1.ResourceCPU: 30, corev1.ResourcePods: 30, @@ -453,7 +455,7 @@ func TestLowNodeLoad(t *testing.T) { }), test.BuildTestPod("p9", 400, 2100, n2NodeName, test.SetRSOwnerRef), }, - // 4 pods available for eviction based on corev1.ResourcePods, only 3 pods can be evicted before cpu is depleted + // 4 pods available for eviction based on corev1.ResourcePods, only 3 pods can be evicted before memory is depleted 
expectedPodsEvicted: 3, }, { @@ -957,6 +959,285 @@ func TestLowNodeLoad(t *testing.T) { expectedPodsEvicted: 2, evictedPods: []string{}, }, + { + name: "node && prod thresholds, node under thresholds, but prod over thresholds", + thresholds: ResourceThresholds{ + corev1.ResourceCPU: 30, + corev1.ResourcePods: 30, + }, + targetThresholds: ResourceThresholds{ + corev1.ResourceCPU: 90, + corev1.ResourcePods: 90, + }, + prodLowThresholds: ResourceThresholds{ + corev1.ResourceCPU: 20, + corev1.ResourcePods: 20, + }, + prodHighThresholds: ResourceThresholds{ + corev1.ResourceCPU: 40, + corev1.ResourcePods: 40, + }, + nodes: []*corev1.Node{ + test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil), + test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil), + test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable), + }, + pods: []*corev1.Pod{ + test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p2", 400, 0, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p3", 400, 0, n1NodeName, func(pod *corev1.Pod) { + test.SetRSOwnerRef(pod) + if pod.Labels == nil { + labels := make(map[string]string) + pod.Labels = labels + pod.Labels["koordinator.sh/priority-class"] = "koord-batch" + } + }), + test.BuildTestPod("p4", 400, 0, n1NodeName, func(pod *corev1.Pod) { + test.SetRSOwnerRef(pod) + if pod.Labels == nil { + labels := make(map[string]string) + pod.Labels = labels + pod.Labels["koordinator.sh/priority-class"] = "koord-batch" + } + }), + test.BuildTestPod("p5", 400, 0, n1NodeName, func(pod *corev1.Pod) { + test.SetRSOwnerRef(pod) + if pod.Labels == nil { + labels := make(map[string]string) + pod.Labels = labels + pod.Labels["koordinator.sh/priority-class"] = "koord-batch" + } + }), + // These won't be evicted. + test.BuildTestPod("p6", 400, 0, n1NodeName, test.SetDSOwnerRef), + test.BuildTestPod("p7", 400, 0, n1NodeName, func(pod *corev1.Pod) { + // A pod with local storage. + test.SetNormalOwnerRef(pod) + pod.Spec.Volumes = []corev1.Volume{ + { + Name: "sample", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{Path: "somePath"}, + EmptyDir: &corev1.EmptyDirVolumeSource{ + SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI), + }, + }, + }, + } + // A Mirror Pod. + pod.Annotations = test.GetMirrorPodAnnotation() + }), + test.BuildTestPod("p8", 400, 0, n1NodeName, func(pod *corev1.Pod) { + // A Critical Pod. 
+ pod.Namespace = "kube-system" + priority := utils.SystemCriticalPriority + pod.Spec.Priority = &priority + }), + test.BuildTestPod("p9", 400, 0, n2NodeName, test.SetRSOwnerRef), + }, + expectedPodsEvicted: 2, + evictedPods: []string{}, + }, + { + name: "both node and prod usage is higher than thresholds, only node usage will be evicted", + thresholds: ResourceThresholds{ + corev1.ResourceCPU: 30, + corev1.ResourcePods: 30, + }, + targetThresholds: ResourceThresholds{ + corev1.ResourceCPU: 70, + corev1.ResourcePods: 80, + }, + prodLowThresholds: ResourceThresholds{ + corev1.ResourceCPU: 20, + corev1.ResourcePods: 20, + }, + prodHighThresholds: ResourceThresholds{ + corev1.ResourceCPU: 40, + corev1.ResourcePods: 40, + }, + nodes: []*corev1.Node{ + test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil), + test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil), + test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable), + }, + pods: []*corev1.Pod{ + test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p2", 400, 0, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p3", 400, 0, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p4", 400, 0, n1NodeName, func(pod *corev1.Pod) { + test.SetRSOwnerRef(pod) + if pod.Labels == nil { + labels := make(map[string]string) + pod.Labels = labels + pod.Labels["koordinator.sh/priority-class"] = "koord-batch" + } + }), + test.BuildTestPod("p5", 400, 0, n1NodeName, func(pod *corev1.Pod) { + test.SetRSOwnerRef(pod) + if pod.Labels == nil { + labels := make(map[string]string) + pod.Labels = labels + pod.Labels["koordinator.sh/priority-class"] = "koord-batch" + } + }), + // These won't be evicted. + test.BuildTestPod("p6", 400, 0, n1NodeName, test.SetDSOwnerRef), + test.BuildTestPod("p7", 400, 0, n1NodeName, func(pod *corev1.Pod) { + // A pod with local storage. + test.SetNormalOwnerRef(pod) + pod.Spec.Volumes = []corev1.Volume{ + { + Name: "sample", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{Path: "somePath"}, + EmptyDir: &corev1.EmptyDirVolumeSource{ + SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI), + }, + }, + }, + } + // A Mirror Pod. + pod.Annotations = test.GetMirrorPodAnnotation() + }), + test.BuildTestPod("p8", 400, 0, n1NodeName, func(pod *corev1.Pod) { + // A Critical Pod. 
+ pod.Namespace = "kube-system" + priority := utils.SystemCriticalPriority + pod.Spec.Priority = &priority + }), + test.BuildTestPod("p9", 400, 0, n2NodeName, test.SetRSOwnerRef), + }, + expectedPodsEvicted: 1, + evictedPods: []string{}, + }, + { + name: "support reschedule prod pod only", + thresholds: ResourceThresholds{ + corev1.ResourceCPU: 0, + corev1.ResourcePods: 0, + }, + targetThresholds: ResourceThresholds{ + corev1.ResourceCPU: 100, + corev1.ResourcePods: 100, + }, + prodLowThresholds: ResourceThresholds{ + corev1.ResourceCPU: 30, + corev1.ResourcePods: 30, + }, + prodHighThresholds: ResourceThresholds{ + corev1.ResourceCPU: 50, + corev1.ResourcePods: 50, + }, + nodes: []*corev1.Node{ + test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil), + test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil), + test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable), + }, + pods: []*corev1.Pod{ + test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p2", 400, 0, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p3", 400, 0, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p4", 400, 0, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p5", 400, 0, n1NodeName, test.SetRSOwnerRef), + // These won't be evicted. + test.BuildTestPod("p6", 400, 0, n1NodeName, test.SetDSOwnerRef), + test.BuildTestPod("p7", 400, 0, n1NodeName, func(pod *corev1.Pod) { + // A pod with local storage. + test.SetNormalOwnerRef(pod) + pod.Spec.Volumes = []corev1.Volume{ + { + Name: "sample", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{Path: "somePath"}, + EmptyDir: &corev1.EmptyDirVolumeSource{ + SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI), + }, + }, + }, + } + // A Mirror Pod. + pod.Annotations = test.GetMirrorPodAnnotation() + }), + test.BuildTestPod("p8", 400, 0, n1NodeName, func(pod *corev1.Pod) { + // A Critical Pod. + pod.Namespace = "kube-system" + priority := utils.SystemCriticalPriority + pod.Spec.Priority = &priority + }), + test.BuildTestPod("p9", 400, 0, n2NodeName, test.SetRSOwnerRef), + }, + expectedPodsEvicted: 4, + evictedPods: []string{}, + }, + { + name: "both low resource used by high node, prod pods don`t have enough resource to rebalance", + thresholds: ResourceThresholds{ + corev1.ResourceCPU: 30, + corev1.ResourcePods: 50, + }, + targetThresholds: ResourceThresholds{ + corev1.ResourceCPU: 60, + corev1.ResourcePods: 80, + }, + prodLowThresholds: ResourceThresholds{ + corev1.ResourceCPU: 20, + corev1.ResourcePods: 30, + }, + prodHighThresholds: ResourceThresholds{ + corev1.ResourceCPU: 30, + corev1.ResourcePods: 50, + }, + nodes: []*corev1.Node{ + test.BuildTestNode(n1NodeName, 4000, 3000, 20, nil), + test.BuildTestNode(n2NodeName, 4000, 3000, 20, nil), + test.BuildTestNode(n3NodeName, 4000, 3000, 20, nil), + }, + pods: []*corev1.Pod{ + test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p2", 400, 0, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p3", 400, 0, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p4", 400, 0, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p5", 400, 0, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p16", 400, 0, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p17", 400, 0, n1NodeName, test.SetRSOwnerRef), + // These won't be evicted. + test.BuildTestPod("p6", 400, 0, n1NodeName, test.SetDSOwnerRef), + test.BuildTestPod("p7", 400, 0, n1NodeName, func(pod *corev1.Pod) { + // A pod with local storage. 
+ test.SetNormalOwnerRef(pod) + pod.Spec.Volumes = []corev1.Volume{ + { + Name: "sample", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{Path: "somePath"}, + EmptyDir: &corev1.EmptyDirVolumeSource{ + SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI), + }, + }, + }, + } + // A Mirror Pod. + pod.Annotations = test.GetMirrorPodAnnotation() + }), + test.BuildTestPod("p8", 400, 0, n1NodeName, func(pod *corev1.Pod) { + // A Critical Pod. + pod.Namespace = "kube-system" + priority := utils.SystemCriticalPriority + pod.Spec.Priority = &priority + }), + test.BuildTestPod("p9", 400, 0, n2NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p10", 400, 0, n3NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p11", 400, 0, n3NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p12", 400, 0, n3NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p13", 400, 0, n3NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p14", 400, 0, n3NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p15", 400, 0, n3NodeName, test.SetRSOwnerRef), + }, + expectedPodsEvicted: 5, + evictedPods: []string{}, + }, } for _, tt := range testCases { @@ -1189,6 +1470,8 @@ func TestMaxEvictionTotal(t *testing.T) { { LowThresholds: tt.thresholds, HighThresholds: tt.targetThresholds, + ProdLowThresholds: tt.prodLowThresholds, + ProdHighThresholds: tt.prodHighThresholds, UseDeviationThresholds: tt.useDeviationThresholds, AnomalyCondition: &deschedulerconfig.LoadAnomalyCondition{ ConsecutiveAbnormalities: 1, @@ -1224,8 +1507,11 @@ func TestOverUtilizedEvictionReason(t *testing.T) { tests := []struct { name string targetThresholds ResourceThresholds + prodThresholds ResourceThresholds + prod bool node *corev1.Node usage map[corev1.ResourceName]*resource.Quantity + prodUsage map[corev1.ResourceName]*resource.Quantity want string }{ { @@ -1249,7 +1535,7 @@ func TestOverUtilizedEvictionReason(t *testing.T) { corev1.ResourceCPU: resource.NewMilliQuantity(64*1000, resource.DecimalSI), corev1.ResourceMemory: resource.NewQuantity(32*1024*1024*1024, resource.BinarySI), }, - want: "node is overutilized, cpu usage(66.67%)>threshold(50.00%)", + want: "node is overutilized, node cpu usage(66.67%)>threshold(50.00%)", }, { name: "both cpu and memory overutilized", @@ -1272,25 +1558,91 @@ func TestOverUtilizedEvictionReason(t *testing.T) { corev1.ResourceCPU: resource.NewMilliQuantity(64*1000, resource.DecimalSI), corev1.ResourceMemory: resource.NewQuantity(400*1024*1024*1024, resource.BinarySI), }, - want: "node is overutilized, cpu usage(66.67%)>threshold(50.00%), memory usage(78.12%)>threshold(50.00%)", + want: "node is overutilized, node cpu usage(66.67%)>threshold(50.00%), node memory usage(78.12%)>threshold(50.00%)", + }, + { + name: "prod cpu overutilized", + targetThresholds: deschedulerconfig.ResourceThresholds{ + corev1.ResourceCPU: 50, + corev1.ResourceMemory: 50, + }, + prodThresholds: deschedulerconfig.ResourceThresholds{ + corev1.ResourceCPU: 40, + corev1.ResourceMemory: 40, + }, + prod: true, + node: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node", + }, + Status: corev1.NodeStatus{ + Allocatable: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("96"), + corev1.ResourceMemory: resource.MustParse("512Gi"), + }, + }, + }, + usage: map[corev1.ResourceName]*resource.Quantity{ + corev1.ResourceCPU: resource.NewMilliQuantity(46*1000, resource.DecimalSI), + corev1.ResourceMemory: resource.NewQuantity(250*1024*1024*1024, resource.BinarySI), + }, + prodUsage: 
map[corev1.ResourceName]*resource.Quantity{ + corev1.ResourceCPU: resource.NewMilliQuantity(45*1000, resource.DecimalSI), + corev1.ResourceMemory: resource.NewQuantity(200*1024*1024*1024, resource.BinarySI), + }, + want: "node is overutilized, prod cpu usage(46.88%)>threshold(40.00%)", + }, + { + name: "prod cpu && memory overutilized", + targetThresholds: deschedulerconfig.ResourceThresholds{ + corev1.ResourceCPU: 50, + corev1.ResourceMemory: 50, + }, + prodThresholds: deschedulerconfig.ResourceThresholds{ + corev1.ResourceCPU: 40, + corev1.ResourceMemory: 40, + }, + prod: true, + node: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node", + }, + Status: corev1.NodeStatus{ + Allocatable: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("96"), + corev1.ResourceMemory: resource.MustParse("512Gi"), + }, + }, + }, + usage: map[corev1.ResourceName]*resource.Quantity{ + corev1.ResourceCPU: resource.NewMilliQuantity(46*1000, resource.DecimalSI), + corev1.ResourceMemory: resource.NewQuantity(250*1024*1024*1024, resource.BinarySI), + }, + prodUsage: map[corev1.ResourceName]*resource.Quantity{ + corev1.ResourceCPU: resource.NewMilliQuantity(45*1000, resource.DecimalSI), + corev1.ResourceMemory: resource.NewQuantity(250*1024*1024*1024, resource.BinarySI), + }, + want: "node is overutilized, prod cpu usage(46.88%)>threshold(40.00%), prod memory usage(48.83%)>threshold(40.00%)", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { nodeUsage := &NodeUsage{ - node: tt.node, - usage: tt.usage, + node: tt.node, + usage: tt.usage, + prodUsage: tt.prodUsage, } resourceNames := getResourceNames(tt.targetThresholds) - nodeThresholds := getNodeThresholds(map[string]*NodeUsage{"test-node": nodeUsage}, nil, tt.targetThresholds, resourceNames, false) + nodeThresholds := getNodeThresholds(map[string]*NodeUsage{"test-node": nodeUsage}, nil, tt.targetThresholds, + nil, tt.prodThresholds, resourceNames, false) - evictionReasonGenerator := overUtilizedEvictionReason(tt.targetThresholds) + evictionReasonGenerator := overUtilizedEvictionReason(tt.targetThresholds, tt.prodThresholds) got := evictionReasonGenerator(NodeInfo{ NodeUsage: nodeUsage, thresholds: nodeThresholds["test-node"], - }) + }, tt.prod) assert.Equal(t, tt.want, got) }) } diff --git a/pkg/descheduler/framework/plugins/loadaware/utilization_util.go b/pkg/descheduler/framework/plugins/loadaware/utilization_util.go index de6e8e4cd..c844e3f14 100644 --- a/pkg/descheduler/framework/plugins/loadaware/utilization_util.go +++ b/pkg/descheduler/framework/plugins/loadaware/utilization_util.go @@ -28,6 +28,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/klog/v2" + "github.com/koordinator-sh/koordinator/apis/extension" slov1alpha1 "github.com/koordinator-sh/koordinator/apis/slo/v1alpha1" slolisters "github.com/koordinator-sh/koordinator/pkg/client/listers/slo/v1alpha1" deschedulerconfig "github.com/koordinator-sh/koordinator/pkg/descheduler/apis/config" @@ -43,13 +44,17 @@ type ResourceThresholds = deschedulerconfig.ResourceThresholds type NodeUsage struct { node *corev1.Node allPods []*corev1.Pod + prodPods []*corev1.Pod usage map[corev1.ResourceName]*resource.Quantity + prodUsage map[corev1.ResourceName]*resource.Quantity podMetrics map[types.NamespacedName]*slov1alpha1.ResourceMap } type NodeThresholds struct { - lowResourceThreshold map[corev1.ResourceName]*resource.Quantity - highResourceThreshold map[corev1.ResourceName]*resource.Quantity + lowResourceThreshold map[corev1.ResourceName]*resource.Quantity + 
highResourceThreshold map[corev1.ResourceName]*resource.Quantity + prodLowResourceThreshold map[corev1.ResourceName]*resource.Quantity + prodHighResourceThreshold map[corev1.ResourceName]*resource.Quantity } type NodeInfo struct { @@ -57,9 +62,9 @@ type NodeInfo struct { thresholds NodeThresholds } -type continueEvictionCond func(nodeInfo NodeInfo, totalAvailableUsages map[corev1.ResourceName]*resource.Quantity) bool +type continueEvictionCond func(nodeInfo NodeInfo, totalAvailableUsages map[corev1.ResourceName]*resource.Quantity, prod bool) bool -type evictionReasonGeneratorFn func(nodeInfo NodeInfo) string +type evictionReasonGeneratorFn func(nodeInfo NodeInfo, prod bool) string const ( MinResourcePercentage = 0 @@ -78,20 +83,22 @@ func normalizePercentage(percent Percentage) Percentage { func getNodeThresholds( nodeUsages map[string]*NodeUsage, - lowThreshold, highThreshold ResourceThresholds, + lowThreshold, highThreshold, prodLowThreshold, prodHighThreshold ResourceThresholds, resourceNames []corev1.ResourceName, useDeviationThresholds bool, ) map[string]NodeThresholds { - var averageResourceUsagePercent ResourceThresholds + var averageResourceUsagePercent, prodAverageResourceUsagePercent ResourceThresholds if useDeviationThresholds { - averageResourceUsagePercent = calcAverageResourceUsagePercent(nodeUsages) + averageResourceUsagePercent, prodAverageResourceUsagePercent = calcAverageResourceUsagePercent(nodeUsages) } nodeThresholdsMap := map[string]NodeThresholds{} for _, nodeUsage := range nodeUsages { thresholds := NodeThresholds{ - lowResourceThreshold: map[corev1.ResourceName]*resource.Quantity{}, - highResourceThreshold: map[corev1.ResourceName]*resource.Quantity{}, + lowResourceThreshold: map[corev1.ResourceName]*resource.Quantity{}, + highResourceThreshold: map[corev1.ResourceName]*resource.Quantity{}, + prodLowResourceThreshold: map[corev1.ResourceName]*resource.Quantity{}, + prodHighResourceThreshold: map[corev1.ResourceName]*resource.Quantity{}, } allocatable := nodeUsage.node.Status.Allocatable for _, resourceName := range resourceNames { @@ -100,13 +107,19 @@ func getNodeThresholds( if lowThreshold[resourceName] == MinResourcePercentage { thresholds.lowResourceThreshold[resourceName] = &resourceCapacity thresholds.highResourceThreshold[resourceName] = &resourceCapacity + thresholds.prodLowResourceThreshold[resourceName] = &resourceCapacity + thresholds.prodHighResourceThreshold[resourceName] = &resourceCapacity } else { thresholds.lowResourceThreshold[resourceName] = resourceThreshold(allocatable, resourceName, normalizePercentage(averageResourceUsagePercent[resourceName]-lowThreshold[resourceName])) thresholds.highResourceThreshold[resourceName] = resourceThreshold(allocatable, resourceName, normalizePercentage(averageResourceUsagePercent[resourceName]+highThreshold[resourceName])) + thresholds.prodLowResourceThreshold[resourceName] = resourceThreshold(allocatable, resourceName, normalizePercentage(prodAverageResourceUsagePercent[resourceName]-prodLowThreshold[resourceName])) + thresholds.prodHighResourceThreshold[resourceName] = resourceThreshold(allocatable, resourceName, normalizePercentage(prodAverageResourceUsagePercent[resourceName]+prodHighThreshold[resourceName])) } } else { thresholds.lowResourceThreshold[resourceName] = resourceThreshold(allocatable, resourceName, lowThreshold[resourceName]) thresholds.highResourceThreshold[resourceName] = resourceThreshold(allocatable, resourceName, highThreshold[resourceName]) + 
thresholds.prodLowResourceThreshold[resourceName] = resourceThreshold(allocatable, resourceName, prodLowThreshold[resourceName]) + thresholds.prodHighResourceThreshold[resourceName] = resourceThreshold(allocatable, resourceName, prodHighThreshold[resourceName]) } } nodeThresholdsMap[nodeUsage.node.Name] = thresholds @@ -137,6 +150,15 @@ func getNodeUsage(nodes []*corev1.Node, resourceNames []corev1.ResourceName, nod klog.ErrorS(err, "Node will not be processed, error accessing its pods", "node", klog.KObj(v)) continue } + prodPods := make([]*corev1.Pod, 0) + prodPodsMap := make(map[string]*corev1.Pod) + for _, pod := range pods { + if extension.GetPodPriorityClassWithDefault(pod) == extension.PriorityProd { + prodPods = append(prodPods, pod) + podKey := fmt.Sprintf("%s/%s", pod.Namespace, pod.Name) + prodPodsMap[podKey] = pod + } + } nodeMetric, err := nodeMetricLister.Get(v.Name) if err != nil { @@ -151,28 +173,28 @@ func getNodeUsage(nodes []*corev1.Node, resourceNames []corev1.ResourceName, nod } usage := map[corev1.ResourceName]*resource.Quantity{} + prodUsage := map[corev1.ResourceName]*resource.Quantity{} for _, resourceName := range resourceNames { sysUsage := nodeMetric.Status.NodeMetric.SystemUsage.ResourceList[resourceName] - var podUsage resource.Quantity + var podUsage, prodPodUsage resource.Quantity for _, podMetricInfo := range nodeMetric.Status.PodsMetric { podUsage.Add(podMetricInfo.PodUsage.ResourceList[resourceName]) + podKey := fmt.Sprintf("%s/%s", podMetricInfo.Namespace, podMetricInfo.Name) + if _, ok := prodPodsMap[podKey]; ok { + prodPodUsage.Add(podMetricInfo.PodUsage.ResourceList[resourceName]) + } } var usageQuantity resource.Quantity usageQuantity.Add(sysUsage) usageQuantity.Add(podUsage) - if usageQuantity.IsZero() { - switch resourceName { - case corev1.ResourceCPU: - usageQuantity = *resource.NewMilliQuantity(0, resource.DecimalSI) - case corev1.ResourceMemory, corev1.ResourceEphemeralStorage, corev1.ResourceStorage: - usageQuantity = *resource.NewQuantity(0, resource.BinarySI) - default: - usageQuantity = *resource.NewQuantity(0, resource.DecimalSI) - } - } + + usageQuantity = ResetResourceUsageIsZero(resourceName, usageQuantity) + prodPodUsage = ResetResourceUsageIsZero(resourceName, prodPodUsage) usage[resourceName] = &usageQuantity + prodUsage[resourceName] = &prodPodUsage } usage[corev1.ResourcePods] = resource.NewQuantity(int64(len(pods)), resource.DecimalSI) + prodUsage[corev1.ResourcePods] = resource.NewQuantity(int64(len(prodPods)), resource.DecimalSI) podMetrics := make(map[types.NamespacedName]*slov1alpha1.ResourceMap) for _, podMetric := range nodeMetric.Status.PodsMetric { @@ -183,6 +205,8 @@ func getNodeUsage(nodes []*corev1.Node, resourceNames []corev1.ResourceName, nod node: v, allPods: pods, usage: usage, + prodUsage: prodUsage, + prodPods: prodPods, podMetrics: podMetrics, } } @@ -190,36 +214,80 @@ func getNodeUsage(nodes []*corev1.Node, resourceNames []corev1.ResourceName, nod return nodeUsages } +func ResetResourceUsageIsZero(resourceName corev1.ResourceName, usageQuantity resource.Quantity) resource.Quantity { + if usageQuantity.IsZero() { + switch resourceName { + case corev1.ResourceCPU: + usageQuantity = *resource.NewMilliQuantity(0, resource.DecimalSI) + case corev1.ResourceMemory, corev1.ResourceEphemeralStorage, corev1.ResourceStorage: + usageQuantity = *resource.NewQuantity(0, resource.BinarySI) + default: + usageQuantity = *resource.NewQuantity(0, resource.DecimalSI) + } + return usageQuantity + } + return usageQuantity +} + // 
classifyNodes classifies the nodes into low-utilization or high-utilization nodes. // If a node lies between low and high thresholds, it is simply ignored. func classifyNodes( nodeUsages map[string]*NodeUsage, nodeThresholds map[string]NodeThresholds, - lowThresholdFilter, highThresholdFilter func(usage *NodeUsage, threshold NodeThresholds) bool, -) (lowNodes []NodeInfo, highNodes []NodeInfo) { + lowThresholdFilter, highThresholdFilter, prodLowThresholdFilter, prodHighThresholdFilter func(usage *NodeUsage, threshold NodeThresholds) bool, +) (lowNodes []NodeInfo, highNodes []NodeInfo, prodLowNodes []NodeInfo, prodHighNodes []NodeInfo, bothLowNodes []NodeInfo) { for _, nodeUsage := range nodeUsages { nodeInfo := NodeInfo{ NodeUsage: nodeUsage, thresholds: nodeThresholds[nodeUsage.node.Name], } + nodeUsageExplain := "" if lowThresholdFilter(nodeUsage, nodeThresholds[nodeUsage.node.Name]) { - klog.InfoS("Node is underutilized", "node", klog.KObj(nodeUsage.node), "usage", nodeUsage.usage, "usagePercentage", resourceUsagePercentages(nodeUsage)) - lowNodes = append(lowNodes, nodeInfo) + if prodHighThresholdFilter(nodeUsage, nodeThresholds[nodeUsage.node.Name]) { + prodHighNodes = append(prodHighNodes, nodeInfo) + nodeUsageExplain = "node usage below low thresholds but prod usage above prod high thresholds" + } else if prodLowThresholdFilter(nodeUsage, nodeThresholds[nodeUsage.node.Name]) { + bothLowNodes = append(bothLowNodes, nodeInfo) + nodeUsageExplain = "both node usage and prod usage below low thresholds" + } else { + lowNodes = append(lowNodes, nodeInfo) + nodeUsageExplain = "node usage below low thresholds and prod usage appropriate" + } + klog.InfoS("Node's utilization", "node", klog.KObj(nodeUsage.node), "result information", nodeUsageExplain, "node usage", nodeUsage.usage, "node usagePercentage", resourceUsagePercentages(nodeUsage, false), + "prod usage", nodeUsage.prodUsage, "prod usagePercentage", resourceUsagePercentages(nodeUsage, true)) } else if highThresholdFilter(nodeUsage, nodeThresholds[nodeUsage.node.Name]) { - klog.InfoS("Node is overutilized", "node", klog.KObj(nodeUsage.node), "usage", nodeUsage.usage, "usagePercentage", resourceUsagePercentages(nodeUsage)) highNodes = append(highNodes, nodeInfo) + nodeUsageExplain = "node usage above high thresholds" + klog.InfoS("Node's utilization", "node", klog.KObj(nodeUsage.node), "result information", nodeUsageExplain, "node usage", nodeUsage.usage, "node usagePercentage", resourceUsagePercentages(nodeUsage, false), + "prod usage", nodeUsage.prodUsage, "prod usagePercentage", resourceUsagePercentages(nodeUsage, true)) } else { - klog.InfoS("Node is appropriately utilized", "node", klog.KObj(nodeUsage.node), "usage", nodeUsage.usage, "usagePercentage", resourceUsagePercentages(nodeUsage)) + if prodHighThresholdFilter(nodeUsage, nodeThresholds[nodeUsage.node.Name]) { + prodHighNodes = append(prodHighNodes, nodeInfo) + nodeUsageExplain = "node usage appropriate but prod usage above prod high thresholds" + } else if prodLowThresholdFilter(nodeUsage, nodeThresholds[nodeUsage.node.Name]) { + prodLowNodes = append(prodLowNodes, nodeInfo) + nodeUsageExplain = "node usage appropriate but prod usage below prod low thresholds" + } else { + nodeUsageExplain = "both node usage and prod usage appropriate" + } + klog.InfoS("Node's utilization", "node", klog.KObj(nodeUsage.node), "result information", nodeUsageExplain, "node usage", nodeUsage.usage, "node usagePercentage", resourceUsagePercentages(nodeUsage, false), + "prod usage", nodeUsage.prodUsage, "prod usagePercentage", resourceUsagePercentages(nodeUsage, true)) 
} } - return lowNodes, highNodes + return lowNodes, highNodes, prodLowNodes, prodHighNodes, bothLowNodes } -func resourceUsagePercentages(nodeUsage *NodeUsage) map[corev1.ResourceName]float64 { +func resourceUsagePercentages(nodeUsage *NodeUsage, prod bool) map[corev1.ResourceName]float64 { allocatable := nodeUsage.node.Status.Allocatable resourceUsagePercentage := map[corev1.ResourceName]float64{} - for resourceName, resourceUsage := range nodeUsage.usage { + var usage map[corev1.ResourceName]*resource.Quantity + if prod { + usage = nodeUsage.prodUsage + } else { + usage = nodeUsage.usage + } + for resourceName, resourceUsage := range usage { resourceCapacity := allocatable[resourceName] if !resourceCapacity.IsZero() { resourceUsagePercentage[resourceName] = 100 * float64(resourceUsage.MilliValue()) / float64(resourceCapacity.MilliValue()) @@ -232,7 +300,8 @@ func resourceUsagePercentages(nodeUsage *NodeUsage) map[corev1.ResourceName]floa func evictPodsFromSourceNodes( ctx context.Context, nodePoolName string, - sourceNodes, destinationNodes []NodeInfo, + sourceNodes, destinationNodes, + prodSourceNodes, prodDestinationNodes, bothDestinationNodes []NodeInfo, dryRun bool, nodeFit bool, resourceWeights map[corev1.ResourceName]int64, @@ -243,40 +312,114 @@ func evictPodsFromSourceNodes( continueEviction continueEvictionCond, evictionReasonGenerator evictionReasonGeneratorFn, ) { - var targetNodes []*corev1.Node - totalAvailableUsages := map[corev1.ResourceName]*resource.Quantity{} - for _, destinationNode := range destinationNodes { - targetNodes = append(targetNodes, destinationNode.node) + totalAvailableUsages, targetNodes := targetAvailableUsage(destinationNodes, resourceNames, false) + prodAvailableUsages, prodTargetNodes := targetAvailableUsage(prodDestinationNodes, resourceNames, true) + bothTotalAvailableUsage, bothTotalNodes := targetAvailableUsage(bothDestinationNodes, resourceNames, false) + prodBothAvailableUsage, prodBothTotalNodes := targetAvailableUsage(bothDestinationNodes, resourceNames, true) + klog.V(4).InfoS("node pool availableUsage", "onlyNodeTotal", totalAvailableUsages, "onlyProdOnly", prodAvailableUsages, + "bothLowNodesTotal", bothTotalAvailableUsage, "bothLowProdTotal", prodBothAvailableUsage) + + nodeTotalAvailableUsages := newAvailableUsage(resourceNames) + for _, resourceName := range resourceNames { + if quantity, ok := nodeTotalAvailableUsages[resourceName]; ok { + if _, totalOk := totalAvailableUsages[resourceName]; totalOk { + quantity.Add(*totalAvailableUsages[resourceName]) + } + if _, bothOk := bothTotalAvailableUsage[resourceName]; bothOk { + quantity.Add(*bothTotalAvailableUsage[resourceName]) + } + } + } + nodeKeysAndValues := []interface{}{ + "nodePool", nodePoolName, + } + for resourceName, quantity := range nodeTotalAvailableUsages { + nodeKeysAndValues = append(nodeKeysAndValues, string(resourceName), quantity.String()) + } + klog.V(4).InfoS("Total node usage capacity to be moved", nodeKeysAndValues...) + + targetNodes = append(targetNodes, bothTotalNodes...) + balancePods(ctx, nodePoolName, sourceNodes, targetNodes, nodeTotalAvailableUsages, dryRun, nodeFit, false, resourceWeights, podEvictor, + podFilter, nodeIndexer, continueEviction, evictionReasonGenerator) + + // bothLowNode will be used by nodeHigh and prodHigh nodes, needs sub resources used by pods on nodeHigh. 
+    for _, resourceName := range resourceNames {
+        if quantity, ok := nodeTotalAvailableUsages[resourceName]; ok {
+            // The node-level balancing may already have consumed part of bothTotalAvailableUsage;
+            // only the remainder of nodeTotalAvailableUsages is still usable, so cap it at that.
+            if bothTotalAvailableUsage[resourceName].Cmp(*quantity) > 0 {
+                bothTotalAvailableUsage[resourceName] = quantity
+            }
+        }
+    }
 
-    for _, resourceName := range resourceNames {
-        quantity, ok := totalAvailableUsages[resourceName]
-        if !ok {
-            switch resourceName {
-            case corev1.ResourceCPU:
-                quantity = resource.NewMilliQuantity(0, resource.DecimalSI)
-            case corev1.ResourceMemory, corev1.ResourceEphemeralStorage, corev1.ResourceStorage:
-                quantity = resource.NewQuantity(0, resource.BinarySI)
-            default:
-                quantity = resource.NewQuantity(0, resource.DecimalSI)
+    prodTotalAvailableUsages := newAvailableUsage(resourceNames)
+    for _, resourceName := range resourceNames {
+        if prodTotalQuantity, ok := prodTotalAvailableUsages[resourceName]; ok {
+            if _, prodOk := prodAvailableUsages[resourceName]; prodOk {
+                prodTotalQuantity.Add(*prodAvailableUsages[resourceName])
+            }
+            // add min(prodBothAvailableUsage, bothTotalAvailableUsage) to prodTotalAvailableUsages
+            if _, prodBothOk := prodBothAvailableUsage[resourceName]; prodBothOk {
+                if prodBothAvailableUsage[resourceName].Cmp(*bothTotalAvailableUsage[resourceName]) > 0 {
+                    prodTotalQuantity.Add(*bothTotalAvailableUsage[resourceName])
+                } else {
+                    prodTotalQuantity.Add(*prodBothAvailableUsage[resourceName])
                 }
-            totalAvailableUsages[resourceName] = quantity
             }
-        quantity.Add(*destinationNode.thresholds.highResourceThreshold[resourceName])
-        quantity.Sub(*destinationNode.usage[resourceName])
-        }
         }
     }
-
-    keysAndValues := []interface{}{
+    prodTargetNodes = append(prodTargetNodes, prodBothTotalNodes...)
+    prodKeysAndValues := []interface{}{
         "nodePool", nodePoolName,
     }
-    for resourceName, quantity := range totalAvailableUsages {
-        keysAndValues = append(keysAndValues, string(resourceName), quantity.String())
+    for resourceName, quantity := range prodTotalAvailableUsages {
+        prodKeysAndValues = append(prodKeysAndValues, string(resourceName), quantity.String())
+    }
+    klog.V(4).InfoS("Total prod usage capacity to be moved", prodKeysAndValues...)
+    balancePods(ctx, nodePoolName, prodSourceNodes, prodTargetNodes, prodTotalAvailableUsages, dryRun, nodeFit, true, resourceWeights, podEvictor,
+        podFilter, nodeIndexer, continueEviction, evictionReasonGenerator)
+}
+
+func newAvailableUsage(resourceNames []corev1.ResourceName) map[corev1.ResourceName]*resource.Quantity {
+    availableUsage := make(map[corev1.ResourceName]*resource.Quantity)
+    for _, resourceName := range resourceNames {
+        var quantity *resource.Quantity
+        switch resourceName {
+        case corev1.ResourceCPU:
+            quantity = resource.NewMilliQuantity(0, resource.DecimalSI)
+        case corev1.ResourceMemory, corev1.ResourceEphemeralStorage, corev1.ResourceStorage:
+            quantity = resource.NewQuantity(0, resource.BinarySI)
+        default:
+            quantity = resource.NewQuantity(0, resource.DecimalSI)
+        }
+        availableUsage[resourceName] = quantity
     }
-    klog.V(4).InfoS("Total capacity to be moved", keysAndValues...)
+    return availableUsage
+}
+
+func balancePods(ctx context.Context,
+    nodePoolName string,
+    sourceNodes []NodeInfo,
+    targetNodes []*corev1.Node,
+    totalAvailableUsages map[corev1.ResourceName]*resource.Quantity,
+    dryRun bool,
+    nodeFit, prod bool,
+    resourceWeights map[corev1.ResourceName]int64,
+    podEvictor framework.Evictor,
+    podFilter framework.FilterFunc,
+    nodeIndexer podutil.GetPodsAssignedToNodeFunc,
+    continueEviction continueEvictionCond,
+    evictionReasonGenerator evictionReasonGeneratorFn) {
     for _, srcNode := range sourceNodes {
+        var allPods []*corev1.Pod
+        if prod {
+            allPods = srcNode.prodPods
+        } else {
+            allPods = srcNode.allPods
+        }
         nonRemovablePods, removablePods := classifyPods(
-            srcNode.allPods,
+            allPods,
             podutil.WrapFilterFuncs(podFilter, func(pod *corev1.Pod) bool {
                 if !nodeFit {
                     return true
@@ -285,22 +428,55 @@ func evictPodsFromSourceNodes(
             }),
         )
         klog.V(4).InfoS("Evicting pods from node",
-            "nodePool", nodePoolName, "node", klog.KObj(srcNode.node), "usage", srcNode.usage,
-            "allPods", len(srcNode.allPods), "nonRemovablePods", len(nonRemovablePods), "removablePods", len(removablePods))
+            "nodePool", nodePoolName, "node", klog.KObj(srcNode.node), "prod", prod, "usage", srcNode.usage,
+            "allPods", len(allPods), "nonRemovablePods", len(nonRemovablePods), "removablePods", len(removablePods))
         if len(removablePods) == 0 {
             klog.V(4).InfoS("No removable pods on node, try next node", "node", klog.KObj(srcNode.node), "nodePool", nodePoolName)
             continue
         }
         sortPodsOnOneOverloadedNode(srcNode, removablePods, resourceWeights)
-        evictPods(ctx, nodePoolName, dryRun, removablePods, srcNode, totalAvailableUsages, podEvictor, podFilter, continueEviction, evictionReasonGenerator)
+        evictPods(ctx, nodePoolName, dryRun, prod, removablePods, srcNode, totalAvailableUsages, podEvictor, podFilter, continueEviction, evictionReasonGenerator)
     }
 }
 
+func targetAvailableUsage(destinationNodes []NodeInfo, resourceNames []corev1.ResourceName, prod bool) (map[corev1.ResourceName]*resource.Quantity, []*corev1.Node) {
+    var targetNodes []*corev1.Node
+    totalAvailableUsages := map[corev1.ResourceName]*resource.Quantity{}
+    for _, resourceName := range resourceNames {
+        var quantity *resource.Quantity
+        switch resourceName {
+        case corev1.ResourceCPU:
+            quantity = resource.NewMilliQuantity(0, resource.DecimalSI)
+        case corev1.ResourceMemory, corev1.ResourceEphemeralStorage, corev1.ResourceStorage:
+            quantity = resource.NewQuantity(0, resource.BinarySI)
+        default:
+            quantity = resource.NewQuantity(0, resource.DecimalSI)
+        }
+        totalAvailableUsages[resourceName] = quantity
+    }
+
+    for _, destinationNode := range destinationNodes {
+        targetNodes = append(targetNodes, destinationNode.node)
+        for _, resourceName := range resourceNames {
+            if prod {
+                totalAvailableUsages[resourceName].Add(*destinationNode.thresholds.prodHighResourceThreshold[resourceName])
+                totalAvailableUsages[resourceName].Sub(*destinationNode.prodUsage[resourceName])
+            } else {
+                totalAvailableUsages[resourceName].Add(*destinationNode.thresholds.highResourceThreshold[resourceName])
+                totalAvailableUsages[resourceName].Sub(*destinationNode.usage[resourceName])
+            }
+        }
+    }
+
+    return totalAvailableUsages, targetNodes
+}
+
 func evictPods(
     ctx context.Context,
     nodePoolName string,
     dryRun bool,
+    prod bool,
     inputPods []*corev1.Pod,
     nodeInfo NodeInfo,
     totalAvailableUsages map[corev1.ResourceName]*resource.Quantity,
@@ -310,7 +486,7 @@ func evictPods(
     evictionReasonGenerator evictionReasonGeneratorFn,
 ) {
     for _, pod := range inputPods {
-        if !continueEviction(nodeInfo, totalAvailableUsages) {
+        if !continueEviction(nodeInfo, totalAvailableUsages, prod) {
             return
         }
@@ -322,7 +498,7 @@ func evictPods(
             klog.InfoS("Evict pod in dry run mode", "pod", klog.KObj(pod), "node", klog.KObj(nodeInfo.node), "nodePool", nodePoolName)
         } else {
             evictionOptions := framework.EvictOptions{
-                Reason: evictionReasonGenerator(nodeInfo),
+                Reason: evictionReasonGenerator(nodeInfo, prod),
             }
             if !podEvictor.Evict(ctx, pod, evictionOptions) {
                 klog.InfoS("Failed to Evict Pod", "pod", klog.KObj(pod), "node", klog.KObj(nodeInfo.node), "nodePool", nodePoolName)
@@ -365,11 +541,17 @@ func evictPods(
 }
 
 // sortNodesByUsage sorts nodes based on usage.
-func sortNodesByUsage(nodes []NodeInfo, resourceToWeightMap map[corev1.ResourceName]int64, ascending bool) {
+func sortNodesByUsage(nodes []NodeInfo, resourceToWeightMap map[corev1.ResourceName]int64, ascending, prod bool) {
     scorer := sorter.ResourceUsageScorer(resourceToWeightMap)
     sort.Slice(nodes, func(i, j int) bool {
-        iNodeUsage := usageToResourceList(nodes[i].usage)
-        jNodeUsage := usageToResourceList(nodes[j].usage)
+        var iNodeUsage, jNodeUsage corev1.ResourceList
+        if prod {
+            iNodeUsage = usageToResourceList(nodes[i].prodUsage)
+            jNodeUsage = usageToResourceList(nodes[j].prodUsage)
+        } else {
+            iNodeUsage = usageToResourceList(nodes[i].usage)
+            jNodeUsage = usageToResourceList(nodes[j].usage)
+        }
         iScore := scorer(iNodeUsage, nodes[i].node.Status.Allocatable)
         jScore := scorer(jNodeUsage, nodes[j].node.Status.Allocatable)
@@ -441,10 +623,12 @@ func classifyPods(pods []*corev1.Pod, filter func(pod *corev1.Pod) bool) ([]*cor
     return nonRemovablePods, removablePods
 }
 
-func calcAverageResourceUsagePercent(nodeUsages map[string]*NodeUsage) ResourceThresholds {
+func calcAverageResourceUsagePercent(nodeUsages map[string]*NodeUsage) (ResourceThresholds, ResourceThresholds) {
     allUsedPercentages := ResourceThresholds{}
+    prodUsedPercentages := ResourceThresholds{}
     for _, nodeUsage := range nodeUsages {
         usage := nodeUsage.usage
+        prodUsage := nodeUsage.prodUsage
         allocatable := nodeUsage.node.Status.Allocatable
         for resourceName, used := range usage {
             total := allocatable[resourceName]
@@ -457,14 +641,29 @@ func calcAverageResourceUsagePercent(nodeUsages map[string]*NodeUsage) ResourceT
                 allUsedPercentages[resourceName] += Percentage(used.Value()) / Percentage(total.Value()) * 100.0
             }
         }
+        for resourceName, used := range prodUsage {
+            total := allocatable[resourceName]
+            if total.IsZero() {
+                continue
+            }
+            if resourceName == corev1.ResourceCPU {
+                prodUsedPercentages[resourceName] += Percentage(used.MilliValue()) / Percentage(total.MilliValue()) * 100.0
+            } else {
+                prodUsedPercentages[resourceName] += Percentage(used.Value()) / Percentage(total.Value()) * 100.0
+            }
+        }
     }
 
     average := ResourceThresholds{}
+    prodAverage := ResourceThresholds{}
     numberOfNodes := len(nodeUsages)
     for resourceName, totalPercentage := range allUsedPercentages {
         average[resourceName] = totalPercentage / Percentage(numberOfNodes)
     }
-    return average
+    for resourceName, totalPercentage := range prodUsedPercentages {
+        prodAverage[resourceName] = totalPercentage / Percentage(numberOfNodes)
+    }
+    return average, prodAverage
 }
 
 func sortPodsOnOneOverloadedNode(srcNode NodeInfo, removablePods []*corev1.Pod, resourceWeights map[corev1.ResourceName]int64) {
     weights := make(map[corev1.ResourceName]int64)
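A standalone sketch (not part of the patch) of the capacity arithmetic performed in evictPodsFromSourceNodes above, using hypothetical CPU headroom figures: the node-level pass may draw on the low nodes plus the both-low nodes, while the prod-level pass may only reuse min(prod headroom, remaining node headroom) of the both-low nodes on top of the prod-low nodes. All variable names and quantities below are illustrative.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// Hypothetical CPU headroom per node group.
	nodeOnly := resource.NewMilliQuantity(2000, resource.DecimalSI) // low nodes: node headroom
	bothNode := resource.NewMilliQuantity(1000, resource.DecimalSI) // both-low nodes: node headroom
	bothProd := resource.NewMilliQuantity(600, resource.DecimalSI)  // both-low nodes: prod headroom
	prodOnly := resource.NewMilliQuantity(800, resource.DecimalSI)  // prod-low nodes: prod headroom

	// Node-level balancing budget: low nodes + both-low nodes.
	nodeTotal := nodeOnly.DeepCopy()
	nodeTotal.Add(*bothNode)

	// Prod-level balancing budget: prod-low nodes + min(prod headroom, node headroom) of both-low nodes.
	prodTotal := prodOnly.DeepCopy()
	if bothProd.Cmp(*bothNode) > 0 {
		prodTotal.Add(*bothNode)
	} else {
		prodTotal.Add(*bothProd)
	}

	fmt.Println("node-level budget:", nodeTotal.String()) // 3000m in total
	fmt.Println("prod-level budget:", prodTotal.String()) // 1400m in total
}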
diff --git a/pkg/descheduler/framework/plugins/loadaware/utilization_util_test.go b/pkg/descheduler/framework/plugins/loadaware/utilization_util_test.go
index 3024a6c98..2f6ebf225 100644
--- a/pkg/descheduler/framework/plugins/loadaware/utilization_util_test.go
+++ b/pkg/descheduler/framework/plugins/loadaware/utilization_util_test.go
@@ -108,7 +108,7 @@ func TestResourceUsagePercentages(t *testing.T) {
 			corev1.ResourceMemory: resource.NewQuantity(3038982964, resource.BinarySI),
 			corev1.ResourcePods:   resource.NewQuantity(11, resource.BinarySI),
 		},
-	})
+	}, false)
 
 	expectedUsageInIntPercentage := map[corev1.ResourceName]float64{
 		corev1.ResourceCPU:    63,
@@ -133,7 +133,7 @@ func TestSortNodesByUsageDescendingOrder(t *testing.T) {
 		corev1.ResourceMemory: 1,
 		corev1.ResourcePods:   1,
 	}
-	sortNodesByUsage(nodeList, weightMap, false)
+	sortNodesByUsage(nodeList, weightMap, false, false)
 	assert.Equal(t, expectedNodeList, nodeList)
 }
 
@@ -146,7 +146,7 @@ func TestSortNodesByUsageAscendingOrder(t *testing.T) {
 		corev1.ResourceMemory: 1,
 		corev1.ResourcePods:   1,
 	}
-	sortNodesByUsage(nodeList, weightMap, true)
+	sortNodesByUsage(nodeList, weightMap, true, false)
 	assert.Equal(t, expectedNodeList, nodeList)
 }
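A test-style sketch (not part of the patch), assuming it lives in the same loadaware test package as the tests above, with the existing test imports (testing, corev1, resource, assert) in scope. It exercises the new prod parameter of resourceUsagePercentages and the second return value of calcAverageResourceUsagePercent; the test name and all usage figures are made up.

func TestProdAwareHelpers(t *testing.T) {
	nodeUsage := &NodeUsage{
		node: &corev1.Node{
			Status: corev1.NodeStatus{
				Allocatable: corev1.ResourceList{
					corev1.ResourceCPU: resource.MustParse("4"),
				},
			},
		},
		usage: map[corev1.ResourceName]*resource.Quantity{
			corev1.ResourceCPU: resource.NewMilliQuantity(2000, resource.DecimalSI),
		},
		prodUsage: map[corev1.ResourceName]*resource.Quantity{
			corev1.ResourceCPU: resource.NewMilliQuantity(1000, resource.DecimalSI),
		},
	}

	// prod=true reads prodUsage instead of usage: 1000m of 4 cores is 25%.
	prodPercentage := resourceUsagePercentages(nodeUsage, true)
	assert.InDelta(t, 25.0, prodPercentage[corev1.ResourceCPU], 0.1)

	// calcAverageResourceUsagePercent now returns node and prod averages.
	average, prodAverage := calcAverageResourceUsagePercent(map[string]*NodeUsage{"test-node": nodeUsage})
	assert.InDelta(t, 50.0, float64(average[corev1.ResourceCPU]), 0.1)
	assert.InDelta(t, 25.0, float64(prodAverage[corev1.ResourceCPU]), 0.1)
}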