koord-manager: consider NodeReserved when calculating mid resource (#2253)

Signed-off-by: wangyang60 <[email protected]>
tan90github authored and j4ckstraw committed Dec 4, 2024
1 parent 6e4f99e commit a459409
Showing 6 changed files with 430 additions and 92 deletions.
@@ -279,7 +279,7 @@ func (p *Plugin) calculateOnNode(strategy *configuration.ColocationStrategy, nod
 		}
 	}
 
-	hostAppHPUsed := resutil.GetHostAppHPUsed(resourceMetrics)
+	hostAppHPUsed := resutil.GetHostAppHPUsed(resourceMetrics, extension.PriorityBatch)
 	// For pods that reported metrics but are not shown in the current list, count them according to the metric priority.
 	podsDanglingUsed := util.NewZeroResourceList()
 	for _, podMetric := range podMetricDanglingMap {
@@ -352,7 +352,7 @@ func (p *Plugin) calculateOnNUMALevel(strategy *configuration.ColocationStrategy
 	podsHPZoneMaxUsedReq := make([]corev1.ResourceList, zoneNum)
 	batchZoneAllocatable := make([]corev1.ResourceList, zoneNum)
 
-	hostAppHPUsed := resutil.GetHostAppHPUsed(resourceMetrics)
+	hostAppHPUsed := resutil.GetHostAppHPUsed(resourceMetrics, extension.PriorityBatch)
 	systemUsed := resutil.GetResourceListForCPUAndMemory(nodeMetric.Status.NodeMetric.SystemUsage.ResourceList)
 	// resource usage of host applications with prod priority is counted as host system usage since they consume the
 	// node reserved resource. Binding a host app to a single NUMA node is not supported yet; divide the usage by the NUMA node count.
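Both batch call sites now pass extension.PriorityBatch, so the helper can decide which host applications count as high-priority (HP) usage relative to the resource tier being reclaimed. A minimal sketch of what the updated helper might look like, assuming the signature shown in the diff; the body and the priorityRank helper are illustrative, not the actual resutil implementation:

// Sketch: sum the usage of host applications whose priority is strictly higher
// than the calling plugin's reclaim priority (PriorityBatch for the batch
// plugin, PriorityMid for the mid plugin).
func GetHostAppHPUsed(resourceMetrics *framework.ResourceMetrics, reclaimPriority extension.PriorityClass) corev1.ResourceList {
	hostAppHPUsed := util.NewZeroResourceList()
	for _, hostAppMetric := range resourceMetrics.NodeMetric.Status.HostApplicationMetric {
		if priorityRank(hostAppMetric.Priority) <= priorityRank(reclaimPriority) {
			// Usage at or below the reclaim priority is reclaimable, not HP.
			continue
		}
		hostAppHPUsed = quotav1.Add(hostAppHPUsed, GetResourceListForCPUAndMemory(hostAppMetric.Usage.ResourceList))
	}
	return hostAppHPUsed
}

// priorityRank is a hypothetical ordering helper: Prod > Mid > Batch > Free.
func priorityRank(p extension.PriorityClass) int {
	switch p {
	case extension.PriorityProd:
		return 3
	case extension.PriorityMid:
		return 2
	case extension.PriorityBatch:
		return 1
	default:
		return 0
	}
}

With this shape, the mid-priority host application in the new test case below is counted as HP usage by the batch plugin (Mid > Batch) but not by the mid plugin itself, which matches the extension.PriorityMid argument used in midresource/plugin.go.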
@@ -3731,6 +3731,67 @@ func TestPluginCalculate(t *testing.T) {
 			},
 			wantErr: false,
 		},
+		{
+			name: "calculate with memory usage, including mid host application usage",
+			args: args{
+				strategy: &configuration.ColocationStrategy{
+					Enable:                        pointer.Bool(true),
+					CPUReclaimThresholdPercent:    pointer.Int64(65),
+					MemoryReclaimThresholdPercent: pointer.Int64(65),
+					DegradeTimeMinutes:            pointer.Int64(15),
+					UpdateTimeThresholdSeconds:    pointer.Int64(300),
+					ResourceDiffThreshold:         pointer.Float64(0.1),
+				},
+				node: &corev1.Node{
+					ObjectMeta: metav1.ObjectMeta{
+						Name: "test-node1",
+					},
+					Status: makeNodeStat("100", "120G"),
+				},
+				resourceMetrics: &framework.ResourceMetrics{
+					NodeMetric: &slov1alpha1.NodeMetric{
+						Status: slov1alpha1.NodeMetricStatus{
+							UpdateTime: &metav1.Time{Time: time.Now()},
+							NodeMetric: &slov1alpha1.NodeMetricInfo{
+								NodeUsage: slov1alpha1.ResourceMap{
+									ResourceList: makeResourceList("50", "55G"),
+								},
+								SystemUsage: slov1alpha1.ResourceMap{
+									ResourceList: makeResourceList("4", "6G"),
+								},
+							},
+							PodsMetric: []*slov1alpha1.PodMetricInfo{
+								genPodMetric("test", "podA", "11", "11G"),
+								genPodMetric("test", "podB", "10", "10G"),
+								genPodMetric("test", "podC", "22", "22G"),
+							},
+							HostApplicationMetric: []*slov1alpha1.HostApplicationMetricInfo{
+								{
+									Name: "test-mid-host-application",
+									Usage: slov1alpha1.ResourceMap{
+										ResourceList: makeResourceList("3", "6G"),
+									},
+									Priority: extension.PriorityMid,
+								},
+							},
+						},
+					},
+				},
+			},
+			want: []framework.ResourceItem{
+				{
+					Name:     extension.BatchCPU,
+					Quantity: resource.NewQuantity(25000, resource.DecimalSI),
+					Message:  "batchAllocatable[CPU(Milli-Core)]:25000 = nodeCapacity:100000 - nodeSafetyMargin:35000 - systemUsageOrNodeReserved:7000 - podHPUsed:33000",
+				},
+				{
+					Name:     extension.BatchMemory,
+					Quantity: resource.NewScaledQuantity(33, 9),
+					Message:  "batchAllocatable[Mem(GB)]:33 = nodeCapacity:120 - nodeSafetyMargin:42 - systemUsage:12 - podHPUsed:33",
+				},
+			},
+			wantErr: false,
+		},
 		{
 			name: "calculate with memory usage, including batch host application usage",
 			args: args{
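Cross-checking the expected values in the new test case (assuming podA and podC are high-priority pods and podB a batch pod excluded from podHPUsed, consistent with the podList fixture this test uses):

batch CPU = 100000 - 35000 (safety margin: 100% - 65% reclaim threshold) - 7000 (system 4000 + mid host app 3000) - 33000 (podA 11000 + podC 22000) = 25000 milli-cores
batch Mem = 120G - 42G (35% margin) - 12G (system 6G + mid host app 6G) - 33G (podA 11G + podC 22G) = 33G

The mid host application's usage lands in systemUsageOrNodeReserved rather than being reclaimed, which is exactly the behavior the new priority argument to GetHostAppHPUsed enables.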
57 changes: 41 additions & 16 deletions pkg/slo-controller/noderesource/plugins/midresource/plugin.go
@@ -21,6 +21,7 @@ import (
 	"time"
 
 	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
 	quotav1 "k8s.io/apiserver/pkg/quota/v1"
 	"k8s.io/klog/v2"
 	"k8s.io/utils/clock"
@@ -126,9 +127,9 @@ func (p *Plugin) degradeCalculate(node *corev1.Node, message string) []framework
 	return p.Reset(node, message)
 }
 
-// Unallocated[Mid] = max(NodeAllocatable - Allocated[Prod], 0)
-func (p *Plugin) getUnallocated(node *corev1.Node, podList *corev1.PodList) corev1.ResourceList {
-	allocated := corev1.ResourceList{}
+// Unallocated[Mid] = max(NodeCapacity - NodeReserved - Allocated[Prod], 0)
+func (p *Plugin) getUnallocated(nodeName string, podList *corev1.PodList, nodeCapacity, nodeReserved corev1.ResourceList) corev1.ResourceList {
+	prodPodAllocated := corev1.ResourceList{}
 	for i := range podList.Items {
 		pod := &podList.Items[i]
 		priorityClass := extension.GetPodPriorityClassWithDefault(pod)
@@ -142,19 +143,27 @@ func (p *Plugin) getUnallocated(node *corev1.Node, podList *corev1.PodList) core
 			continue
 		}
 		podRequest := util.GetPodRequest(pod, corev1.ResourceCPU, corev1.ResourceMemory)
-		allocated = quotav1.Add(allocated, podRequest)
+		prodPodAllocated = quotav1.Add(prodPodAllocated, podRequest)
 	}
 
-	return quotav1.SubtractWithNonNegativeResult(node.Status.Allocatable, allocated)
+	midUnallocated := quotav1.Max(quotav1.Subtract(quotav1.Subtract(nodeCapacity, nodeReserved), prodPodAllocated), util.NewZeroResourceList())
+	cpuMsg := fmt.Sprintf("midUnallocatedCPU[core]:%v = max(nodeCapacity:%v - nodeReserved:%v - prodPodAllocated:%v, 0)",
+		midUnallocated.Cpu(), nodeCapacity.Cpu(), nodeReserved.Cpu(), prodPodAllocated.Cpu())
+	memMsg := fmt.Sprintf("midUnallocatedMem[GB]:%v = max(nodeCapacity:%v - nodeReserved:%v - prodPodAllocated:%v, 0)",
+		midUnallocated.Memory().ScaledValue(resource.Giga), nodeCapacity.Memory().ScaledValue(resource.Giga),
+		nodeReserved.Memory().ScaledValue(resource.Giga), prodPodAllocated.Memory().ScaledValue(resource.Giga))
+
+	klog.V(6).Infof("calculated mid unallocated for node %s, cpu(core) %v, memory(GB) %v", nodeName, cpuMsg, memMsg)
+	return midUnallocated
 }
 
 func (p *Plugin) calculate(strategy *configuration.ColocationStrategy, node *corev1.Node, podList *corev1.PodList,
 	resourceMetrics *framework.ResourceMetrics) []framework.ResourceItem {
 	// Allocatable[Mid]' := min(Reclaimable[Mid], NodeAllocatable * thresholdRatio) + Unallocated[Mid] * midUnallocatedRatio
-	// Unallocated[Mid] = max(NodeAllocatable - Allocated[Prod], 0)
+	// Unallocated[Mid] = max(NodeCapacity - NodeReserved - Allocated[Prod], 0)
 
-	var allocatableMilliCPU, allocatableMemory, prodReclaimableMilliCPU int64
-	var prodReclaimableMemory string = "0"
+	var allocatableMilliCPU, allocatableMemory int64
+	prodReclaimableCPU, prodReclaimableMemory := resource.NewQuantity(0, resource.DecimalSI), resource.NewQuantity(0, resource.BinarySI)
 	prodReclaimableMetic := resourceMetrics.NodeMetric.Status.ProdReclaimableMetric
 
 	if prodReclaimableMetic == nil || prodReclaimableMetic.Resource.ResourceList == nil {
@@ -163,18 +172,34 @@ func (p *Plugin) calculate(strategy *configuration.ColocationStrategy, node *cor
 		allocatableMemory = 0
 	} else {
 		prodReclaimable := resourceMetrics.NodeMetric.Status.ProdReclaimableMetric.Resource
-		allocatableMilliCPU = prodReclaimable.Cpu().MilliValue()
-		allocatableMemory = prodReclaimable.Memory().Value()
-		prodReclaimableMilliCPU = allocatableMilliCPU
-		prodReclaimableMemory = prodReclaimable.Memory().String()
+		prodReclaimableCPU = prodReclaimable.Cpu()
+		prodReclaimableMemory = prodReclaimable.Memory()
+		allocatableMilliCPU = prodReclaimableCPU.MilliValue()
+		allocatableMemory = prodReclaimableMemory.Value()
 	}
 
-	nodeAllocatable := node.Status.Allocatable
+	nodeMetric := resourceMetrics.NodeMetric
 
+	hostAppHPUsed := resutil.GetHostAppHPUsed(resourceMetrics, extension.PriorityMid)
+
+	nodeCapacity := resutil.GetNodeCapacity(node)
+
+	systemUsed := resutil.GetResourceListForCPUAndMemory(nodeMetric.Status.NodeMetric.SystemUsage.ResourceList)
+	// resource usage of host applications with prod priority is counted as host system usage since they consume the
+	// node reserved resource.
+	systemUsed = quotav1.Add(systemUsed, hostAppHPUsed)
+
+	// System.Reserved = max(Node.Anno.Reserved, Node.Kubelet.Reserved)
+	nodeAnnoReserved := util.GetNodeReservationFromAnnotation(node.Annotations)
+	nodeKubeletReserved := util.GetNodeReservationFromKubelet(node)
+	// FIXME: resource reservation taking max is rather confusing.
+	nodeReserved := quotav1.Max(nodeKubeletReserved, nodeAnnoReserved)
+	nodeReserved = quotav1.Max(systemUsed, nodeReserved)
+
-	// TODO: consider SafetyMargin and NodeReserved
-	unallocated := p.getUnallocated(node, podList)
+	unallocated := p.getUnallocated(node.Name, podList, nodeCapacity, nodeReserved)
 
-	cpuInMilliCores, memory, cpuMsg, memMsg := resutil.CalculateMidResourceByPolicy(strategy, nodeAllocatable, unallocated, allocatableMilliCPU, allocatableMemory, prodReclaimableMilliCPU, prodReclaimableMemory, node.Name)
+	cpuInMilliCores, memory, cpuMsg, memMsg := resutil.CalculateMidResourceByPolicy(strategy, nodeCapacity,
+		unallocated, allocatableMilliCPU, allocatableMemory, prodReclaimableCPU, prodReclaimableMemory, node.Name)
 
 	metrics.RecordNodeExtendedResourceAllocatableInternal(node, string(extension.MidCPU), metrics.UnitInteger, float64(cpuInMilliCores.MilliValue())/1000)
 	metrics.RecordNodeExtendedResourceAllocatableInternal(node, string(extension.MidMemory), metrics.UnitByte, float64(memory.Value()))
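For reference, the new Unallocated[Mid] arithmetic can be exercised standalone with the same quota helpers the plugin uses (example numbers only; "zero" stands in for util.NewZeroResourceList()):

// Standalone demo of Unallocated[Mid] = max(NodeCapacity - NodeReserved - Allocated[Prod], 0).
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	quotav1 "k8s.io/apiserver/pkg/quota/v1"
)

func main() {
	// A 100-core/120G node that reserves 7 cores/12G, with 60 cores/55G
	// requested by prod pods.
	nodeCapacity := corev1.ResourceList{
		corev1.ResourceCPU:    resource.MustParse("100"),
		corev1.ResourceMemory: resource.MustParse("120G"),
	}
	nodeReserved := corev1.ResourceList{
		corev1.ResourceCPU:    resource.MustParse("7"),
		corev1.ResourceMemory: resource.MustParse("12G"),
	}
	prodPodAllocated := corev1.ResourceList{
		corev1.ResourceCPU:    resource.MustParse("60"),
		corev1.ResourceMemory: resource.MustParse("55G"),
	}
	zero := corev1.ResourceList{
		corev1.ResourceCPU:    *resource.NewQuantity(0, resource.DecimalSI),
		corev1.ResourceMemory: *resource.NewQuantity(0, resource.BinarySI),
	}

	midUnallocated := quotav1.Max(
		quotav1.Subtract(quotav1.Subtract(nodeCapacity, nodeReserved), prodPodAllocated), zero)
	fmt.Printf("mid unallocated: cpu=%v memory=%v\n", midUnallocated.Cpu(), midUnallocated.Memory())
	// Output: mid unallocated: cpu=33 memory=53G
}

Subtract followed by Max against zero gives effectively the clamping the old SubtractWithNonNegativeResult call provided, just with the extra nodeReserved term in between.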
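Plugging the demo's numbers into the policy formula at the top of calculate, Allocatable[Mid]' := min(Reclaimable[Mid], NodeCapacity * thresholdRatio) + Unallocated[Mid] * midUnallocatedRatio (the call now passes nodeCapacity where the comment says NodeAllocatable), with purely illustrative ratios (thresholdRatio = 0.1, midUnallocatedRatio = 0.5; the real values come from the ColocationStrategy config) and Reclaimable[Mid] = 6 cores: min(6, 100 * 0.1) + 33 * 0.5 = 6 + 16.5 = 22.5 cores of Mid-tier CPU.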