Skip to content

Commit

Permalink
koord-scheduler: optimize elastic quota log (#1690)
Browse the repository at this point in the history
Signed-off-by: chuanyun.lcy <[email protected]>
Co-authored-by: chuanyun.lcy <[email protected]>
  • Loading branch information
shaloulcy and chuanyun.lcy authored Sep 29, 2023
1 parent ad0e1e4 commit 125e52a
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 67 deletions.
2 changes: 1 addition & 1 deletion pkg/scheduler/plugins/elasticquota/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ func (ctrl *Controller) syncHandler() []error {
newEQ.Annotations[extension.AnnotationAllocated] = string(allocatedBytes)
newEQ.Status.Used = used

klog.V(5).Infof("quota:%v, oldUsed:%v, newUsed:%v, oldRuntime:%v, newRuntime:%v, oldRequest:%v, newRequest:%v, oldChildRequest:%v, newChildRequest:%v, oldGuarantee:%v, newGuarantee:%v, oldAllocated:%, newAllocated:%v",
klog.V(5).Infof("quota: %v, oldUsed: %v, newUsed: %v, oldRuntime: %v, newRuntime: %v, oldRequest: %v, newRequest: %v, oldChildRequest: %v, newChildRequest: %v, oldGuarantee: %v, newGuarantee: %v, oldAllocated: %v, newAllocated: %v",
eq.Name, eq.Status.Used, used, eq.Annotations[extension.AnnotationRuntime], string(runtimeBytes),
eq.Annotations[extension.AnnotationRequest], string(requestBytes),
eq.Annotations[extension.AnnotationChildRequest], string(childRequestBytes),
Expand Down
43 changes: 26 additions & 17 deletions pkg/scheduler/plugins/elasticquota/core/group_quota_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,14 +92,16 @@ func (gqm *GroupQuotaManager) setScaleMinQuotaEnabled(flag bool) {
defer gqm.hierarchyUpdateLock.Unlock()

gqm.scaleMinQuotaEnabled = flag
klog.V(5).Infof("Set ScaleMinQuotaEnabled, flag:%v", gqm.scaleMinQuotaEnabled)
klog.V(5).Infof("Set ScaleMinQuotaEnabled, flag: %v", gqm.scaleMinQuotaEnabled)
}

func (gqm *GroupQuotaManager) UpdateClusterTotalResource(deltaRes v1.ResourceList) {
gqm.hierarchyUpdateLock.Lock()
defer gqm.hierarchyUpdateLock.Unlock()

klog.V(5).Infof("UpdateClusterResource tree: %v deltaRes:%v", gqm.treeID, deltaRes)
if klog.V(5).Enabled() {
klog.Infof("UpdateClusterResource tree: %v deltaRes: %v", gqm.treeID, util.DumpJSON(deltaRes))
}
defaultQuota := gqm.getQuotaInfoByNameNoLock(extension.DefaultQuotaName)
if defaultQuota != nil {
defaultQuota.lock.Lock()
Expand Down Expand Up @@ -135,7 +137,9 @@ func (gqm *GroupQuotaManager) updateClusterTotalResourceNoLock(deltaRes v1.Resou
if !quotav1.IsZero(diffRes) {
gqm.totalResourceExceptSystemAndDefaultUsed = totalResNoSysOrDefault.DeepCopy()
gqm.runtimeQuotaCalculatorMap[extension.RootQuotaName].setClusterTotalResource(totalResNoSysOrDefault)
klog.V(5).Infof("UpdateClusterResource tree: %v, finish totalResourceExceptSystemAndDefaultUsed:%v", gqm.treeID, gqm.totalResourceExceptSystemAndDefaultUsed)
if klog.V(5).Enabled() {
klog.Infof("UpdateClusterResource tree: %v, finish totalResourceExceptSystemAndDefaultUsed: %v", gqm.treeID, util.DumpJSON(gqm.totalResourceExceptSystemAndDefaultUsed))
}
}
}

Expand All @@ -153,7 +157,10 @@ func (gqm *GroupQuotaManager) SetTotalResourceForTree(total v1.ResourceList) v1.
delta := quotav1.Subtract(total, gqm.totalResource)
if !quotav1.IsZero(delta) {
gqm.updateClusterTotalResourceNoLock(delta)
klog.V(5).Infof("SetTotalResourceForTree tree: %v, total:%v, totalResourceExceptSystemAndDefaultUsed:%v ", gqm.treeID, gqm.totalResource, gqm.totalResourceExceptSystemAndDefaultUsed)
if klog.V(5).Enabled() {
klog.Infof("SetTotalResourceForTree tree: %v, total: %v, totalResourceExceptSystemAndDefaultUsed: %v", gqm.treeID,
util.DumpJSON(gqm.totalResource), util.DumpJSON(gqm.totalResourceExceptSystemAndDefaultUsed))
}
}

return delta
Expand Down Expand Up @@ -206,7 +213,7 @@ func (gqm *GroupQuotaManager) recursiveUpdateGroupTreeWithDeltaRequest(deltaReq

directParRuntimeCalculatorPtr := gqm.getRuntimeQuotaCalculatorByNameNoLock(curQuotaInfo.ParentName)
if directParRuntimeCalculatorPtr == nil {
klog.Errorf("treeWrapper not exist! quotaName:%v parentName:%v", curQuotaInfo.Name, curQuotaInfo.ParentName)
klog.Errorf("treeWrapper not exist! quotaName: %v, parentName: %v", curQuotaInfo.Name, curQuotaInfo.ParentName)
return
}
if directParRuntimeCalculatorPtr.needUpdateOneGroupRequest(curQuotaInfo) {
Expand Down Expand Up @@ -280,12 +287,12 @@ func (gqm *GroupQuotaManager) RefreshRuntimeNoLock(quotaName string) v1.Resource
}
parRuntimeQuotaCalculator := gqm.getRuntimeQuotaCalculatorByNameNoLock(quotaInfo.ParentName)
if parRuntimeQuotaCalculator == nil {
klog.Errorf("treeWrapper not exist! parentQuotaName:%v", quotaInfo.ParentName)
klog.Errorf("treeWrapper not exist! parentQuotaName: %v", quotaInfo.ParentName)
return nil
}
subTreeWrapper := gqm.getRuntimeQuotaCalculatorByNameNoLock(quotaInfo.Name)
if subTreeWrapper == nil {
klog.Errorf("treeWrapper not exist! parentQuotaName:%v", quotaInfo.Name)
klog.Errorf("treeWrapper not exist! parentQuotaName: %v", quotaInfo.Name)
return nil
}

Expand Down Expand Up @@ -401,7 +408,7 @@ func (gqm *GroupQuotaManager) UpdateQuota(quota *v1alpha1.ElasticQuota, isDelete
}
}

klog.Infof("reset quota tree %v, for quota %v/%v updated", gqm.treeID, quota.Namespace, quota.Name)
klog.Infof("reset quota tree %v, for quota %v updated", gqm.treeID, quota.Name)
gqm.updateQuotaGroupConfigNoLock()

return nil
Expand Down Expand Up @@ -603,7 +610,7 @@ func (gqm *GroupQuotaManager) updatePodUsedNoLock(quotaName string, oldPod, newP
return
}
if !quotaInfo.CheckPodIsAssigned(newPod) && !quotaInfo.CheckPodIsAssigned(oldPod) {
klog.V(5).Infof("updatePodUsed, isAssigned is false, quotaName:%v, podName:%v",
klog.V(5).Infof("updatePodUsed, isAssigned is false, quotaName: %v, podName: %v",
quotaName, getPodName(oldPod, newPod))
return
}
Expand All @@ -623,8 +630,10 @@ func (gqm *GroupQuotaManager) updatePodUsedNoLock(quotaName string, oldPod, newP

deltaUsed := quotav1.Subtract(newPodUsed, oldPodUsed)
if quotav1.IsZero(deltaUsed) {
klog.V(5).Infof("updatePodUsed, deltaUsedIsZero, quotaName:%v, podName:%v, podUsed:%v",
quotaName, getPodName(oldPod, newPod), newPodUsed)
if klog.V(5).Enabled() {
klog.Infof("updatePodUsed, deltaUsedIsZero, quotaName: %v, podName: %v, podUsed: %v",
quotaName, getPodName(oldPod, newPod), util.DumpJSON(newPodUsed))
}
return
}
gqm.updateGroupDeltaUsedNoLock(quotaName, deltaUsed)
Expand Down Expand Up @@ -676,7 +685,7 @@ func (gqm *GroupQuotaManager) MigratePod(pod *v1.Pod, out, in string) {
gqm.updatePodIsAssignedNoLock(in, pod, isAssigned)
gqm.updatePodRequestNoLock(in, nil, pod)
gqm.updatePodUsedNoLock(in, nil, pod)
klog.V(5).Infof("migrate pod :%v from quota:%v to quota:%v, podPhase:%v", pod.Name, out, in, pod.Status.Phase)
klog.V(5).Infof("migrate pod %v from quota %v to quota %v, podPhase: %v", pod.Name, out, in, pod.Status.Phase)
}

func (gqm *GroupQuotaManager) GetQuotaSummary(quotaName string) (*QuotaInfoSummary, bool) {
Expand Down Expand Up @@ -855,7 +864,7 @@ func (gqm *GroupQuotaManager) OnNodeDelete(node *v1.Node) {
delta := quotav1.Subtract(v1.ResourceList{}, node.Status.Allocatable)
gqm.UpdateClusterTotalResource(delta)
delete(gqm.nodeResourceMap, node.Name)
klog.V(5).Infof("OnNodeDeleteFunc success:%v [%v]", node.Name, delta)
klog.V(5).Infof("OnNodeDeleteFunc success: %v [%v]", node.Name, delta)
}

func (gqm *GroupQuotaManager) GetTreeID() string {
Expand Down Expand Up @@ -911,7 +920,7 @@ func (gqm *GroupQuotaManager) recursiveUpdateGroupTreeWithDeltaAllocated(deltaAl

directParRuntimeCalculatorPtr := gqm.getRuntimeQuotaCalculatorByNameNoLock(curQuotaInfo.ParentName)
if directParRuntimeCalculatorPtr == nil {
klog.Errorf("treeWrapper not exist! quotaName:%v parentName:%v", curQuotaInfo.Name, curQuotaInfo.ParentName)
klog.Errorf("treeWrapper not exist! quotaName: %v, parentName: %v", curQuotaInfo.Name, curQuotaInfo.ParentName)
return
}
if directParRuntimeCalculatorPtr.needUpdateOneGroupGuaranteed(curQuotaInfo) {
Expand Down Expand Up @@ -956,7 +965,7 @@ func (gqm *GroupQuotaManager) doUpdateOneGroupMaxQuotaNoLock(quotaName string, n
if quotaInfoLen > 1 {
parentRuntimeCalculator := gqm.getRuntimeQuotaCalculatorByNameNoLock(curQuotaInfo.ParentName)
if parentRuntimeCalculator == nil {
klog.Errorf("runtimeQuotaCalculator not exist! quotaName:%v parentName:%v", curQuotaInfo.Name, curQuotaInfo.ParentName)
klog.Errorf("runtimeQuotaCalculator not exist! quotaName: %v, parentName: %v", curQuotaInfo.Name, curQuotaInfo.ParentName)
return
}
parentRuntimeCalculator.updateOneGroupMaxQuota(curQuotaInfo)
Expand Down Expand Up @@ -994,7 +1003,7 @@ func (gqm *GroupQuotaManager) doUpdateOneGroupMinQuotaNoLock(quotaName string, n
// update parent runtime calculator for min changed
parentRuntimeCalculator := gqm.getRuntimeQuotaCalculatorByNameNoLock(curQuotaInfo.ParentName)
if parentRuntimeCalculator == nil {
klog.Errorf("runtimeQuotaCalculator not exist! quotaName:%v parentName:%v", curQuotaInfo.Name, curQuotaInfo.ParentName)
klog.Errorf("runtimeQuotaCalculator not exist! quotaName: %v, parentName: %v", curQuotaInfo.Name, curQuotaInfo.ParentName)
return
}
parentRuntimeCalculator.updateOneGroupMinQuota(curQuotaInfo)
Expand All @@ -1013,7 +1022,7 @@ func (gqm *GroupQuotaManager) doUpdateOneGroupMinQuotaNoLock(quotaName string, n
if quotaInfoLen > 1 {
parentRuntimeCalculator := gqm.getRuntimeQuotaCalculatorByNameNoLock(curQuotaInfo.ParentName)
if parentRuntimeCalculator == nil {
klog.Errorf("runtimeQuotaCalculator not exist! quotaName:%v parentName:%v", curQuotaInfo.Name, curQuotaInfo.ParentName)
klog.Errorf("runtimeQuotaCalculator not exist! quotaName: %v, parentName: %v", curQuotaInfo.Name, curQuotaInfo.ParentName)
return
}
if parentRuntimeCalculator.needUpdateOneGroupGuaranteed(curQuotaInfo) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/klog/v2"

"github.com/koordinator-sh/koordinator/pkg/util"
)

// quotaNode stores the corresponding quotaInfo's information in a specific resource dimension.
Expand Down Expand Up @@ -437,9 +439,10 @@ func (qtw *RuntimeQuotaCalculator) setClusterTotalResource(full v1.ResourceList)
qtw.totalResource = full.DeepCopy()
qtw.globalRuntimeVersion++

klog.V(5).Infof("UpdateClusterTotalResource"+
"treeName:%v oldTotalResource:%v newTotalResource:%v reqLimit:%v refreshedVersion:%v",
qtw.treeName, oldTotalRes, qtw.totalResource, qtw.groupReqLimit, qtw.globalRuntimeVersion)
if klog.V(5).Enabled() {
klog.Infof("setClusterTotalResource, treeName: %v, oldTotalResource: %v, newTotalResource: %v, reqLimit: %v, refreshedVersion: %v",
qtw.treeName, util.DumpJSON(oldTotalRes), util.DumpJSON(qtw.totalResource), util.DumpJSON(qtw.groupReqLimit), qtw.globalRuntimeVersion)
}
}

// updateOneGroupRuntimeQuota update the quotaInfo's runtimeQuota as the quotaNode's runtime.
Expand Down Expand Up @@ -489,13 +492,9 @@ func (qtw *RuntimeQuotaCalculator) calculateRuntimeNoLock() {
}

func (qtw *RuntimeQuotaCalculator) logQuotaInfoNoLock(verb string, quotaInfo *QuotaInfo) {
klog.Infof("%s\n"+
"quotaName:%v quotaParentName:%v IsParent:%v request:%v maxQuota:%v OriginalMinQuota:%v"+
"autoScaleMinQuota:%v SharedWeight:%v runtime:%v used:%v guaranteed:%v allocated:%v treeName:%v totalResource:%v reqLimit:%v refreshedVersion:%v", verb,
quotaInfo.Name, quotaInfo.ParentName, quotaInfo.IsParent, quotaInfo.CalculateInfo.Request,
quotaInfo.CalculateInfo.Max, quotaInfo.CalculateInfo.Min, quotaInfo.CalculateInfo.AutoScaleMin, quotaInfo.CalculateInfo.SharedWeight,
quotaInfo.CalculateInfo.Runtime, quotaInfo.CalculateInfo.Used, quotaInfo.CalculateInfo.Guaranteed, quotaInfo.CalculateInfo.Allocated, qtw.treeName, qtw.totalResource, qtw.groupReqLimit,
qtw.globalRuntimeVersion)
klog.Infof("[%v] quotaName: %v, quotaParentName: %v, IsParent: %v, CalculateInfo: %v, treeName: %v, totalResource: %v, reqLimit: %v, refreshedVersion: %v",
verb, quotaInfo.Name, quotaInfo.ParentName, quotaInfo.IsParent, util.DumpJSON(quotaInfo.CalculateInfo),
qtw.treeName, util.DumpJSON(qtw.totalResource), util.DumpJSON(qtw.groupReqLimit), qtw.globalRuntimeVersion)
}

func getQuantityValue(res resource.Quantity, resName v1.ResourceName) int64 {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
quotav1 "k8s.io/apiserver/pkg/quota/v1"
"k8s.io/klog/v2"

"github.com/koordinator-sh/koordinator/pkg/util"
)

// ScaleMinQuotaManager The child nodes under each node will be divided into two categories, one allows
Expand Down Expand Up @@ -82,9 +84,12 @@ func (s *ScaleMinQuotaManager) update(parQuotaName, subQuotaName string, subMinQ
s.disableScaleSubsSumMinQuotaMap[parQuotaName] = quotav1.Add(s.disableScaleSubsSumMinQuotaMap[parQuotaName], subMinQuota)
}

klog.V(5).Infof("UpdateScaleMinQuota, quota:%v originalMinQuota change from :%v to %v,"+
"enableMinQuotaScale change from :%v to :%v", subQuotaName, s.originalMinQuotaMap[subQuotaName],
subMinQuota, s.quotaEnableMinQuotaScaleMap[subQuotaName], enableScaleMinQuota)
if klog.V(5).Enabled() {
klog.Infof("UpdateScaleMinQuota, quota: %v originalMinQuota change from %v to %v, "+
"enableMinQuotaScale change from %v to %v", subQuotaName,
util.DumpJSON(s.originalMinQuotaMap[subQuotaName]), util.DumpJSON(subMinQuota),
util.DumpJSON(s.quotaEnableMinQuotaScaleMap[subQuotaName]), util.DumpJSON(enableScaleMinQuota))
}

// step3: record the newMinQuota
s.originalMinQuotaMap[subQuotaName] = subMinQuota
Expand Down Expand Up @@ -146,7 +151,10 @@ func (s *ScaleMinQuotaManager) getScaledMinQuota(newTotalRes v1.ResourceList, pa
newMinQuota[resourceDimension] = createQuantity(newMinQuotaValue, resourceDimension)
}
}
klog.V(5).Infof("GetScaleMinQuota, parQuota: %v, subQuota: %v, needScaleDimensions: %v, totalRes: %v, newMinQuota:%v",
parQuotaName, subQuotaName, needScaleDimensions, newTotalRes, newMinQuota)

if klog.V(5).Enabled() {
klog.Infof("GetScaleMinQuota, parQuota: %v, subQuota: %v, needScaleDimensions: %v, totalRes: %v, newMinQuota: %v",
parQuotaName, subQuotaName, needScaleDimensions, util.DumpJSON(newTotalRes), util.DumpJSON(newMinQuota))
}
return true, newMinQuota
}
25 changes: 12 additions & 13 deletions pkg/scheduler/plugins/elasticquota/pod_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ func (g *Plugin) OnPodAdd(obj interface{}) {
mgr := g.GetGroupQuotaManagerForTree(treeID)
if mgr != nil {
mgr.OnPodAdd(quotaName, pod)
klog.V(5).Infof("OnPodAddFunc %v.%v add success, quota: %v, tree: [%v]", pod.Namespace, pod.Name, quotaName, treeID)
klog.V(5).Infof("OnPodAddFunc %v add success, quota: %v, tree: [%v]", klog.KObj(pod), quotaName, treeID)
} else {
klog.Warningf("OnPodAddFunc %v.%v add failed, quota: %v, quota manager not found: %v", pod.Namespace, pod.Name, quotaName, treeID)
klog.Warningf("OnPodAddFunc %v add failed, quota: %v, quota manager not found: %v", klog.KObj(pod), quotaName, treeID)
}
}

Expand All @@ -48,7 +48,6 @@ func (g *Plugin) OnPodUpdate(oldObj, newObj interface{}) {
newPod := newObj.(*corev1.Pod)

if oldPod.ResourceVersion == newPod.ResourceVersion {
klog.Warningf("update pod warning, update version for the same:%v", newPod.Name)
return
}

Expand All @@ -61,19 +60,19 @@ func (g *Plugin) OnPodUpdate(oldObj, newObj interface{}) {
if oldQuotaName == "" {
if newQuotaName != "" {
mgr.OnPodAdd(newQuotaName, newPod)
klog.V(5).Infof("OnPodUpdateFunc %v.%v add success, quota:%v, tree: [%v]", newPod.Namespace, newPod.Name, newQuotaName, newTree)
klog.V(5).Infof("OnPodUpdateFunc %v add success, quota: %v, tree: [%v]", klog.KObj(newPod), newQuotaName, newTree)
}
} else {
if newQuotaName != "" {
mgr.OnPodUpdate(newQuotaName, oldQuotaName, newPod, oldPod)
klog.V(5).Infof("OnPodUpdateFunc %v.%v update success, quota:%v, tree: [%v]", newPod.Namespace, newPod.Name, newQuotaName, newTree)
klog.V(5).Infof("OnPodUpdateFunc %v update success, quota:%v, tree: [%v]", klog.KObj(newPod), newQuotaName, newTree)
} else {
mgr.OnPodDelete(oldQuotaName, oldPod)
klog.V(5).Infof("OnPodUpdateFunc %v.%v delete success, quota:%v, tree: [%v]", oldPod.Namespace, oldPod.Name, oldQuotaName, oldTree)
klog.V(5).Infof("OnPodUpdateFunc %v delete success, quota:%v, tree: [%v]", klog.KObj(oldPod), oldQuotaName, oldTree)
}
}
} else {
klog.Errorf("OnPodUpdateFunc %v.%v update failed, quota: %v, quota manager not found: %v", newPod.Namespace, newPod.Name, newQuotaName, newTree)
klog.Errorf("OnPodUpdateFunc %v update failed, quota: %v, quota manager not found: %v", klog.KObj(newPod), newQuotaName, newTree)
}
return
}
Expand All @@ -83,18 +82,18 @@ func (g *Plugin) OnPodUpdate(oldObj, newObj interface{}) {
if oldMgr != nil {
if oldQuotaName != "" {
oldMgr.OnPodDelete(oldQuotaName, oldPod)
klog.V(5).Infof("OnPodUpdateFunc %v.%v, delete success, quota: %v, tree: %v", oldPod.Namespace, oldPod.Name, oldQuotaName, oldTree)
klog.V(5).Infof("OnPodUpdateFunc %v, delete success, quota: %v, tree: %v", klog.KObj(oldPod), oldQuotaName, oldTree)
}
} else {
klog.Errorf("OnPodUpdateFunc %v.%v delete failed, quota: %v, quota manager not found: %v", oldPod.Namespace, oldPod.Name, oldQuotaName, oldTree)
klog.Errorf("OnPodUpdateFunc %v delete failed, quota: %v, quota manager not found: %v", klog.KObj(oldPod), oldQuotaName, oldTree)
}
if newMgr != nil {
if newQuotaName != "" {
newMgr.OnPodAdd(newQuotaName, newPod)
klog.V(5).Infof("OnPodUpdateFunc %v.%v add success, quota: %v, tree: %v ", newPod.Namespace, newPod.Name, newQuotaName, newTree)
klog.V(5).Infof("OnPodUpdateFunc %v add success, quota: %v, tree: %v ", klog.KObj(newPod), newQuotaName, newTree)
}
} else {
klog.Errorf("OnPodUpdateFunc %v.%v add failed, quota: %v, quota manager not found: %v", newPod.Namespace, newPod.Name, newQuotaName, newTree)
klog.Errorf("OnPodUpdateFunc %v add failed, quota: %v, quota manager not found: %v", klog.KObj(newPod), newQuotaName, newTree)
}
}

Expand All @@ -112,8 +111,8 @@ func (g *Plugin) OnPodDelete(obj interface{}) {
mgr := g.GetGroupQuotaManagerForTree(treeID)
if mgr != nil {
mgr.OnPodDelete(quotaName, pod)
klog.V(5).Infof("OnPodDeleteFunc %v.%v delete success, quota: %v, tree: %v", pod.Namespace, pod.Name, quotaName, treeID)
klog.V(5).Infof("OnPodDeleteFunc %v delete success, quota: %v, tree: %v", klog.KObj(pod), quotaName, treeID)
} else {
klog.Errorf("OnPodDeleteFunc %v.%v delete failed, quota: %v, tree: %v", pod.Namespace, pod.Name, quotaName, treeID)
klog.Errorf("OnPodDeleteFunc %v delete failed, quota: %v, tree: %v", klog.KObj(pod), quotaName, treeID)
}
}
Loading

0 comments on commit 125e52a

Please sign in to comment.