From b380a29baadafc7c23ea9cbf26ce62d8ae896e98 Mon Sep 17 00:00:00 2001 From: Manikandan R Date: Fri, 23 Aug 2024 11:15:18 +0530 Subject: [PATCH 1/2] [YUNIKORN-2808] E2E test Verify_preemption_on_priority_queue test is flaky --- pkg/scheduler/objects/preemption.go | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pkg/scheduler/objects/preemption.go b/pkg/scheduler/objects/preemption.go index 3184658a5..677a82d09 100644 --- a/pkg/scheduler/objects/preemption.go +++ b/pkg/scheduler/objects/preemption.go @@ -176,6 +176,11 @@ func (p *Preemptor) initWorkingState() { p.allocationsByNode = allocationsByNode p.queueByAlloc = queueByAlloc p.nodeAvailableMap = nodeAvailableMap + log.Log(log.SchedPreemption).Info("Preemption triggered based on working state", zap.String("ask", p.ask.GetAllocationKey()), + zap.String("ask queue path", p.queuePath), + zap.Int("allocationsByNode", len(p.allocationsByNode)), + zap.Int("queueByAlloc", len(p.queueByAlloc)), + zap.Int("nodeAvailableMap", len(p.nodeAvailableMap))) } // checkPreemptionQueueGuarantees verifies that it's possible to free enough resources to fit the given ask @@ -198,6 +203,8 @@ func (p *Preemptor) checkPreemptionQueueGuarantees() bool { snapshot.RemoveAllocation(alloc.GetAllocatedResource()) remaining := currentQueue.GetRemainingGuaranteedResource() if remaining != nil && resources.StrictlyGreaterThanOrEquals(remaining, resources.Zero) { + log.Log(log.SchedPreemption).Info("Preemption Queue Guarantees check has passed", zap.String("ask", p.ask.GetAllocationKey()), + zap.String("ask queue path", p.queuePath)) return true } } @@ -530,6 +537,10 @@ func (p *Preemptor) tryNodes() (string, []*Allocation, bool) { } // identify which victims and in which order should be tried if idx, victims := p.calculateVictimsByNode(nodeAvailable, allocations); victims != nil { + log.Log(log.SchedPreemption).Info("Node wise Potential victims collected for preemption", zap.String("ask", p.ask.GetAllocationKey()), + zap.String("ask queue path", p.queuePath), + zap.String("node", nodeID), + zap.Int("victims count", len(victims))) victimsByNode[nodeID] = victims keys := make([]string, 0) for _, victim := range victims { @@ -569,6 +580,10 @@ func (p *Preemptor) TryPreemption() (*AllocationResult, bool) { // no preemption possible return nil, false } + log.Log(log.SchedPreemption).Info("Node chosen for preemption", zap.String("ask", p.ask.GetAllocationKey()), + zap.String("ask queue path", p.queuePath), + zap.String("node", nodeID), + zap.Int("victims count", len(victims))) // look for additional victims in case we have not yet made enough capacity in the queue extraVictims, ok := p.calculateAdditionalVictims(victims) @@ -576,6 +591,10 @@ func (p *Preemptor) TryPreemption() (*AllocationResult, bool) { // not enough resources were preempted return nil, false } + log.Log(log.SchedPreemption).Info("Additional victims chosen for preemption", zap.String("ask", p.ask.GetAllocationKey()), + zap.String("ask queue path", p.queuePath), + zap.String("node", nodeID), + zap.Int("additional victims count", len(extraVictims))) victims = append(victims, extraVictims...) if len(victims) == 0 { return nil, false From 0cb6b73324695d3a5157545b1d047fb0171b3235 Mon Sep 17 00:00:00 2001 From: Manikandan R Date: Fri, 23 Aug 2024 11:58:14 +0530 Subject: [PATCH 2/2] Run the tests for many versions --- .github/workflows/pre-commit.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 0c5d1fb4b..fa86ecc0d 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -44,7 +44,7 @@ jobs: strategy: fail-fast: false matrix: - k8s: [v1.31.0] + k8s: [v1.31.0, v1.30.0, v1.29.4, v1.28.9, v1.27.13, v1.26.15, v1.25.16, v1.24.17] plugin: [""] steps: - name: Checkout yunikorn-k8shim source code