diff --git a/pkg/scheduler/objects/application.go b/pkg/scheduler/objects/application.go index cde6fbd9a..a064d321b 100644 --- a/pkg/scheduler/objects/application.go +++ b/pkg/scheduler/objects/application.go @@ -1223,6 +1223,12 @@ func (sa *Application) tryPlaceholderAllocate(nodeIterator func() NodeIterator, return allocResult } +// check ask against both user headRoom and queue headRoom +func (sa *Application) checkHeadRooms(ask *AllocationAsk, userHeadroom *resources.Resource, headRoom *resources.Resource) bool { + // check if this fits in the users' headroom first, if that fits check the queues' headroom + return userHeadroom.FitInMaxUndef(ask.GetAllocatedResource()) && headRoom.FitInMaxUndef(ask.GetAllocatedResource()) +} + // Try a reserved allocation of an outstanding reservation func (sa *Application) tryReservedAllocate(headRoom *resources.Resource, nodeIterator func() NodeIterator) *Allocation { sa.Lock() @@ -1250,13 +1256,8 @@ func (sa *Application) tryReservedAllocate(headRoom *resources.Resource, nodeIte alloc := newUnreservedAllocation(reserve.nodeID, unreserveAsk) return alloc } - // check if this fits in the users' headroom first, if that fits check the queues' headroom - if !userHeadroom.FitInMaxUndef(ask.GetAllocatedResource()) { - continue - } - // check if this fits in the queue's headroom - if !headRoom.FitInMaxUndef(ask.GetAllocatedResource()) { + if !sa.checkHeadRooms(ask, userHeadroom, headRoom) { continue } @@ -1280,12 +1281,16 @@ func (sa *Application) tryReservedAllocate(headRoom *resources.Resource, nodeIte // lets try this on all other nodes for _, reserve := range sa.reservations { // Other nodes cannot be tried if the ask has a required node - if reserve.ask.GetRequiredNode() != "" { + ask := reserve.ask + if ask.GetRequiredNode() != "" { continue } iterator := nodeIterator() if iterator != nil { - alloc := sa.tryNodesNoReserve(reserve.ask, iterator, reserve.nodeID) + if !sa.checkHeadRooms(ask, userHeadroom, headRoom) { + continue + } + alloc := sa.tryNodesNoReserve(ask, iterator, reserve.nodeID) // have a candidate return it, including the node that was reserved if alloc != nil { return alloc @@ -1490,7 +1495,7 @@ func (sa *Application) tryNode(node *Node, ask *AllocationAsk) *Allocation { alloc := NewAllocation(node.NodeID, ask) if node.AddAllocation(alloc) { if err := sa.queue.IncAllocatedResource(alloc.GetAllocatedResource(), false); err != nil { - log.Log(log.SchedApplication).Warn("queue update failed unexpectedly", + log.Log(log.SchedApplication).DPanic("queue update failed unexpectedly", zap.Error(err)) // revert the node update node.RemoveAllocation(alloc.GetAllocationID()) diff --git a/pkg/scheduler/objects/application_test.go b/pkg/scheduler/objects/application_test.go index bc055ee6e..044396724 100644 --- a/pkg/scheduler/objects/application_test.go +++ b/pkg/scheduler/objects/application_test.go @@ -2601,6 +2601,39 @@ func TestGetRateLimitedAppLog(t *testing.T) { assert.Check(t, l != nil) } +func TestTryAllocateWithReservedHeadRoomChecking(t *testing.T) { + defer func() { + if r := recover(); r != nil { + t.Fatalf("reserved headroom test regression: %v", r) + } + }() + + res, err := resources.NewResourceFromConf(map[string]string{"memory": "2G"}) + assert.NilError(t, err, "failed to create basic resource") + var headRoom *resources.Resource + headRoom, err = resources.NewResourceFromConf(map[string]string{"memory": "1G"}) + assert.NilError(t, err, "failed to create basic resource") + + app := newApplication(appID1, "default", "root") + ask := newAllocationAsk(aKey, appID1, res) + var queue *Queue + queue, err = createRootQueue(map[string]string{"memory": "1G"}) + assert.NilError(t, err, "queue create failed") + app.queue = queue + err = app.AddAllocationAsk(ask) + assert.NilError(t, err, "ask should have been added to app") + + node1 := newNodeRes(nodeID1, res) + node2 := newNodeRes(nodeID2, res) + // reserve that works + err = app.Reserve(node1, ask) + assert.NilError(t, err, "reservation should not have failed") + + iter := getNodeIteratorFn(node1, node2) + alloc := app.tryReservedAllocate(headRoom, iter) + assert.Assert(t, alloc == nil, "Alloc is expected to be nil due to insufficient headroom") +} + func TestUpdateRunnableStatus(t *testing.T) { app := newApplication(appID0, "default", "root.unknown") assert.Assert(t, app.runnableInQueue) diff --git a/pkg/scheduler/objects/utilities_test.go b/pkg/scheduler/objects/utilities_test.go index e66e4e982..577e26ff2 100644 --- a/pkg/scheduler/objects/utilities_test.go +++ b/pkg/scheduler/objects/utilities_test.go @@ -42,6 +42,7 @@ const ( aKey = "alloc-1" aAllocationID = "alloc-allocationid-1" nodeID1 = "node-1" + nodeID2 = "node-2" instType1 = "itype-1" )