diff --git a/internal/scheduler/internaltypes/node.go b/internal/scheduler/internaltypes/node.go
index ee78a3aea0e..d91ed8f52c0 100644
--- a/internal/scheduler/internaltypes/node.go
+++ b/internal/scheduler/internaltypes/node.go
@@ -1,11 +1,24 @@
 package internaltypes

 import (
+    "math"
+
+    "github.com/pkg/errors"
     "golang.org/x/exp/maps"
     v1 "k8s.io/api/core/v1"

+    "github.com/armadaproject/armada/internal/scheduler/configuration"
     "github.com/armadaproject/armada/internal/scheduler/kubernetesobjects/label"
     koTaint "github.com/armadaproject/armada/internal/scheduler/kubernetesobjects/taint"
+    "github.com/armadaproject/armada/internal/scheduler/schedulerobjects"
+)
+
+const (
+    // EvictedPriority is the priority class priority at which resources consumed by evicted jobs are accounted for.
+    // This helps avoid scheduling new jobs onto nodes in a way that would make it impossible to re-schedule evicted jobs.
+    EvictedPriority int32 = -1
+    // MinPriority is the smallest possible priority class priority within the NodeDb.
+    MinPriority int32 = EvictedPriority
 )

 // Node is a scheduler-internal representation of one Kubernetes node.
@@ -18,10 +31,10 @@ type Node struct {
     index uint64

     // Executor this node belongs to and node name, which must be unique per executor.
-    executor string
-    name string
-    pool string
-    nodeTypeId uint64
+    executor string
+    name string
+    pool string
+    nodeType *NodeType

     // We need to store taints and labels separately from the node type: the latter only includes
     // indexed taints and labels, but we need all of them when checking pod requirements.
@@ -40,9 +53,65 @@ type Node struct {
     EvictedJobRunIds map[string]bool
 }

+func FromSchedulerObjectsNode(node *schedulerobjects.Node,
+    nodeIndex uint64,
+    indexedTaints map[string]bool,
+    indexedNodeLabels map[string]bool,
+    resourceListFactory *ResourceListFactory,
+) (*Node, error) {
+    taints := node.GetTaints()
+    if node.Unschedulable {
+        taints = append(koTaint.DeepCopyTaints(taints), UnschedulableTaint())
+    }
+
+    labels := maps.Clone(node.GetLabels())
+    if labels == nil {
+        labels = map[string]string{}
+    }
+    labels[configuration.NodeIdLabel] = node.Id
+
+    totalResources := node.TotalResources
+
+    nodeType := NewNodeType(
+        taints,
+        labels,
+        indexedTaints,
+        indexedNodeLabels,
+    )
+
+    allocatableByPriority := map[int32]ResourceList{}
+    minimumPriority := int32(math.MaxInt32)
+    for p, rl := range node.AllocatableByPriorityAndResource {
+        if p < minimumPriority {
+            minimumPriority = p
+        }
+        allocatableByPriority[p] = resourceListFactory.FromNodeProto(rl.Resources)
+    }
+    if minimumPriority < 0 {
+        return nil, errors.Errorf("found negative priority %d on node %s; negative priorities are reserved for internal use", minimumPriority, node.Id)
+    }
+    allocatableByPriority[EvictedPriority] = allocatableByPriority[minimumPriority]
+
+    return CreateNode(
+        node.Id,
+        nodeType,
+        nodeIndex,
+        node.Executor,
+        node.Name,
+        node.Pool,
+        taints,
+        labels,
+        resourceListFactory.FromNodeProto(totalResources.Resources),
+        allocatableByPriority,
+        map[string]ResourceList{},
+        map[string]ResourceList{},
+        map[string]bool{},
+        nil), nil
+}
+
 func CreateNode(
     id string,
-    nodeTypeId uint64,
+    nodeType *NodeType,
     index uint64,
     executor string,
     name string,
@@ -58,7 +127,7 @@ func CreateNode(
 ) *Node {
     return &Node{
         id: id,
-        nodeTypeId: nodeTypeId,
+        nodeType: nodeType,
         index: index,
         executor: executor,
         name: name,
@@ -95,7 +164,11 @@ func (node *Node) GetExecutor() string {
 }

 func (node *Node) GetNodeTypeId() uint64 {
-    return node.nodeTypeId
+    return node.nodeType.GetId()
+}
+
+func (node *Node) GetNodeType() *NodeType {
+    return node.nodeType
 }

 func (node *Node) GetLabels() map[string]string {
@@ -139,7 +212,7 @@ func (node *Node) DeepCopyNilKeys() *Node {
         executor: node.executor,
         name: node.name,
         pool: node.pool,
-        nodeTypeId: node.nodeTypeId,
+        nodeType: node.nodeType,
         taints: node.taints,
         labels: node.labels,
         totalResources: node.totalResources,
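As a rough sketch of the allocatable-by-priority bookkeeping introduced above (the priorities and millicore quantities below are invented, and plain int64 values stand in for internaltypes.ResourceList), the lowest reported priority is mirrored into EvictedPriority so that resources held by evicted jobs stay accounted for:

package main

import (
    "fmt"
    "math"
)

func main() {
    const evictedPriority int32 = -1 // mirrors internaltypes.EvictedPriority

    // Allocatable CPU millicores by priority class priority, as a node might report it.
    allocatableByPriority := map[int32]int64{
        0:     8000,
        1000:  16000,
        30000: 32000,
    }

    // Find the lowest real priority and mirror its allocatable at evictedPriority,
    // as FromSchedulerObjectsNode does for ResourceList values.
    minimumPriority := int32(math.MaxInt32)
    for p := range allocatableByPriority {
        if p < minimumPriority {
            minimumPriority = p
        }
    }
    allocatableByPriority[evictedPriority] = allocatableByPriority[minimumPriority]

    fmt.Println(allocatableByPriority[evictedPriority]) // 8000
}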
diff --git a/internal/scheduler/internaltypes/node_test.go b/internal/scheduler/internaltypes/node_test.go
index 0d211e97689..b591ac80bfc 100644
--- a/internal/scheduler/internaltypes/node_test.go
+++ b/internal/scheduler/internaltypes/node_test.go
@@ -18,7 +18,6 @@ func TestNode(t *testing.T) {
     assert.Nil(t, err)

     const id = "id"
-    const nodeTypeId = uint64(123)
     const pool = "pool"
     const index = uint64(1)
     const executor = "executor"
@@ -84,9 +83,16 @@ func TestNode(t *testing.T) {
         },
     }

+    nodeType := NewNodeType(
+        taints,
+        labels,
+        map[string]bool{"foo": true},
+        map[string]bool{"key": true},
+    )
+
     node := CreateNode(
         id,
-        nodeTypeId,
+        nodeType,
         index,
         executor,
         name,
@@ -102,7 +108,8 @@ func TestNode(t *testing.T) {
     )

     assert.Equal(t, id, node.GetId())
-    assert.Equal(t, nodeTypeId, node.GetNodeTypeId())
+    assert.Equal(t, nodeType.GetId(), node.GetNodeTypeId())
+    assert.Equal(t, nodeType.GetId(), node.GetNodeType().GetId())
     assert.Equal(t, index, node.GetIndex())
     assert.Equal(t, executor, node.GetExecutor())
     assert.Equal(t, name, node.GetName())
diff --git a/internal/scheduler/internaltypes/node_type.go b/internal/scheduler/internaltypes/node_type.go
index 393cb8a8305..131c8346a68 100644
--- a/internal/scheduler/internaltypes/node_type.go
+++ b/internal/scheduler/internaltypes/node_type.go
@@ -65,7 +65,7 @@ type (
     labelsFilterFunc func(key, value string) bool
 )

-func NewNodeType(taints []v1.Taint, labels map[string]string, indexedTaints map[string]interface{}, indexedLabels map[string]interface{}) *NodeType {
+func NewNodeType(taints []v1.Taint, labels map[string]string, indexedTaints map[string]bool, indexedLabels map[string]bool) *NodeType {
     if taints == nil {
         taints = make([]v1.Taint, 0)
     }
diff --git a/internal/scheduler/internaltypes/node_type_test.go b/internal/scheduler/internaltypes/node_type_test.go
index 146cca262f8..3af4087e749 100644
--- a/internal/scheduler/internaltypes/node_type_test.go
+++ b/internal/scheduler/internaltypes/node_type_test.go
@@ -86,7 +86,7 @@ func makeSut() *NodeType {
     return NewNodeType(
         taints,
         labels,
-        map[string]interface{}{"taint1": true, "taint2": true, "taint3": true},
-        map[string]interface{}{"label1": true, "label2": true, "label3": true},
+        map[string]bool{"taint1": true, "taint2": true, "taint3": true},
+        map[string]bool{"label1": true, "label2": true, "label3": true},
     )
 }
diff --git a/internal/scheduler/internaltypes/resource_list.go b/internal/scheduler/internaltypes/resource_list.go
index 7d1556c4166..98477d71df0 100644
--- a/internal/scheduler/internaltypes/resource_list.go
+++ b/internal/scheduler/internaltypes/resource_list.go
@@ -80,6 +80,19 @@ func (rl ResourceList) GetResources() []Resource {
     return result
 }

+func (rl ResourceList) ToMap() map[string]k8sResource.Quantity {
+    if rl.IsEmpty() {
+        return map[string]k8sResource.Quantity{}
+    }
+
+    result := map[string]k8sResource.Quantity{}
+    for i, q := range rl.resources {
+        quantity := k8sResource.NewScaledQuantity(q, rl.factory.scales[i])
+        result[rl.factory.indexToName[i]] = *quantity
+    }
+    return result
+}
+
 func (rl ResourceList) AllZero() bool {
     if rl.IsEmpty() {
         return true
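To make the intent of the new ToMap accessor concrete, here is a small, self-contained illustration of the quantity shapes it is expected to return for a list holding one CPU and 1Gi of memory (mirroring TestToMap below); the resource names are taken from the test fixtures and the snippet deliberately avoids the internal factory/scale machinery:

package main

import (
    "fmt"

    k8sResource "k8s.io/apimachinery/pkg/api/resource"
)

func main() {
    // ToMap rebuilds each entry as a k8s Quantity from the stored integer
    // value and its per-resource scale, e.g. millis for cpu, unscaled for memory.
    expected := map[string]k8sResource.Quantity{
        "cpu":    *k8sResource.NewScaledQuantity(1000, k8sResource.Milli),
        "memory": *k8sResource.NewScaledQuantity(1024*1024*1024, 0),
    }
    for name, q := range expected {
        fmt.Printf("%s: %s\n", name, q.String())
    }
}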
diff --git a/internal/scheduler/internaltypes/resource_list_test.go b/internal/scheduler/internaltypes/resource_list_test.go
index b48b4aab57c..a0fd19df587 100644
--- a/internal/scheduler/internaltypes/resource_list_test.go
+++ b/internal/scheduler/internaltypes/resource_list_test.go
@@ -82,6 +82,23 @@ func TestGetResources_HandlesEmptyCorrectly(t *testing.T) {
     assert.Equal(t, 0, len(empty.GetResources()))
 }

+func TestToMap(t *testing.T) {
+    factory := testFactory()
+    a := testResourceList(factory, "1", "1Gi")
+    expected := map[string]k8sResource.Quantity{
+        "memory": *k8sResource.NewScaledQuantity(1024*1024*1024, k8sResource.Scale(0)),
+        "ephemeral-storage": *k8sResource.NewScaledQuantity(0, k8sResource.Scale(0)),
+        "cpu": *k8sResource.NewScaledQuantity(1000, k8sResource.Milli),
+        "nvidia.com/gpu": *k8sResource.NewScaledQuantity(0, k8sResource.Milli),
+    }
+    assert.Equal(t, expected, a.ToMap())
+}
+
+func TestToMap_HandlesEmptyCorrectly(t *testing.T) {
+    empty := ResourceList{}
+    assert.Equal(t, map[string]k8sResource.Quantity{}, empty.ToMap())
+}
+
 func TestAllZero(t *testing.T) {
     factory := testFactory()
     assert.True(t, testResourceList(factory, "0", "0").AllZero())
diff --git a/internal/scheduler/nodedb/unschedulable.go b/internal/scheduler/internaltypes/unschedulable.go
similarity index 68%
rename from internal/scheduler/nodedb/unschedulable.go
rename to internal/scheduler/internaltypes/unschedulable.go
index 7de03733310..3d12b2bf7c3 100644
--- a/internal/scheduler/nodedb/unschedulable.go
+++ b/internal/scheduler/internaltypes/unschedulable.go
@@ -1,4 +1,4 @@
-package nodedb
+package internaltypes

 import v1 "k8s.io/api/core/v1"

@@ -16,11 +16,3 @@ func UnschedulableTaint() v1.Taint {
         Effect: unschedulableTaintEffect,
     }
 }
-
-// UnschedulableToleration returns a toleration that tolerates UnschedulableTaint().
-func UnschedulableToleration() v1.Toleration {
-    return v1.Toleration{
-        Key: unschedulableTaintKey,
-        Value: unschedulableTaintValue,
-    }
-}
diff --git a/internal/scheduler/nodedb/nodedb.go b/internal/scheduler/nodedb/nodedb.go
index 776bcd18295..3ebe76f8543 100644
--- a/internal/scheduler/nodedb/nodedb.go
+++ b/internal/scheduler/nodedb/nodedb.go
@@ -5,6 +5,7 @@ import (
     "math"
     "strings"
     "sync"
+    "sync/atomic"
     "text/tabwriter"
     "time"

@@ -17,90 +18,44 @@ import (
     "github.com/armadaproject/armada/internal/common/armadaerrors"
     "github.com/armadaproject/armada/internal/common/slices"
     "github.com/armadaproject/armada/internal/common/types"
+    "github.com/armadaproject/armada/internal/common/util"
     "github.com/armadaproject/armada/internal/scheduler/configuration"
     "github.com/armadaproject/armada/internal/scheduler/internaltypes"
     "github.com/armadaproject/armada/internal/scheduler/jobdb"
-    koTaint "github.com/armadaproject/armada/internal/scheduler/kubernetesobjects/taint"
     "github.com/armadaproject/armada/internal/scheduler/schedulerobjects"
     "github.com/armadaproject/armada/internal/scheduler/scheduling/context"
 )

-const (
-    // evictedPriority is the priority class priority resources consumed by evicted jobs are accounted for at.
-    // This helps avoid scheduling new jobs onto nodes that make it impossible to re-schedule evicted jobs.
-    evictedPriority int32 = -1
-    // MinPriority is the smallest possible priority class priority within the NodeDb.
-    MinPriority int32 = evictedPriority
-)
-
 var empty struct{}

-func (nodeDb *NodeDb) create(node *schedulerobjects.Node) (*internaltypes.Node, error) {
-    taints := node.GetTaints()
-    if node.Unschedulable {
-        taints = append(koTaint.DeepCopyTaints(taints), UnschedulableTaint())
-    }
-
-    labels := maps.Clone(node.GetLabels())
-    if labels == nil {
-        labels = map[string]string{}
-    }
-    labels[configuration.NodeIdLabel] = node.Id
-
-    totalResources := node.TotalResources
-
-    nodeType := internaltypes.NewNodeType(
-        taints,
-        labels,
-        nodeDb.indexedTaints,
-        nodeDb.indexedNodeLabels,
-    )
-
-    allocatableByPriority := map[int32]internaltypes.ResourceList{}
-    minimumPriority := int32(math.MaxInt32)
-    for p, rl := range node.AllocatableByPriorityAndResource {
-        if p < minimumPriority {
-            minimumPriority = p
-        }
-        allocatableByPriority[p] = nodeDb.resourceListFactory.FromNodeProto(rl.Resources)
-    }
-    if minimumPriority < 0 {
-        return nil, errors.Errorf("found negative priority %d on node %s; negative priorities are reserved for internal use", minimumPriority, node.Id)
-    }
-    allocatableByPriority[evictedPriority] = allocatableByPriority[minimumPriority]
-
+func (nodeDb *NodeDb) AddNodeToDb(node *internaltypes.Node) {
     nodeDb.mu.Lock()
+    defer nodeDb.mu.Unlock()
     for key := range nodeDb.indexedNodeLabels {
-        if value, ok := labels[key]; ok {
+        if value, ok := node.GetLabelValue(key); ok {
             nodeDb.indexedNodeLabelValues[key][value] = empty
         }
     }
-    index := uint64(nodeDb.numNodes)
-    nodeDb.numNodes++
+    nodeType := node.GetNodeType()
     nodeDb.numNodesByNodeType[nodeType.GetId()]++
-    nodeDb.totalResources.Add(totalResources)
-    nodeDb.nodeTypes[nodeType.GetId()] = nodeType
-    nodeDb.mu.Unlock()
-
-    return internaltypes.CreateNode(
-        node.Id,
-        nodeType.GetId(),
-        index,
-        node.Executor,
-        node.Name,
-        node.Pool,
-        taints,
-        labels,
-        nodeDb.resourceListFactory.FromNodeProto(totalResources.Resources),
-        allocatableByPriority,
-        map[string]internaltypes.ResourceList{},
-        map[string]internaltypes.ResourceList{},
-        map[string]bool{},
-        nil), nil
+    nodeDb.totalResources = nodeDb.totalResources.Add(node.GetTotalResources())
+    nodeDb.nodeTypes[node.GetNodeTypeId()] = nodeType
 }

 func (nodeDb *NodeDb) CreateAndInsertWithJobDbJobsWithTxn(txn *memdb.Txn, jobs []*jobdb.Job, node *schedulerobjects.Node) error {
-    entry, err := nodeDb.create(node)
+    index := atomic.AddUint64(&nodeDb.numNodes, 1)
+
+    entry, err := internaltypes.FromSchedulerObjectsNode(node,
+        index,
+        nodeDb.indexedTaints,
+        nodeDb.indexedNodeLabels,
+        nodeDb.resourceListFactory)
+    if err != nil {
+        return err
+    }
+
+    nodeDb.AddNodeToDb(entry)
+
     if err != nil {
         return err
     }
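The net effect of the two changes above is that node construction now lives in internaltypes while the NodeDb only does bookkeeping and indexing. A simplified sketch of the resulting insertion flow follows; insertNode is a hypothetical helper (not part of this change), and the UpsertMany call follows the pattern used by the iterator tests further down:

package example

import (
    "github.com/armadaproject/armada/internal/scheduler/internaltypes"
    "github.com/armadaproject/armada/internal/scheduler/nodedb"
    "github.com/armadaproject/armada/internal/scheduler/schedulerobjects"
)

// insertNode sketches the new flow: build the internal Node first, then
// register it with the NodeDb and upsert it into the in-memory index.
func insertNode(
    db *nodedb.NodeDb,
    protoNode *schedulerobjects.Node,
    index uint64,
    indexedTaints, indexedNodeLabels map[string]bool,
    factory *internaltypes.ResourceListFactory,
) error {
    entry, err := internaltypes.FromSchedulerObjectsNode(
        protoNode, index, indexedTaints, indexedNodeLabels, factory)
    if err != nil {
        return err // e.g. the node reports a negative priority
    }
    db.AddNodeToDb(entry) // label values, node-type counts, total resources
    return db.UpsertMany([]*internaltypes.Node{entry})
}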
@@ -148,7 +103,7 @@ type NodeDb struct {
     // for which indexes are created to enable efficient lookup.
     indexedResources []string
     // Like indexedResources, but stored as a map for efficient lookup.
-    indexedResourcesSet map[string]interface{}
+    indexedResourcesSet map[string]bool
     // The resolution with which indexed resources are tracked.
     // In the same order as indexedResources above.
     // In the same units as the supportedResourceType.
@@ -171,14 +126,14 @@ type NodeDb struct {
     // by a pod when looking for a node a pod can be scheduled on.
     //
     // If not set, all taints are indexed.
-    indexedTaints map[string]interface{}
+    indexedTaints map[string]bool
     // Node labels to create indexes for.
     // Should include node labels frequently used for scheduling.
     // Since the NodeDb can efficiently sort out nodes for which these labels
     // do not match pod node selectors when looking for a node a pod can be scheduled on.
     //
     // If not set, no labels are indexed.
-    indexedNodeLabels map[string]interface{}
+    indexedNodeLabels map[string]bool

     // Mutex for the remaining fields of this struct, which are mutated after initialization.
     mu sync.Mutex
@@ -186,11 +141,11 @@ type NodeDb struct {
     // Map from indexed label names to the set of values that label takes across all nodes in the NodeDb.
     indexedNodeLabelValues map[string]map[string]struct{}
     // Total number of nodes in the db.
-    numNodes int
+    numNodes uint64
     // Number of nodes in the db by node type.
     numNodesByNodeType map[uint64]int
     // Total amount of resources, e.g., "cpu", "memory", "gpu", across all nodes in the db.
-    totalResources schedulerobjects.ResourceList
+    totalResources internaltypes.ResourceList
     // Set of node types. Populated automatically as nodes are inserted.
     // Node types are not cleaned up if all nodes of that type are removed from the NodeDb.
     nodeTypes map[uint64]*internaltypes.NodeType
@@ -222,7 +177,7 @@ func NewNodeDb(
     wellKnownNodeTypes []configuration.WellKnownNodeType,
     resourceListFactory *internaltypes.ResourceListFactory,
 ) (*NodeDb, error) {
-    nodeDbPriorities := []int32{evictedPriority}
+    nodeDbPriorities := []int32{internaltypes.EvictedPriority}
     nodeDbPriorities = append(nodeDbPriorities, types.AllowedPriorities(priorityClasses)...)

     indexedResourceNames := slices.Map(indexedResources, func(v configuration.ResourceType) string { return v.Name })
@@ -260,17 +215,17 @@ func NewNodeDb(
         priorityClasses: priorityClasses,
         nodeDbPriorities: nodeDbPriorities,
         indexedResources: indexedResourceNames,
-        indexedResourcesSet: mapFromSlice(indexedResourceNames),
+        indexedResourcesSet: util.StringListToSet(indexedResourceNames),
         indexedResourceResolution: indexedResourceResolution,
         indexNameByPriority: indexNameByPriority,
         keyIndexByPriority: keyIndexByPriority,
-        indexedTaints: mapFromSlice(indexedTaints),
-        indexedNodeLabels: mapFromSlice(indexedNodeLabels),
+        indexedTaints: util.StringListToSet(indexedTaints),
+        indexedNodeLabels: util.StringListToSet(indexedNodeLabels),
         indexedNodeLabelValues: indexedNodeLabelValues,
         nodeTypes: make(map[uint64]*internaltypes.NodeType),
         wellKnownNodeTypes: make(map[string]*configuration.WellKnownNodeType),
         numNodesByNodeType: make(map[uint64]int),
-        totalResources: schedulerobjects.ResourceList{Resources: make(map[string]resource.Quantity)},
+        totalResources: internaltypes.ResourceList{},
        db: db,
        // Set the initial capacity (somewhat arbitrarily) to 128 reasons.
        podRequirementsNotMetReasonStringCache: make(map[uint64]string, 128),
@@ -287,14 +242,6 @@
     return &nodeDb, nil
 }
-func mapFromSlice(vs []string) map[string]interface{} {
-    rv := make(map[string]interface{})
-    for _, v := range vs {
-        rv[v] = true
-    }
-    return rv
-}
-
 func makeIndexedResourceResolution(indexedResourceTypes []configuration.ResourceType, resourceListFactory *internaltypes.ResourceListFactory) ([]int64, error) {
     if len(indexedResourceTypes) < 1 {
         return nil, errors.New("must specify at least one entry in indexedResources in config")
     }
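util.StringListToSet replaces the local mapFromSlice helper removed above; its implementation is not shown in this diff, but it is assumed to be the common-library equivalent, roughly:

// Assumed behaviour of util.StringListToSet (not part of this diff).
func StringListToSet(list []string) map[string]bool {
    set := make(map[string]bool, len(list))
    for _, s := range list {
        set[s] = true
    }
    return set
}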
@@ -367,15 +314,13 @@ func (nodeDb *NodeDb) IndexedNodeLabelValues(label string) (map[string]struct{},
 }

 func (nodeDb *NodeDb) NumNodes() int {
-    nodeDb.mu.Lock()
-    defer nodeDb.mu.Unlock()
     return int(nodeDb.numNodes)
 }

 func (nodeDb *NodeDb) TotalResources() schedulerobjects.ResourceList {
     nodeDb.mu.Lock()
     defer nodeDb.mu.Unlock()
-    return nodeDb.totalResources.DeepCopy()
+    return schedulerobjects.ResourceList{Resources: nodeDb.totalResources.ToMap()}
 }

 func (nodeDb *NodeDb) Txn(write bool) *memdb.Txn {
@@ -458,8 +403,8 @@ func (nodeDb *NodeDb) SelectNodeForJobWithTxn(txn *memdb.Txn, jctx *context.JobS
     pctx := &context.PodSchedulingContext{
         Created: time.Now(),
         ScheduledAtPriority: priority,
-        PreemptedAtPriority: MinPriority,
-        NumNodes: nodeDb.numNodes,
+        PreemptedAtPriority: internaltypes.MinPriority,
+        NumNodes: int(nodeDb.numNodes),
         NumExcludedNodesByReason: make(map[string]int),
     }
     jctx.PodSchedulingContext = pctx
@@ -560,7 +505,7 @@ func (nodeDb *NodeDb) selectNodeForJobWithTxnAtPriority(

     // Try scheduling at evictedPriority. If this succeeds, no preemption is necessary.
     pctx.NumExcludedNodesByReason = maps.Clone(numExcludedNodesByReason)
-    if node, err := nodeDb.selectNodeForPodAtPriority(txn, jctx, matchingNodeTypeIds, evictedPriority); err != nil {
+    if node, err := nodeDb.selectNodeForPodAtPriority(txn, jctx, matchingNodeTypeIds, internaltypes.EvictedPriority); err != nil {
         return nil, err
     } else if err := assertPodSchedulingContextNode(pctx, node); err != nil {
         return nil, err
@@ -579,7 +524,7 @@ func (nodeDb *NodeDb) selectNodeForJobWithTxnAtPriority(
         return nil, nil
     }
     pctx.NodeId = ""
-    pctx.PreemptedAtPriority = MinPriority
+    pctx.PreemptedAtPriority = internaltypes.MinPriority

     // Schedule by preventing evicted jobs from being re-scheduled.
     // This method respect fairness by preventing from re-scheduling jobs that appear as far back in the total order as possible.
@@ -592,7 +537,7 @@ func (nodeDb *NodeDb) selectNodeForJobWithTxnAtPriority(
     }

     pctx.NodeId = ""
-    pctx.PreemptedAtPriority = MinPriority
+    pctx.PreemptedAtPriority = internaltypes.MinPriority

     // Schedule by kicking off jobs currently bound to a node.
     // This method does not respect fairness when choosing on which node to schedule the job.
@@ -629,7 +574,7 @@ func (nodeDb *NodeDb) selectNodeForJobWithUrgencyPreemption(
     pctx := jctx.PodSchedulingContext
     numExcludedNodesByReason := pctx.NumExcludedNodesByReason
     for _, priority := range nodeDb.nodeDbPriorities {
-        if priority == evictedPriority {
+        if priority == internaltypes.EvictedPriority {
             // We already tried scheduling at evictedPriority above.
             continue
         }
@@ -759,7 +704,7 @@ func (nodeDb *NodeDb) selectNodeForJobWithFairPreemption(txn *memdb.Txn, jctx *c
     if err != nil {
         return nil, errors.WithStack(err)
     }
-    maxPriority := MinPriority
+    maxPriority := internaltypes.MinPriority
     for obj := it.Next(); obj != nil && selectedNode == nil; obj = it.Next() {
         evictedJobSchedulingContext := obj.(*EvictedJobSchedulingContext)
         evictedJctx := evictedJobSchedulingContext.JobSchedulingContext
@@ -775,7 +720,7 @@ func (nodeDb *NodeDb) selectNodeForJobWithFairPreemption(txn *memdb.Txn, jctx *c
             }
             node = &consideredNode{
                 node: nodeFromDb,
-                availableResource: nodeFromDb.AllocatableByPriority[evictedPriority],
+                availableResource: nodeFromDb.AllocatableByPriority[internaltypes.EvictedPriority],
                 staticRequirementsNotMet: false,
                 evictedJobs: []*EvictedJobSchedulingContext{},
             }
@@ -873,7 +818,7 @@ func (nodeDb *NodeDb) bindJobToNodeInPlace(node *internaltypes.Node, job *jobdb.
     allocatable := node.AllocatableByPriority
     markAllocated(allocatable, priority, requests)
     if isEvicted {
-        markAllocatable(allocatable, evictedPriority, requests)
+        markAllocatable(allocatable, internaltypes.EvictedPriority, requests)
     }
     nodeDb.scheduledAtPriorityByJobId[jobId] = priority

@@ -936,7 +881,7 @@ func (nodeDb *NodeDb) evictJobFromNodeInPlace(job *jobdb.Job, node *internaltype
     }
     jobRequests := job.EfficientResourceRequirements()
     markAllocatable(allocatableByPriority, priority, jobRequests)
-    markAllocated(allocatableByPriority, evictedPriority, jobRequests)
+    markAllocated(allocatableByPriority, internaltypes.EvictedPriority, jobRequests)
     return nil
 }

@@ -1007,7 +952,7 @@ func (nodeDb *NodeDb) unbindJobFromNodeInPlace(job *jobdb.Job, node *internaltyp
     allocatable := node.AllocatableByPriority
     var priority int32
     if isEvicted {
-        priority = evictedPriority
+        priority = internaltypes.EvictedPriority
     } else {
         var ok bool
         priority, ok = nodeDb.GetScheduledAtPriority(jobId)
diff --git a/internal/scheduler/nodedb/nodeidindex_test.go b/internal/scheduler/nodedb/nodeidindex_test.go
index 6f40a5edd22..024e19c130f 100644
--- a/internal/scheduler/nodedb/nodeidindex_test.go
+++ b/internal/scheduler/nodedb/nodeidindex_test.go
@@ -44,7 +44,11 @@ func TestFromArgsValid(t *testing.T) {

 func makeTestNode(id string) *internaltypes.Node {
     return internaltypes.CreateNode(id,
-        1,
+        internaltypes.NewNodeType([]v1.Taint{},
+            map[string]string{},
+            map[string]bool{},
+            map[string]bool{},
+        ),
         1,
         "executor",
         "node_name",
diff --git a/internal/scheduler/nodedb/nodeiteration_test.go b/internal/scheduler/nodedb/nodeiteration_test.go
index 53580e8489f..8ab626577dd 100644
--- a/internal/scheduler/nodedb/nodeiteration_test.go
+++ b/internal/scheduler/nodedb/nodeiteration_test.go
@@ -12,6 +12,8 @@ import (
     "k8s.io/apimachinery/pkg/api/resource"

     armadaslices "github.com/armadaproject/armada/internal/common/slices"
+    "github.com/armadaproject/armada/internal/common/util"
+
     "github.com/armadaproject/armada/internal/scheduler/internaltypes"
     "github.com/armadaproject/armada/internal/scheduler/schedulerobjects"
     "github.com/armadaproject/armada/internal/scheduler/testfixtures"
@@ -384,9 +386,16 @@ func TestNodeTypeIterator(t *testing.T) {
             // Set monotonically increasing node IDs to ensure nodes appear in predictable order.
             node.Id = fmt.Sprintf("%d", i)

-            entry, err := nodeDb.create(node)
+            entry, err := internaltypes.FromSchedulerObjectsNode(node,
+                uint64(i),
+                nodeDb.indexedTaints,
+                nodeDb.indexedNodeLabels,
+                nodeDb.resourceListFactory)
+
             require.NoError(t, err)

+            nodeDb.AddNodeToDb(entry)
+
             entries[i] = entry
         }
         require.NoError(t, nodeDb.UpsertMany(entries))
@@ -788,9 +797,16 @@ func TestNodeTypesIterator(t *testing.T) {
             // Set monotonically increasing node IDs to ensure nodes appear in predictable order.
             node.Id = fmt.Sprintf("%d", i)

-            entry, err := nodeDb.create(node)
+            entry, err := internaltypes.FromSchedulerObjectsNode(node,
+                uint64(i),
+                nodeDb.indexedTaints,
+                nodeDb.indexedNodeLabels,
+                nodeDb.resourceListFactory)
+
             require.NoError(t, err)

+            nodeDb.AddNodeToDb(entry)
+
             entries[i] = entry
         }
         require.NoError(t, nodeDb.UpsertMany(entries))
@@ -911,8 +927,8 @@ func labelsToNodeTypeId(labels map[string]string) uint64 {
     nodeType := internaltypes.NewNodeType(
         []v1.Taint{},
         labels,
-        mapFromSlice(testfixtures.TestIndexedTaints),
-        mapFromSlice(testfixtures.TestIndexedNodeLabels),
+        util.StringListToSet(testfixtures.TestIndexedTaints),
+        util.StringListToSet(testfixtures.TestIndexedNodeLabels),
     )
     return nodeType.GetId()
 }
diff --git a/internal/scheduler/nodedb/nodematching_test.go b/internal/scheduler/nodedb/nodematching_test.go
index e6731f1e5ad..56f5fdca87a 100644
--- a/internal/scheduler/nodedb/nodematching_test.go
+++ b/internal/scheduler/nodedb/nodematching_test.go
@@ -406,8 +406,8 @@ func TestNodeTypeSchedulingRequirementsMet(t *testing.T) {
     tests := map[string]struct {
         Taints []v1.Taint
         Labels map[string]string
-        IndexedTaints map[string]interface{}
-        IndexedLabels map[string]interface{}
+        IndexedTaints map[string]bool
+        IndexedLabels map[string]bool
         Req *schedulerobjects.PodRequirements
         ExpectSuccess bool
     }{
@@ -446,14 +446,14 @@ func TestNodeTypeSchedulingRequirementsMet(t *testing.T) {
         "untolerated non-indexed taint": {
             Taints: []v1.Taint{{Key: "foo", Value: "foo", Effect: v1.TaintEffectNoSchedule}},
             Labels: nil,
-            IndexedTaints: make(map[string]interface{}),
+            IndexedTaints: make(map[string]bool),
             Req: &schedulerobjects.PodRequirements{},
             ExpectSuccess: true,
         },
         "matched node selector": {
             Taints: nil,
             Labels: map[string]string{"bar": "bar"},
-            IndexedLabels: map[string]interface{}{"bar": ""},
+            IndexedLabels: map[string]bool{"bar": true},
             Req: &schedulerobjects.PodRequirements{
                 NodeSelector: map[string]string{"bar": "bar"},
             },
@@ -462,7 +462,7 @@ func TestNodeTypeSchedulingRequirementsMet(t *testing.T) {
         "unset indexed label": {
             Taints: nil,
             Labels: nil,
-            IndexedLabels: map[string]interface{}{"bar": ""},
+            IndexedLabels: map[string]bool{"bar": true},
             Req: &schedulerobjects.PodRequirements{
                 Tolerations: []v1.Toleration{{Key: "foo", Value: "foo"}},
                 NodeSelector: map[string]string{"bar": "bar"},
@@ -472,7 +472,7 @@ func TestNodeTypeSchedulingRequirementsMet(t *testing.T) {
         "different label value": {
             Taints: nil,
             Labels: map[string]string{"bar": "baz"},
-            IndexedLabels: map[string]interface{}{"bar": ""},
+            IndexedLabels: map[string]bool{"bar": true},
             Req: &schedulerobjects.PodRequirements{
                 NodeSelector: map[string]string{"bar": "bar"},
             },
@@ -489,7 +489,7 @@ func TestNodeTypeSchedulingRequirementsMet(t *testing.T) {
         "tolerated taints and matched node selector": {
             Taints: []v1.Taint{{Key: "foo", Value: "foo", Effect: v1.TaintEffectNoSchedule}},
             Labels: map[string]string{"bar": "bar"},
-            IndexedLabels: map[string]interface{}{"bar": ""},
+            IndexedLabels: map[string]bool{"bar": true},
             Req: &schedulerobjects.PodRequirements{
                 Tolerations: []v1.Toleration{{Key: "foo", Value: "foo"}},
                 NodeSelector: map[string]string{"bar": "bar"},
@@ -499,7 +499,7 @@ func TestNodeTypeSchedulingRequirementsMet(t *testing.T) {
         "untolerated taints and matched node selector": {
             Taints: []v1.Taint{{Key: "foo", Value: "foo", Effect: v1.TaintEffectNoSchedule}},
             Labels: map[string]string{"bar": "bar"},
-            IndexedLabels: map[string]interface{}{"bar": ""},
+            IndexedLabels: map[string]bool{"bar": true},
             Req: &schedulerobjects.PodRequirements{
                 NodeSelector: map[string]string{"bar": "bar"},
             },
@@ -508,7 +508,7 @@ func TestNodeTypeSchedulingRequirementsMet(t *testing.T) {
         "tolerated taints and different label value": {
             Taints: []v1.Taint{{Key: "foo", Value: "foo", Effect: v1.TaintEffectNoSchedule}},
             Labels: map[string]string{"bar": "baz"},
-            IndexedLabels: map[string]interface{}{"bar": ""},
+            IndexedLabels: map[string]bool{"bar": true},
             Req: &schedulerobjects.PodRequirements{
                 Tolerations: []v1.Toleration{{Key: "foo", Value: "foo"}},
                 NodeSelector: map[string]string{"bar": "bar"},
@@ -518,7 +518,7 @@ func TestNodeTypeSchedulingRequirementsMet(t *testing.T) {
         "tolerated taints and missing label": {
             Taints: []v1.Taint{{Key: "foo", Value: "foo", Effect: v1.TaintEffectNoSchedule}},
             Labels: nil,
-            IndexedLabels: map[string]interface{}{"bar": ""},
+            IndexedLabels: map[string]bool{"bar": true},
             Req: &schedulerobjects.PodRequirements{
                 Tolerations: []v1.Toleration{{Key: "foo", Value: "foo"}},
                 NodeSelector: map[string]string{"bar": "bar"},
@@ -650,7 +650,7 @@ func TestResourceRequirementsMet(t *testing.T) {
 func makeTestNodeTaintsLabels(taints []v1.Taint, labels map[string]string) *internaltypes.Node {
     return internaltypes.CreateNode(
         "id",
-        1,
+        internaltypes.NewNodeType(taints,
+            labels,
+            map[string]bool{},
+            map[string]bool{},
+        ),
         1,
         "executor",
         "name",
@@ -669,7 +673,7 @@ func makeTestNodeTaintsLabels(taints []v1.Taint, labels map[string]string) *inte
 func makeTestNodeResources(t *testing.T, allocatableByPriority map[int32]internaltypes.ResourceList, totalResources internaltypes.ResourceList) *internaltypes.Node {
     return internaltypes.CreateNode(
         "id",
-        1,
+        internaltypes.NewNodeType([]v1.Taint{},
+            map[string]string{},
+            map[string]bool{},
+            map[string]bool{},
+        ),
         1,
         "executor",
         "name",
diff --git a/internal/scheduler/scheduling/gang_scheduler.go b/internal/scheduler/scheduling/gang_scheduler.go
index 758171a88d0..6cbbb08c250 100644
--- a/internal/scheduler/scheduling/gang_scheduler.go
+++ b/internal/scheduler/scheduling/gang_scheduler.go
@@ -9,6 +9,7 @@ import (
     "github.com/armadaproject/armada/internal/common/armadacontext"
     "github.com/armadaproject/armada/internal/common/slices"
     "github.com/armadaproject/armada/internal/scheduler/floatingresources"
+    "github.com/armadaproject/armada/internal/scheduler/internaltypes"
     "github.com/armadaproject/armada/internal/scheduler/jobdb"
     "github.com/armadaproject/armada/internal/scheduler/nodedb"
     "github.com/armadaproject/armada/internal/scheduler/schedulerobjects"
@@ -192,7 +193,7 @@ func (sch *GangScheduler) trySchedule(ctx *armadacontext.Context, gctx *context.
     }
     if ok {
         currentFit := gctx.Fit()
-        if currentFit.NumScheduled == gctx.Cardinality() && currentFit.MeanPreemptedAtPriority == float64(nodedb.MinPriority) {
+        if currentFit.NumScheduled == gctx.Cardinality() && currentFit.MeanPreemptedAtPriority == float64(internaltypes.MinPriority) {
             // Best possible; no need to keep looking.
             txn.Commit()
             return true, "", nil
diff --git a/internal/scheduler/scheduling/preempting_queue_scheduler_test.go b/internal/scheduler/scheduling/preempting_queue_scheduler_test.go
index 65e42d43604..624bb3cfca0 100644
--- a/internal/scheduler/scheduling/preempting_queue_scheduler_test.go
+++ b/internal/scheduler/scheduling/preempting_queue_scheduler_test.go
@@ -1818,7 +1818,7 @@ func TestPreemptingQueueScheduler(t *testing.T) {
                 for _, j := range round.NodeIndicesToCordon {
                     node, err := nodeDb.GetNode(tc.Nodes[j].Id)
                     require.NoError(t, err)
-                    taints := append(slices.Clone(node.GetTaints()), nodedb.UnschedulableTaint())
+                    taints := append(slices.Clone(node.GetTaints()), internaltypes.UnschedulableTaint())
                     node = testNodeWithTaints(node, taints)
                     err = nodeDb.Upsert(node)
                     require.NoError(t, err)
@@ -2286,7 +2286,7 @@ func BenchmarkPreemptingQueueScheduler(b *testing.B) {
 func testNodeWithTaints(node *internaltypes.Node, taints []v1.Taint) *internaltypes.Node {
     return internaltypes.CreateNode(
         node.GetId(),
-        node.GetNodeTypeId(),
+        node.GetNodeType(),
         node.GetIndex(),
         node.GetExecutor(),
         node.GetName(),