diff --git a/pkg/scheduler/frameworkext/topologymanager/manager.go b/pkg/scheduler/frameworkext/topologymanager/manager.go
index 0f7ce3683..2c5bb652a 100644
--- a/pkg/scheduler/frameworkext/topologymanager/manager.go
+++ b/pkg/scheduler/frameworkext/topologymanager/manager.go
@@ -70,7 +70,8 @@ func (m *topologyManager) Admit(ctx context.Context, cycleState *framework.Cycle
 	if !ok || extensionPointBeingExecuted == schedulingphase.PostFilter {
 		var admit bool
 		bestHint, admit = m.calculateAffinity(ctx, cycleState, policy, pod, nodeName, exclusivePolicy, allNUMANodeStatus)
-		klog.V(5).Infof("Best TopologyHint for (pod: %v): %v on node: %v", klog.KObj(pod), bestHint, nodeName)
+		klog.V(4).Infof("Best TopologyHint for (pod: %v): %+v on node %s, policy %s, exclusivePolicy %s, admit %v",
+			klog.KObj(pod), bestHint, nodeName, policy, exclusivePolicy, admit)
 		if !admit {
 			return framework.NewStatus(framework.Unschedulable, "node(s) NUMA Topology affinity error")
 		}
@@ -107,7 +108,7 @@ func (m *topologyManager) accumulateProvidersHints(ctx context.Context, cycleSta
 		// Get the TopologyHints for a Pod from a provider.
 		hints, _ := provider.GetPodTopologyHints(ctx, cycleState, pod, nodeName)
 		providersHints = append(providersHints, hints)
-		klog.V(5).Infof("TopologyHints for pod '%v': %v on node: %v", klog.KObj(pod), hints, nodeName)
+		klog.V(4).Infof("TopologyHints for pod '%v' by provider %T: %v on node: %v", klog.KObj(pod), provider, hints, nodeName)
 	}
 	return providersHints
 }
diff --git a/pkg/scheduler/plugins/reservation/plugin.go b/pkg/scheduler/plugins/reservation/plugin.go
index 1bd4fe9e1..5bfada198 100644
--- a/pkg/scheduler/plugins/reservation/plugin.go
+++ b/pkg/scheduler/plugins/reservation/plugin.go
@@ -324,9 +324,6 @@ func (pl *Plugin) PreFilterExtensions() framework.PreFilterExtensions {
 
 func (pl *Plugin) AddPod(ctx context.Context, cycleState *framework.CycleState, podToSchedule *corev1.Pod, podInfoToAdd *framework.PodInfo, nodeInfo *framework.NodeInfo) *framework.Status {
 	podRequests := resourceapi.PodRequests(podInfoToAdd.Pod, resourceapi.PodResourcesOptions{})
-	if quotav1.IsZero(podRequests) { // TODO: support zero request pod in reservation
-		return nil
-	}
 	podRequests[corev1.ResourcePods] = *resource.NewQuantity(1, resource.DecimalSI) // count pods resources
 
 	state := getStateData(cycleState)
@@ -356,9 +353,6 @@ func (pl *Plugin) AddPod(ctx context.Context, cycleState *framework.CycleState,
 
 func (pl *Plugin) RemovePod(ctx context.Context, cycleState *framework.CycleState, podToSchedule *corev1.Pod, podInfoToRemove *framework.PodInfo, nodeInfo *framework.NodeInfo) *framework.Status {
 	podRequests := resourceapi.PodRequests(podInfoToRemove.Pod, resourceapi.PodResourcesOptions{})
-	if quotav1.IsZero(podRequests) { // TODO: support zero request pod in reservation
-		return nil
-	}
 	podRequests[corev1.ResourcePods] = *resource.NewQuantity(1, resource.DecimalSI) // count pods resources
 
 	state := getStateData(cycleState)
@@ -481,7 +475,10 @@ func (pl *Plugin) filterWithReservations(ctx context.Context, cycleState *framew
 	var allInsufficientResourceReasonsByReservation []string
 	for _, rInfo := range matchedReservations {
 		resourceNames := quotav1.Intersection(rInfo.ResourceNames, podRequestsResourceNames)
-		if len(resourceNames) == 0 {
+		// NOTE: The reservation may not consider irrelevant pods that have no matched resource names, since doing so
+		// makes little sense in most cases and introduces a performance overhead.
+		// However, we still allow pods to allocate reserved resources to satisfy their reservation affinities.
+		if len(resourceNames) == 0 && !state.hasAffinity {
 			continue
 		}
 		// When the pod specifies a reservation name, we record the admission reasons.
diff --git a/pkg/scheduler/plugins/reservation/plugin_test.go b/pkg/scheduler/plugins/reservation/plugin_test.go
index e88b57a79..5f36e598a 100644
--- a/pkg/scheduler/plugins/reservation/plugin_test.go
+++ b/pkg/scheduler/plugins/reservation/plugin_test.go
@@ -860,6 +860,8 @@ func Test_filterWithReservations(t *testing.T) {
 				corev1.ResourceCPU:    resource.MustParse("32"),
 				corev1.ResourceMemory: resource.MustParse("32Gi"),
 				corev1.ResourcePods:   resource.MustParse("100"),
+				apiext.BatchCPU:       resource.MustParse("7500"),
+				apiext.BatchMemory:    resource.MustParse("10Gi"),
 			},
 		},
 	}
@@ -1008,6 +1010,57 @@ func Test_filterWithReservations(t *testing.T) {
 			},
 			wantStatus: nil,
 		},
+		{
+			name: "filter restricted reservation with nodeInfo and matched requests are zero",
+			stateData: &stateData{
+				schedulingStateData: schedulingStateData{
+					hasAffinity: true,
+					podRequests: corev1.ResourceList{
+						apiext.BatchCPU:    resource.MustParse("6000"),
+						apiext.BatchMemory: resource.MustParse("8Gi"),
+					},
+					nodeReservationStates: map[string]*nodeReservationState{
+						node.Name: {
+							podRequested: &framework.Resource{
+								MilliCPU: 30 * 1000,
+								Memory:   24 * 1024 * 1024 * 1024,
+								ScalarResources: map[corev1.ResourceName]int64{
+									apiext.BatchCPU:    1500,
+									apiext.BatchMemory: 2 * 1024 * 1024 * 1024,
+								},
+							},
+							rAllocated: &framework.Resource{
+								MilliCPU: 6000,
+							},
+							matchedOrIgnored: []*frameworkext.ReservationInfo{
+								frameworkext.NewReservationInfo(&schedulingv1alpha1.Reservation{
+									ObjectMeta: metav1.ObjectMeta{
+										Name: "test-r",
+									},
+									Spec: schedulingv1alpha1.ReservationSpec{
+										AllocatePolicy: schedulingv1alpha1.ReservationAllocatePolicyRestricted,
+										Template: &corev1.PodTemplateSpec{
+											Spec: corev1.PodSpec{
+												Containers: []corev1.Container{
+													{
+														Resources: corev1.ResourceRequirements{
+															Requests: corev1.ResourceList{
+																corev1.ResourceCPU: resource.MustParse("6"),
+															},
+														},
+													},
+												},
+											},
+										},
+									},
+								}),
+							},
+						},
+					},
+				},
+			},
+			wantStatus: nil,
+		},
 		{
 			name: "failed to filter restricted reservation with nodeInfo",
 			stateData: &stateData{
@@ -1081,6 +1134,109 @@ func Test_filterWithReservations(t *testing.T) {
 			},
 			wantStatus: framework.NewStatus(framework.Unschedulable, "Reservation(s) Too many pods"),
 		},
+		{
+			name: "failed to filter restricted reservation since unmatched resources are insufficient",
+			stateData: &stateData{
+				schedulingStateData: schedulingStateData{
+					hasAffinity: true,
+					podRequests: corev1.ResourceList{
+						apiext.BatchCPU:    resource.MustParse("8000"),
+						apiext.BatchMemory: resource.MustParse("8Gi"),
+					},
+					nodeReservationStates: map[string]*nodeReservationState{
+						node.Name: {
+							podRequested: &framework.Resource{
+								MilliCPU: 30 * 1000,
+								Memory:   24 * 1024 * 1024 * 1024,
+								ScalarResources: map[corev1.ResourceName]int64{
+									apiext.BatchCPU:    1500,
+									apiext.BatchMemory: 2 * 1024 * 1024 * 1024,
+								},
+							},
+							rAllocated: &framework.Resource{
+								MilliCPU: 2000,
+							},
+							matchedOrIgnored: []*frameworkext.ReservationInfo{
+								frameworkext.NewReservationInfo(&schedulingv1alpha1.Reservation{
+									ObjectMeta: metav1.ObjectMeta{
+										Name: "test-r",
+									},
+									Spec: schedulingv1alpha1.ReservationSpec{
+										AllocatePolicy: schedulingv1alpha1.ReservationAllocatePolicyRestricted,
+										Template: &corev1.PodTemplateSpec{
+											Spec: corev1.PodSpec{
+												Containers: []corev1.Container{
+													{
+														Resources: corev1.ResourceRequirements{
+															Requests: corev1.ResourceList{
+																corev1.ResourceCPU: resource.MustParse("6"),
+															},
+														},
+													},
+												},
+											},
+										},
+									},
+								}),
+							},
+						},
+					},
+				},
+			},
+			wantStatus: framework.NewStatus(framework.Unschedulable,
+				"Insufficient kubernetes.io/batch-cpu by node"),
+		},
+		{
+			name: "filter restricted reservation and ignore matched requests are zero without affinity",
+			stateData: &stateData{
+				schedulingStateData: schedulingStateData{
+					hasAffinity: false,
+					podRequests: corev1.ResourceList{
+						apiext.BatchCPU:    resource.MustParse("8000"),
+						apiext.BatchMemory: resource.MustParse("8Gi"),
+					},
+					nodeReservationStates: map[string]*nodeReservationState{
+						node.Name: {
+							podRequested: &framework.Resource{
+								MilliCPU: 30 * 1000,
+								Memory:   24 * 1024 * 1024 * 1024,
+								ScalarResources: map[corev1.ResourceName]int64{
+									apiext.BatchCPU:    1500,
+									apiext.BatchMemory: 2 * 1024 * 1024 * 1024,
+								},
+							},
+							rAllocated: &framework.Resource{
+								MilliCPU: 6000,
+							},
+							matchedOrIgnored: []*frameworkext.ReservationInfo{
+								frameworkext.NewReservationInfo(&schedulingv1alpha1.Reservation{
+									ObjectMeta: metav1.ObjectMeta{
+										Name: "test-r",
+									},
+									Spec: schedulingv1alpha1.ReservationSpec{
+										AllocatePolicy: schedulingv1alpha1.ReservationAllocatePolicyRestricted,
+										Template: &corev1.PodTemplateSpec{
+											Spec: corev1.PodSpec{
+												Containers: []corev1.Container{
+													{
+														Resources: corev1.ResourceRequirements{
+															Requests: corev1.ResourceList{
+																corev1.ResourceCPU: resource.MustParse("6"),
+															},
+														},
+													},
+												},
+											},
+										},
+									},
+								}),
+							},
+						},
+					},
+				},
+			},
+			wantStatus: nil,
+		},
 		{
 			name: "filter default reservations with preemption",
 			stateData: &stateData{
@@ -1789,7 +1945,11 @@ func TestPreFilterExtensionAddPod(t *testing.T) {
 					preemptibleInRRs: map[string]map[types.UID]corev1.ResourceList{},
 				},
 			},
-			wantPreemptible:      map[string]corev1.ResourceList{},
+			wantPreemptible: map[string]corev1.ResourceList{
+				"test-node": {
+					corev1.ResourcePods: *resource.NewQuantity(-1, resource.DecimalSI),
+				},
+			},
 			wantPreemptibleInRRs: map[string]map[types.UID]corev1.ResourceList{},
 		},
 		{
@@ -1958,9 +2118,13 @@ func TestPreFilterExtensionRemovePod(t *testing.T) {
 		wantPreemptibleInRRs map[string]map[types.UID]corev1.ResourceList
 	}{
 		{
-			name:                 "with BestEffort Pod",
-			pod:                  &corev1.Pod{},
-			wantPreemptible:      map[string]corev1.ResourceList{},
+			name: "with BestEffort Pod",
+			pod:  &corev1.Pod{},
+			wantPreemptible: map[string]corev1.ResourceList{
+				"test-node": {
+					corev1.ResourcePods: *resource.NewQuantity(1, resource.DecimalSI),
+				},
+			},
 			wantPreemptibleInRRs: map[string]map[types.UID]corev1.ResourceList{},
 		},
 		{
diff --git a/pkg/scheduler/plugins/reservation/scoring.go b/pkg/scheduler/plugins/reservation/scoring.go
index e75c01be7..17facb412 100644
--- a/pkg/scheduler/plugins/reservation/scoring.go
+++ b/pkg/scheduler/plugins/reservation/scoring.go
@@ -189,7 +189,6 @@ func findMostPreferredReservationByOrder(rOnNode []*frameworkext.ReservationInfo
 }
 
 func scoreReservation(pod *corev1.Pod, rInfo *frameworkext.ReservationInfo, allocated corev1.ResourceList) int64 {
-	// TODO(joseph): we should support zero-request pods
 	requested := resourceapi.PodRequests(pod, resourceapi.PodResourcesOptions{})
 	requested = quotav1.Add(requested, allocated)
 	resources := quotav1.RemoveZeros(rInfo.Allocatable)