
koord-descheduler: limit the total number of pod evictions #2091

Merged: 1 commit, Jun 12, 2024
3 changes: 2 additions & 1 deletion cmd/koord-descheduler/app/server.go
@@ -307,7 +307,8 @@ func Setup(ctx context.Context, opts *options.Options, outOfTreeRegistryOptions

evictionLimiter := evictions.NewEvictionLimiter(
cc.ComponentConfig.MaxNoOfPodsToEvictPerNode,
cc.ComponentConfig.MaxNoOfPodsToEvictPerNamespace)
cc.ComponentConfig.MaxNoOfPodsToEvictPerNamespace,
cc.ComponentConfig.MaxNoOfPodsToEvictTotal)

desched, err := descheduler.New(
cc.Client,
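For orientation, the call site above now threads three optional caps into the limiter. A minimal sketch of how such a call reads with concrete values (the `uint` values are hypothetical; a `nil` pointer leaves that dimension uncapped):

```go
perNode := uint(5) // at most 5 evictions per node
total := uint(20)  // at most 20 evictions per run, the new global cap
// nil in the middle position: no per-namespace limit is enforced
evictionLimiter := evictions.NewEvictionLimiter(&perNode, nil, &total)
_ = evictionLimiter
```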
3 changes: 3 additions & 0 deletions pkg/descheduler/apis/config/types.go
@@ -67,6 +67,9 @@ type DeschedulerConfiguration struct {

// MaxNoOfPodsToEvictPerNamespace restricts maximum of pods to be evicted per namespace.
MaxNoOfPodsToEvictPerNamespace *uint

// MaxNoOfPodsToEvictTotal restricts the maximum number of pods to be evicted in total.
MaxNoOfPodsToEvictTotal *uint
}

// DeschedulerProfile is a descheduling profile.
3 changes: 3 additions & 0 deletions pkg/descheduler/apis/config/v1alpha2/types.go
@@ -68,6 +68,9 @@ type DeschedulerConfiguration struct {

// MaxNoOfPodsToEvictPerNamespace restricts maximum of pods to be evicted per namespace.
MaxNoOfPodsToEvictPerNamespace *uint `json:"maxNoOfPodsToEvictPerNamespace,omitempty"`

// MaxNoOfPodsToEvictTotal restricts the maximum number of pods to be evicted in total.
MaxNoOfPodsToEvictTotal *uint `json:"maxNoOfPodsToEvictTotal,omitempty"`
}

// DecodeNestedObjects decodes plugin args for known types.
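The `omitempty` JSON tags mean the new field only appears in a serialized configuration when it is set; a `nil` pointer keeps the cap disabled entirely. A self-contained sketch, using a hypothetical cut-down mirror of the v1alpha2 struct, to show the wire format:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Hypothetical mirror of two fields from v1alpha2.DeschedulerConfiguration,
// reduced here only to illustrate the serialized form.
type deschedulerConfiguration struct {
	MaxNoOfPodsToEvictPerNamespace *uint `json:"maxNoOfPodsToEvictPerNamespace,omitempty"`
	MaxNoOfPodsToEvictTotal        *uint `json:"maxNoOfPodsToEvictTotal,omitempty"`
}

func main() {
	total := uint(10)
	cfg := deschedulerConfiguration{MaxNoOfPodsToEvictTotal: &total}
	out, _ := json.Marshal(cfg)
	// The unset per-namespace cap is omitted from the output:
	fmt.Println(string(out)) // {"maxNoOfPodsToEvictTotal":10}
}
```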

5 changes: 5 additions & 0 deletions pkg/descheduler/apis/config/v1alpha2/zz_generated.deepcopy.go
(generated file; diff not rendered by default)

5 changes: 5 additions & 0 deletions pkg/descheduler/apis/config/zz_generated.deepcopy.go
(generated file; diff not rendered by default)

8 changes: 8 additions & 0 deletions pkg/descheduler/evictions/eviction_limiter.go
@@ -27,6 +27,7 @@ import (
type EvictionLimiter struct {
maxPodsToEvictPerNode *uint
maxPodsToEvictPerNamespace *uint
maxPodsToEvictTotal *uint
lock sync.Mutex
totalCount uint
nodePodCount nodePodEvictedCount
@@ -36,10 +37,12 @@ type EvictionLimiter struct
func NewEvictionLimiter(
maxPodsToEvictPerNode *uint,
maxPodsToEvictPerNamespace *uint,
maxPodsToEvictTotal *uint,
) *EvictionLimiter {
return &EvictionLimiter{
maxPodsToEvictPerNode: maxPodsToEvictPerNode,
maxPodsToEvictPerNamespace: maxPodsToEvictPerNamespace,
maxPodsToEvictTotal: maxPodsToEvictTotal,
nodePodCount: make(nodePodEvictedCount),
namespacePodCount: make(namespacePodEvictCount),
}
@@ -114,6 +117,11 @@ func (pe *EvictionLimiter) AllowEvict(pod *corev1.Pod) bool {
klog.ErrorS(fmt.Errorf("maximum number of evicted pods per namespace reached"), "Error evicting pod", "limit", *pe.maxPodsToEvictPerNamespace, "namespace", pod.Namespace)
return false
}

if pe.maxPodsToEvictTotal != nil && pe.totalCount+1 > *pe.maxPodsToEvictTotal {
klog.ErrorS(fmt.Errorf("maximum number of evicted pods total reached"), "Error evicting pod", "limit", *pe.maxPodsToEvictTotal)
return false
}
return true
}

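The new check mirrors the existing per-node and per-namespace guards: before each eviction, `AllowEvict` compares the running total against the cap and refuses once one more eviction would exceed it. A standalone sketch of just the total-cap behavior (simplified, hypothetical names; the real limiter is mutex-guarded and also tracks per-node and per-namespace counts):

```go
package main

import "fmt"

// totalLimiter is a reduced, hypothetical stand-in for EvictionLimiter,
// keeping only the global cap introduced in this PR.
type totalLimiter struct {
	maxTotal   *uint // nil means unlimited
	totalCount uint
}

// allowEvict reports whether one more eviction stays within the cap.
func (l *totalLimiter) allowEvict() bool {
	if l.maxTotal != nil && l.totalCount+1 > *l.maxTotal {
		return false
	}
	return true
}

// markEvicted records a successful eviction.
func (l *totalLimiter) markEvicted() { l.totalCount++ }

func main() {
	max := uint(2)
	l := &totalLimiter{maxTotal: &max}
	for i := 1; i <= 4; i++ {
		if l.allowEvict() {
			l.markEvicted()
			fmt.Printf("pod %d: evicted (total=%d)\n", i, l.totalCount)
		} else {
			fmt.Printf("pod %d: denied, total cap %d reached\n", i, *l.maxTotal)
		}
	}
}
```

With a cap of 2, the first two calls succeed and every later call is denied; this is the same behavior the new `TestMaxEvictionTotal` below asserts with a cap of one.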
167 changes: 166 additions & 1 deletion pkg/descheduler/framework/plugins/loadaware/low_node_load_test.go
@@ -992,7 +992,172 @@ func TestLowNodeLoad(t *testing.T) {
sharedInformerFactory.WaitForCacheSync(ctx.Done())

eventRecorder := &events.FakeRecorder{}
evictionLimiter := evictions.NewEvictionLimiter(nil, nil)
evictionLimiter := evictions.NewEvictionLimiter(nil, nil, nil)

koordClientSet := koordfake.NewSimpleClientset()
setupNodeMetrics(koordClientSet, tt.nodes, tt.pods, tt.podMetrics)

fh, err := frameworktesting.NewFramework(
[]frameworktesting.RegisterPluginFunc{
func(reg *frameworkruntime.Registry, profile *deschedulerconfig.DeschedulerProfile) {
reg.Register(defaultevictor.PluginName, defaultevictor.New)
profile.Plugins.Evict.Enabled = append(profile.Plugins.Evict.Enabled, deschedulerconfig.Plugin{Name: defaultevictor.PluginName})
profile.Plugins.Filter.Enabled = append(profile.Plugins.Filter.Enabled, deschedulerconfig.Plugin{Name: defaultevictor.PluginName})
profile.PluginConfig = append(profile.PluginConfig, deschedulerconfig.PluginConfig{
Name: defaultevictor.PluginName,
Args: &defaultevictor.DefaultEvictorArgs{},
})
},
func(reg *frameworkruntime.Registry, profile *deschedulerconfig.DeschedulerProfile) {
reg.Register(LowNodeLoadName, func(args runtime.Object, handle framework.Handle) (framework.Plugin, error) {
return NewLowNodeLoad(args, &fakeFrameworkHandle{
Handle: handle,
Interface: koordClientSet,
})
})
profile.Plugins.Balance.Enabled = append(profile.Plugins.Balance.Enabled, deschedulerconfig.Plugin{Name: LowNodeLoadName})
profile.PluginConfig = append(profile.PluginConfig, deschedulerconfig.PluginConfig{
Name: LowNodeLoadName,
Args: &deschedulerconfig.LowNodeLoadArgs{
NodeFit: true,
NodePools: []deschedulerconfig.LowNodeLoadNodePool{
{
LowThresholds: tt.thresholds,
HighThresholds: tt.targetThresholds,
UseDeviationThresholds: tt.useDeviationThresholds,
AnomalyCondition: &deschedulerconfig.LoadAnomalyCondition{
ConsecutiveAbnormalities: 1,
},
},
},
DetectorCacheTimeout: &metav1.Duration{Duration: 5 * time.Minute},
EvictableNamespaces: tt.evictableNamespaces,
},
})
},
},
"test",
frameworkruntime.WithClientSet(fakeClient),
frameworkruntime.WithEvictionLimiter(evictionLimiter),
frameworkruntime.WithEventRecorder(eventRecorder),
frameworkruntime.WithSharedInformerFactory(sharedInformerFactory),
frameworkruntime.WithGetPodsAssignedToNodeFunc(getPodsAssignedToNode),
)
assert.NoError(t, err)

fh.RunBalancePlugins(ctx, tt.nodes)

podsEvicted := evictionLimiter.TotalEvicted()
if tt.expectedPodsEvicted != podsEvicted {
t.Errorf("Expected %v pods to be evicted but %v got evicted", tt.expectedPodsEvicted, podsEvicted)
}
})
}
}

func TestMaxEvictionTotal(t *testing.T) {
n1NodeName := "n1"
n2NodeName := "n2"
n3NodeName := "n3"

testCases := []struct {
name string
useDeviationThresholds bool
thresholds, targetThresholds ResourceThresholds
nodes []*corev1.Node
pods []*corev1.Pod
podMetrics map[types.NamespacedName]*slov1alpha1.ResourceMap
expectedPodsEvicted uint
evictedPods []string
evictableNamespaces *deschedulerconfig.Namespaces
maxEvictionTotal uint
}{
{
name: "maxPodsToEvictTotal",
thresholds: ResourceThresholds{
corev1.ResourceCPU: 30,
corev1.ResourcePods: 30,
},
targetThresholds: ResourceThresholds{
corev1.ResourceCPU: 50,
corev1.ResourcePods: 50,
},
maxEvictionTotal: 1,
nodes: []*corev1.Node{
test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable),
},
pods: []*corev1.Pod{
test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p2", 400, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p3", 400, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p4", 400, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p5", 400, 0, n1NodeName, test.SetRSOwnerRef),
// These won't be evicted.
test.BuildTestPod("p6", 400, 0, n1NodeName, test.SetDSOwnerRef),
test.BuildTestPod("p7", 400, 0, n1NodeName, func(pod *corev1.Pod) {
// A pod with local storage.
test.SetNormalOwnerRef(pod)
pod.Spec.Volumes = []corev1.Volume{
{
Name: "sample",
VolumeSource: corev1.VolumeSource{
HostPath: &corev1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &corev1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI),
},
},
},
}
// A Mirror Pod.
pod.Annotations = test.GetMirrorPodAnnotation()
}),
test.BuildTestPod("p8", 400, 0, n1NodeName, func(pod *corev1.Pod) {
// A Critical Pod.
pod.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}),
test.BuildTestPod("p9", 400, 0, n2NodeName, test.SetRSOwnerRef),
},
expectedPodsEvicted: 1,
},
}
for _, tt := range testCases {
t.Run(tt.name, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

var objs []runtime.Object
for _, node := range tt.nodes {
objs = append(objs, node)
}
for _, pod := range tt.pods {
objs = append(objs, pod)
}
fakeClient := fake.NewSimpleClientset(objs...)
setupFakeDiscoveryWithPolicyResource(&fakeClient.Fake)

sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
_ = sharedInformerFactory.Core().V1().Nodes().Informer()
podInformer := sharedInformerFactory.Core().V1().Pods()

getPodsAssignedToNode, err := test.BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
t.Errorf("Build get pods assigned to node function error: %v", err)
}

podsForEviction := make(map[string]struct{})
for _, pod := range tt.evictedPods {
podsForEviction[pod] = struct{}{}
}

sharedInformerFactory.Start(ctx.Done())
sharedInformerFactory.WaitForCacheSync(ctx.Done())

eventRecorder := &events.FakeRecorder{}
evictionLimiter := evictions.NewEvictionLimiter(nil, nil, &tt.maxEvictionTotal)

koordClientSet := koordfake.NewSimpleClientset()
setupNodeMetrics(koordClientSet, tt.nodes, tt.pods, tt.podMetrics)