Skip to content

Commit ba555de

Browse files
committed
✨ Add MachineDrainRule "WaitCompleted"
Signed-off-by: Vince Prignano <[email protected]>
1 parent fe6a595 commit ba555de

File tree

10 files changed

+311
-98
lines changed

10 files changed

+311
-98
lines changed

api/v1beta1/machinedrainrules_types.go

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,13 @@ import (
2222

2323
const (
2424
// PodDrainLabel is the label that can be set on Pods in workload clusters to ensure a Pod is not drained.
25-
// The only valid value is "skip".
25+
// The only valid values are "skip" and "wait-completed".
2626
// This label takes precedence over MachineDrainRules defined in the management cluster.
2727
PodDrainLabel = "cluster.x-k8s.io/drain"
2828
)
2929

30-
// MachineDrainRuleDrainBehavior defines the drain behavior. Can be either "Drain" or "Skip".
31-
// +kubebuilder:validation:Enum=Drain;Skip
30+
// MachineDrainRuleDrainBehavior defines the drain behavior. Can be one of "Drain", "Skip", or "WaitCompleted".
31+
// +kubebuilder:validation:Enum=Drain;Skip;WaitCompleted
3232
type MachineDrainRuleDrainBehavior string
3333

3434
const (
@@ -37,6 +37,10 @@ const (
3737

3838
// MachineDrainRuleDrainBehaviorSkip means the drain for a Pod should be skipped.
3939
MachineDrainRuleDrainBehaviorSkip MachineDrainRuleDrainBehavior = "Skip"
40+
41+
// MachineDrainRuleDrainBehaviorWaitCompleted means the Pod should not be evicted,
42+
// but overall drain should wait until the Pod completes.
43+
MachineDrainRuleDrainBehaviorWaitCompleted MachineDrainRuleDrainBehavior = "WaitCompleted"
4044
)
4145

4246
// MachineDrainRuleSpec defines the spec of a MachineDrainRule.
@@ -112,14 +116,16 @@ type MachineDrainRuleSpec struct {
112116
// MachineDrainRuleDrainConfig configures if and how Pods are drained.
113117
type MachineDrainRuleDrainConfig struct {
114118
// behavior defines the drain behavior.
115-
// Can be either "Drain" or "Skip".
119+
// Can be one of "Drain", "Skip", or "WaitCompleted".
116120
// "Drain" means that the Pods to which this MachineDrainRule applies will be drained.
117121
// If behavior is set to "Drain" the order in which Pods are drained can be configured
118122
// with the order field. When draining Pods of a Node the Pods will be grouped by order
119123
// and one group after another will be drained (by increasing order). Cluster API will
120124
// wait until all Pods of a group are terminated / removed from the Node before starting
121125
// with the next group.
122126
// "Skip" means that the Pods to which this MachineDrainRule applies will be skipped during drain.
127+
// "WaitCompleted" means that the pods to which this MachineDrainRule applies will never be evicted
128+
// and that the overall drain waits until they have completed.
123129
// +required
124130
Behavior MachineDrainRuleDrainBehavior `json:"behavior"`
125131

config/crd/bases/cluster.x-k8s.io_machinedrainrules.yaml

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/proposals/20240930-machine-drain-rules.md

Lines changed: 55 additions & 52 deletions
Large diffs are not rendered by default.

internal/controllers/machine/drain/drain.go

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ func (d *Helper) GetPodsForEviction(ctx context.Context, cluster *clusterv1.Clus
128128
podNamespaces[ns.Name] = &ns
129129
}
130130

131-
// Note: As soon as a filter decides that a Pod should be skipped (i.e. DrainBehavior == "Skip")
131+
// Note: As soon as a filter decides that a Pod should be skipped (i.e. DrainBehavior == "Skip" or "WaitCompleted")
132132
// other filters won't be evaluated and the Pod will be skipped.
133133
list := filterPods(ctx, allPods, []PodFilter{
134134
// Phase 1: Basic filtering (aligned to kubectl drain)
@@ -150,7 +150,7 @@ func (d *Helper) GetPodsForEviction(ctx context.Context, cluster *clusterv1.Clus
150150

151151
// Phase 2: Filtering based on drain label & MachineDrainRules
152152

153-
// Skip Pods with label cluster.x-k8s.io/drain == "skip"
153+
// Skip Pods with label cluster.x-k8s.io/drain == "skip" or "wait-completed"
154154
d.drainLabelFilter,
155155

156156
// Use drain behavior and order from first matching MachineDrainRule
@@ -225,7 +225,8 @@ func filterPods(ctx context.Context, allPods []*corev1.Pod, filters []PodFilter)
225225
var deleteWarnings []string
226226
for _, filter := range filters {
227227
status = filter(ctx, pod)
228-
if status.DrainBehavior == clusterv1.MachineDrainRuleDrainBehaviorSkip {
228+
if status.DrainBehavior == clusterv1.MachineDrainRuleDrainBehaviorSkip ||
229+
status.DrainBehavior == clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted {
229230
// short-circuit as soon as pod is filtered out
230231
// at that point, there is no reason to run pod
231232
// through any additional filters
@@ -281,6 +282,7 @@ func (d *Helper) EvictPods(ctx context.Context, podDeleteList *PodDeleteList) Ev
281282
var podsToTriggerEvictionLater []PodDelete
282283
var podsWithDeletionTimestamp []PodDelete
283284
var podsToBeIgnored []PodDelete
285+
var podsToWaitCompleted []PodDelete
284286
for _, pod := range podDeleteList.items {
285287
switch {
286288
case pod.Status.DrainBehavior == clusterv1.MachineDrainRuleDrainBehaviorDrain && pod.Pod.DeletionTimestamp.IsZero():
@@ -289,6 +291,8 @@ func (d *Helper) EvictPods(ctx context.Context, podDeleteList *PodDeleteList) Ev
289291
} else {
290292
podsToTriggerEvictionLater = append(podsToTriggerEvictionLater, pod)
291293
}
294+
case pod.Status.DrainBehavior == clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted:
295+
podsToWaitCompleted = append(podsToWaitCompleted, pod)
292296
case pod.Status.DrainBehavior == clusterv1.MachineDrainRuleDrainBehaviorDrain:
293297
podsWithDeletionTimestamp = append(podsWithDeletionTimestamp, pod)
294298
default:
@@ -300,6 +304,7 @@ func (d *Helper) EvictPods(ctx context.Context, podDeleteList *PodDeleteList) Ev
300304
"podsToTriggerEvictionNow", podDeleteListToString(podsToTriggerEvictionNow, 5),
301305
"podsToTriggerEvictionLater", podDeleteListToString(podsToTriggerEvictionLater, 5),
302306
"podsWithDeletionTimestamp", podDeleteListToString(podsWithDeletionTimestamp, 5),
307+
"podsToWaitCompleted", podDeleteListToString(podsToWaitCompleted, 5),
303308
)
304309

305310
// Trigger evictions for at most 10s. We'll continue on the next reconcile if we hit the timeout.
@@ -394,6 +399,10 @@ evictionLoop:
394399
res.PodsToTriggerEvictionLater = append(res.PodsToTriggerEvictionLater, pd.Pod)
395400
}
396401

402+
for _, pd := range podsToWaitCompleted {
403+
res.PodsToWaitCompleted = append(res.PodsToWaitCompleted, pd.Pod)
404+
}
405+
397406
return res
398407
}
399408

@@ -403,6 +412,9 @@ func minDrainOrderOfPodsToDrain(pds []PodDelete) int32 {
403412
if pd.Status.DrainBehavior == clusterv1.MachineDrainRuleDrainBehaviorDrain && ptr.Deref(pd.Status.DrainOrder, 0) < minOrder {
404413
minOrder = ptr.Deref(pd.Status.DrainOrder, 0)
405414
}
415+
if pd.Status.DrainBehavior == clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted && 0 < minOrder {
416+
minOrder = 0
417+
}
406418
}
407419
return minOrder
408420
}
@@ -431,13 +443,15 @@ type EvictionResult struct {
431443
PodsDeletionTimestampSet []*corev1.Pod
432444
PodsFailedEviction map[string][]*corev1.Pod
433445
PodsToTriggerEvictionLater []*corev1.Pod
446+
PodsToWaitCompleted []*corev1.Pod
434447
PodsNotFound []*corev1.Pod
435448
PodsIgnored []*corev1.Pod
436449
}
437450

438451
// DrainCompleted returns if a Node is entirely drained, i.e. if all relevant Pods have gone away.
439452
func (r EvictionResult) DrainCompleted() bool {
440-
return len(r.PodsDeletionTimestampSet) == 0 && len(r.PodsFailedEviction) == 0 && len(r.PodsToTriggerEvictionLater) == 0
453+
return len(r.PodsDeletionTimestampSet) == 0 && len(r.PodsFailedEviction) == 0 &&
454+
len(r.PodsToTriggerEvictionLater) == 0 && len(r.PodsToWaitCompleted) == 0
441455
}
442456

443457
// ConditionMessage returns a condition message for the case where a drain is not completed.
@@ -498,6 +512,10 @@ func (r EvictionResult) ConditionMessage(nodeDrainStartTime *metav1.Time) string
498512
conditionMessage = fmt.Sprintf("%s\nAfter above Pods have been removed from the Node, the following Pods will be evicted: %s",
499513
conditionMessage, PodListToString(r.PodsToTriggerEvictionLater, 3))
500514
}
515+
if len(r.PodsToWaitCompleted) > 0 {
516+
conditionMessage = fmt.Sprintf("%s\nWaiting for the following Pods to complete without eviction: %s",
517+
conditionMessage, PodListToString(r.PodsToWaitCompleted, 3))
518+
}
501519
return conditionMessage
502520
}
503521

internal/controllers/machine/drain/drain_test.go

Lines changed: 111 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,32 @@ func TestGetPodsForEviction(t *testing.T) {
140140
},
141141
},
142142
}
143+
mdrBehaviorWaitCompleted := &clusterv1.MachineDrainRule{
144+
ObjectMeta: metav1.ObjectMeta{
145+
Name: "mdr-behavior-wait-completed",
146+
Namespace: "test-namespace",
147+
},
148+
Spec: clusterv1.MachineDrainRuleSpec{
149+
Drain: clusterv1.MachineDrainRuleDrainConfig{
150+
Behavior: clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted,
151+
},
152+
Machines: nil, // Match all machines
153+
Pods: []clusterv1.MachineDrainRulePodSelector{
154+
{
155+
Selector: &metav1.LabelSelector{
156+
MatchLabels: map[string]string{
157+
"app": "behavior-wait-completed",
158+
},
159+
},
160+
NamespaceSelector: &metav1.LabelSelector{
161+
MatchLabels: map[string]string{
162+
"kubernetes.io/metadata.name": "test-namespace",
163+
},
164+
},
165+
},
166+
},
167+
},
168+
}
143169
mdrBehaviorUnknown := &clusterv1.MachineDrainRule{
144170
ObjectMeta: metav1.ObjectMeta{
145171
Name: "mdr-behavior-unknown",
@@ -637,6 +663,15 @@ func TestGetPodsForEviction(t *testing.T) {
637663
},
638664
},
639665
},
666+
{
667+
ObjectMeta: metav1.ObjectMeta{
668+
Name: "pod-3-skip-pod-with-drain-label-wait-completed",
669+
Namespace: metav1.NamespaceDefault,
670+
Labels: map[string]string{
671+
clusterv1.PodDrainLabel: "wait-completed",
672+
},
673+
},
674+
},
640675
},
641676
wantPodDeleteList: PodDeleteList{items: []PodDelete{
642677
{
@@ -663,6 +698,18 @@ func TestGetPodsForEviction(t *testing.T) {
663698
Reason: PodDeleteStatusTypeSkip,
664699
},
665700
},
701+
{
702+
Pod: &corev1.Pod{
703+
ObjectMeta: metav1.ObjectMeta{
704+
Name: "pod-3-skip-pod-with-drain-label-wait-completed",
705+
Namespace: metav1.NamespaceDefault,
706+
},
707+
},
708+
Status: PodDeleteStatus{
709+
DrainBehavior: clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted,
710+
Reason: PodDeleteStatusTypeWaitCompleted,
711+
},
712+
},
666713
}},
667714
},
668715
{
@@ -714,8 +761,17 @@ func TestGetPodsForEviction(t *testing.T) {
714761
},
715762
},
716763
},
764+
{
765+
ObjectMeta: metav1.ObjectMeta{
766+
Name: "pod-3-behavior-wait-completed",
767+
Namespace: "test-namespace", // matches the Namespace of the selector in mdrBehaviorWaitCompleted.
768+
Labels: map[string]string{
769+
"app": "behavior-wait-completed", // matches mdrBehaviorWaitCompleted.
770+
},
771+
},
772+
},
717773
},
718-
machineDrainRules: []*clusterv1.MachineDrainRule{mdrBehaviorDrain, mdrBehaviorSkip, mdrBehaviorUnknown},
774+
machineDrainRules: []*clusterv1.MachineDrainRule{mdrBehaviorDrain, mdrBehaviorSkip, mdrBehaviorUnknown, mdrBehaviorWaitCompleted},
719775
wantPodDeleteList: PodDeleteList{items: []PodDelete{
720776
{
721777
Pod: &corev1.Pod{
@@ -744,6 +800,18 @@ func TestGetPodsForEviction(t *testing.T) {
744800
Reason: PodDeleteStatusTypeSkip,
745801
},
746802
},
803+
{
804+
Pod: &corev1.Pod{
805+
ObjectMeta: metav1.ObjectMeta{
806+
Name: "pod-3-behavior-wait-completed",
807+
Namespace: "test-namespace",
808+
},
809+
},
810+
Status: PodDeleteStatus{
811+
DrainBehavior: clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted,
812+
Reason: PodDeleteStatusTypeWaitCompleted,
813+
},
814+
},
747815
}},
748816
},
749817
}
@@ -918,6 +986,19 @@ func Test_getMatchingMachineDrainRules(t *testing.T) {
918986
Pods: nil, // Match all Pods
919987
},
920988
}
989+
matchingMDRBehaviorWaitCompleted := &clusterv1.MachineDrainRule{
990+
ObjectMeta: metav1.ObjectMeta{
991+
Name: "mdr-behavior-wait-completed",
992+
Namespace: "test-namespace",
993+
},
994+
Spec: clusterv1.MachineDrainRuleSpec{
995+
Drain: clusterv1.MachineDrainRuleDrainConfig{
996+
Behavior: clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted,
997+
},
998+
Machines: nil, // Match all machines
999+
Pods: nil, // Match all Pods
1000+
},
1001+
}
9211002
matchingMDRBehaviorUnknown := &clusterv1.MachineDrainRule{
9221003
ObjectMeta: metav1.ObjectMeta{
9231004
Name: "mdr-behavior-unknown",
@@ -986,6 +1067,7 @@ func Test_getMatchingMachineDrainRules(t *testing.T) {
9861067
matchingMDRBehaviorDrainB,
9871068
matchingMDRBehaviorDrainA,
9881069
matchingMDRBehaviorSkip,
1070+
matchingMDRBehaviorWaitCompleted,
9891071
matchingMDRBehaviorUnknown,
9901072
notMatchingMDRDifferentNamespace,
9911073
notMatchingMDRNotMatchingSelector,
@@ -995,6 +1077,7 @@ func Test_getMatchingMachineDrainRules(t *testing.T) {
9951077
matchingMDRBehaviorDrainB,
9961078
matchingMDRBehaviorSkip,
9971079
matchingMDRBehaviorUnknown,
1080+
matchingMDRBehaviorWaitCompleted,
9981081
},
9991082
},
10001083
}
@@ -1291,6 +1374,17 @@ func TestEvictPods(t *testing.T) {
12911374
Reason: PodDeleteStatusTypeOkay,
12921375
},
12931376
},
1377+
{
1378+
Pod: &corev1.Pod{
1379+
ObjectMeta: metav1.ObjectMeta{
1380+
Name: "pod-9-wait-completed",
1381+
},
1382+
},
1383+
Status: PodDeleteStatus{
1384+
DrainBehavior: clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted, // Will not be evicted, but is reported in PodsToWaitCompleted because DrainBehavior is set to WaitCompleted
1385+
Reason: PodDeleteStatusTypeWaitCompleted,
1386+
},
1387+
},
12941388
}},
12951389
wantEvictionResult: EvictionResult{
12961390
PodsIgnored: []*corev1.Pod{
@@ -1354,6 +1448,13 @@ func TestEvictPods(t *testing.T) {
13541448
},
13551449
},
13561450
},
1451+
PodsToWaitCompleted: []*corev1.Pod{
1452+
{
1453+
ObjectMeta: metav1.ObjectMeta{
1454+
Name: "pod-9-wait-completed",
1455+
},
1456+
},
1457+
},
13571458
},
13581459
},
13591460
}
@@ -1651,6 +1752,13 @@ After above Pods have been removed from the Node, the following Pods will be evi
16511752
},
16521753
},
16531754
},
1755+
PodsToWaitCompleted: []*corev1.Pod{
1756+
{
1757+
ObjectMeta: metav1.ObjectMeta{
1758+
Name: "pod-16-wait-completed",
1759+
},
1760+
},
1761+
},
16541762
},
16551763
wantConditionMessage: `Drain not completed yet (started at 2024-10-09T16:13:59Z):
16561764
* Pods pod-2-deletionTimestamp-set-1, pod-2-deletionTimestamp-set-2, pod-2-deletionTimestamp-set-3, ... (4 more): deletionTimestamp set, but still not removed from the Node
@@ -1660,7 +1768,8 @@ After above Pods have been removed from the Node, the following Pods will be evi
16601768
* Pod pod-8-to-trigger-eviction-some-other-error: failed to evict Pod, some other error 3
16611769
* Pod pod-9-to-trigger-eviction-some-other-error: failed to evict Pod, some other error 4
16621770
* 1 Pod with other issues
1663-
After above Pods have been removed from the Node, the following Pods will be evicted: pod-11-eviction-later, pod-12-eviction-later, pod-13-eviction-later, ... (2 more)`,
1771+
After above Pods have been removed from the Node, the following Pods will be evicted: pod-11-eviction-later, pod-12-eviction-later, pod-13-eviction-later, ... (2 more)
1772+
Waiting for the following Pods to complete without eviction: pod-16-wait-completed`,
16641773
},
16651774
{
16661775
name: "Compute long condition message correctly with more skipped errors",

0 commit comments

Comments
 (0)