Skip to content

Commit

Permalink
✨ Add MachineDrainRule "WaitCompleted"
Browse files Browse the repository at this point in the history
Signed-off-by: Vince Prignano <[email protected]>
  • Loading branch information
vincepri committed Jan 9, 2025
1 parent fe6a595 commit ba555de
Show file tree
Hide file tree
Showing 10 changed files with 311 additions and 98 deletions.
14 changes: 10 additions & 4 deletions api/v1beta1/machinedrainrules_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ import (

const (
// PodDrainLabel is the label that can be set on Pods in workload clusters to ensure a Pod is not drained.
// The only valid value is "skip".
// The only valid values are "skip" and "wait-completed".
// This label takes precedence over MachineDrainRules defined in the management cluster.
PodDrainLabel = "cluster.x-k8s.io/drain"
)

// MachineDrainRuleDrainBehavior defines the drain behavior. Can be either "Drain" or "Skip".
// +kubebuilder:validation:Enum=Drain;Skip
// MachineDrainRuleDrainBehavior defines the drain behavior. Can be either "Drain", "Skip", or "WaitCompleted".
// +kubebuilder:validation:Enum=Drain;Skip;WaitCompleted
type MachineDrainRuleDrainBehavior string

const (
Expand All @@ -37,6 +37,10 @@ const (

// MachineDrainRuleDrainBehaviorSkip means the drain for a Pod should be skipped.
MachineDrainRuleDrainBehaviorSkip MachineDrainRuleDrainBehavior = "Skip"

// MachineDrainRuleDrainBehaviorWaitCompleted means the Pod should not be evicted,
// but overall drain should wait until the Pod completes.
MachineDrainRuleDrainBehaviorWaitCompleted MachineDrainRuleDrainBehavior = "WaitCompleted"
)

// MachineDrainRuleSpec defines the spec of a MachineDrainRule.
Expand Down Expand Up @@ -112,14 +116,16 @@ type MachineDrainRuleSpec struct {
// MachineDrainRuleDrainConfig configures if and how Pods are drained.
type MachineDrainRuleDrainConfig struct {
// behavior defines the drain behavior.
// Can be either "Drain" or "Skip".
// Can be either "Drain", "Skip", or "WaitCompleted".
// "Drain" means that the Pods to which this MachineDrainRule applies will be drained.
// If behavior is set to "Drain" the order in which Pods are drained can be configured
// with the order field. When draining Pods of a Node the Pods will be grouped by order
// and one group after another will be drained (by increasing order). Cluster API will
// wait until all Pods of a group are terminated / removed from the Node before starting
// with the next group.
// "Skip" means that the Pods to which this MachineDrainRule applies will be skipped during drain.
// "WaitCompleted" means that the pods to which this MachineDrainRule applies will never be evicted
// and we wait for them to be completed.
// +required
Behavior MachineDrainRuleDrainBehavior `json:"behavior"`

Expand Down
1 change: 1 addition & 0 deletions config/crd/bases/cluster.x-k8s.io_machinedrainrules.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

107 changes: 55 additions & 52 deletions docs/proposals/20240930-machine-drain-rules.md

Large diffs are not rendered by default.

26 changes: 22 additions & 4 deletions internal/controllers/machine/drain/drain.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ func (d *Helper) GetPodsForEviction(ctx context.Context, cluster *clusterv1.Clus
podNamespaces[ns.Name] = &ns
}

// Note: As soon as a filter decides that a Pod should be skipped (i.e. DrainBehavior == "Skip")
// Note: As soon as a filter decides that a Pod should be skipped (i.e. DrainBehavior == "Skip" or "WaitCompleted")
// other filters won't be evaluated and the Pod will be skipped.
list := filterPods(ctx, allPods, []PodFilter{
// Phase 1: Basic filtering (aligned to kubectl drain)
Expand All @@ -150,7 +150,7 @@ func (d *Helper) GetPodsForEviction(ctx context.Context, cluster *clusterv1.Clus

// Phase 2: Filtering based on drain label & MachineDrainRules

// Skip Pods with label cluster.x-k8s.io/drain == "skip"
// Skip Pods with label cluster.x-k8s.io/drain == "skip" or "wait-completed"
d.drainLabelFilter,

// Use drain behavior and order from first matching MachineDrainRule
Expand Down Expand Up @@ -225,7 +225,8 @@ func filterPods(ctx context.Context, allPods []*corev1.Pod, filters []PodFilter)
var deleteWarnings []string
for _, filter := range filters {
status = filter(ctx, pod)
if status.DrainBehavior == clusterv1.MachineDrainRuleDrainBehaviorSkip {
if status.DrainBehavior == clusterv1.MachineDrainRuleDrainBehaviorSkip ||
status.DrainBehavior == clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted {
// short-circuit as soon as pod is filtered out
// at that point, there is no reason to run pod
// through any additional filters
Expand Down Expand Up @@ -281,6 +282,7 @@ func (d *Helper) EvictPods(ctx context.Context, podDeleteList *PodDeleteList) Ev
var podsToTriggerEvictionLater []PodDelete
var podsWithDeletionTimestamp []PodDelete
var podsToBeIgnored []PodDelete
var podsToWaitCompleted []PodDelete
for _, pod := range podDeleteList.items {
switch {
case pod.Status.DrainBehavior == clusterv1.MachineDrainRuleDrainBehaviorDrain && pod.Pod.DeletionTimestamp.IsZero():
Expand All @@ -289,6 +291,8 @@ func (d *Helper) EvictPods(ctx context.Context, podDeleteList *PodDeleteList) Ev
} else {
podsToTriggerEvictionLater = append(podsToTriggerEvictionLater, pod)
}
case pod.Status.DrainBehavior == clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted:
podsToWaitCompleted = append(podsToWaitCompleted, pod)
case pod.Status.DrainBehavior == clusterv1.MachineDrainRuleDrainBehaviorDrain:
podsWithDeletionTimestamp = append(podsWithDeletionTimestamp, pod)
default:
Expand All @@ -300,6 +304,7 @@ func (d *Helper) EvictPods(ctx context.Context, podDeleteList *PodDeleteList) Ev
"podsToTriggerEvictionNow", podDeleteListToString(podsToTriggerEvictionNow, 5),
"podsToTriggerEvictionLater", podDeleteListToString(podsToTriggerEvictionLater, 5),
"podsWithDeletionTimestamp", podDeleteListToString(podsWithDeletionTimestamp, 5),
"podsToWaitCompleted", podDeleteListToString(podsToWaitCompleted, 5),
)

// Trigger evictions for at most 10s. We'll continue on the next reconcile if we hit the timeout.
Expand Down Expand Up @@ -394,6 +399,10 @@ evictionLoop:
res.PodsToTriggerEvictionLater = append(res.PodsToTriggerEvictionLater, pd.Pod)
}

for _, pd := range podsToWaitCompleted {
res.PodsToWaitCompleted = append(res.PodsToWaitCompleted, pd.Pod)
}

return res
}

Expand All @@ -403,6 +412,9 @@ func minDrainOrderOfPodsToDrain(pds []PodDelete) int32 {
if pd.Status.DrainBehavior == clusterv1.MachineDrainRuleDrainBehaviorDrain && ptr.Deref(pd.Status.DrainOrder, 0) < minOrder {
minOrder = ptr.Deref(pd.Status.DrainOrder, 0)
}
if pd.Status.DrainBehavior == clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted && 0 < minOrder {
minOrder = 0
}
}
return minOrder
}
Expand Down Expand Up @@ -431,13 +443,15 @@ type EvictionResult struct {
PodsDeletionTimestampSet []*corev1.Pod
PodsFailedEviction map[string][]*corev1.Pod
PodsToTriggerEvictionLater []*corev1.Pod
PodsToWaitCompleted []*corev1.Pod
PodsNotFound []*corev1.Pod
PodsIgnored []*corev1.Pod
}

// DrainCompleted returns if a Node is entirely drained, i.e. if all relevant Pods have gone away.
func (r EvictionResult) DrainCompleted() bool {
return len(r.PodsDeletionTimestampSet) == 0 && len(r.PodsFailedEviction) == 0 && len(r.PodsToTriggerEvictionLater) == 0
return len(r.PodsDeletionTimestampSet) == 0 && len(r.PodsFailedEviction) == 0 &&
len(r.PodsToTriggerEvictionLater) == 0 && len(r.PodsToWaitCompleted) == 0
}

// ConditionMessage returns a condition message for the case where a drain is not completed.
Expand Down Expand Up @@ -498,6 +512,10 @@ func (r EvictionResult) ConditionMessage(nodeDrainStartTime *metav1.Time) string
conditionMessage = fmt.Sprintf("%s\nAfter above Pods have been removed from the Node, the following Pods will be evicted: %s",
conditionMessage, PodListToString(r.PodsToTriggerEvictionLater, 3))
}
if len(r.PodsToWaitCompleted) > 0 {
conditionMessage = fmt.Sprintf("%s\nWaiting for the following Pods to complete without eviction: %s",
conditionMessage, PodListToString(r.PodsToWaitCompleted, 3))
}
return conditionMessage
}

Expand Down
113 changes: 111 additions & 2 deletions internal/controllers/machine/drain/drain_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,32 @@ func TestGetPodsForEviction(t *testing.T) {
},
},
}
mdrBehaviorWaitCompleted := &clusterv1.MachineDrainRule{
ObjectMeta: metav1.ObjectMeta{
Name: "mdr-behavior-wait-completed",
Namespace: "test-namespace",
},
Spec: clusterv1.MachineDrainRuleSpec{
Drain: clusterv1.MachineDrainRuleDrainConfig{
Behavior: clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted,
},
Machines: nil, // Match all machines
Pods: []clusterv1.MachineDrainRulePodSelector{
{
Selector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"app": "behavior-wait-completed",
},
},
NamespaceSelector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"kubernetes.io/metadata.name": "test-namespace",
},
},
},
},
},
}
mdrBehaviorUnknown := &clusterv1.MachineDrainRule{
ObjectMeta: metav1.ObjectMeta{
Name: "mdr-behavior-unknown",
Expand Down Expand Up @@ -637,6 +663,15 @@ func TestGetPodsForEviction(t *testing.T) {
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-3-skip-pod-with-drain-label-wait-completed",
Namespace: metav1.NamespaceDefault,
Labels: map[string]string{
clusterv1.PodDrainLabel: "wait-completed",
},
},
},
},
wantPodDeleteList: PodDeleteList{items: []PodDelete{
{
Expand All @@ -663,6 +698,18 @@ func TestGetPodsForEviction(t *testing.T) {
Reason: PodDeleteStatusTypeSkip,
},
},
{
Pod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-3-skip-pod-with-drain-label-wait-completed",
Namespace: metav1.NamespaceDefault,
},
},
Status: PodDeleteStatus{
DrainBehavior: clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted,
Reason: PodDeleteStatusTypeWaitCompleted,
},
},
}},
},
{
Expand Down Expand Up @@ -714,8 +761,17 @@ func TestGetPodsForEviction(t *testing.T) {
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-3-behavior-wait-completed",
Namespace: "test-namespace", // matches the Namespace of the selector in mdrBehaviorWaitCompleted.
Labels: map[string]string{
"app": "behavior-wait-completed", // matches mdrBehaviorWaitCompleted.
},
},
},
},
machineDrainRules: []*clusterv1.MachineDrainRule{mdrBehaviorDrain, mdrBehaviorSkip, mdrBehaviorUnknown},
machineDrainRules: []*clusterv1.MachineDrainRule{mdrBehaviorDrain, mdrBehaviorSkip, mdrBehaviorUnknown, mdrBehaviorWaitCompleted},
wantPodDeleteList: PodDeleteList{items: []PodDelete{
{
Pod: &corev1.Pod{
Expand Down Expand Up @@ -744,6 +800,18 @@ func TestGetPodsForEviction(t *testing.T) {
Reason: PodDeleteStatusTypeSkip,
},
},
{
Pod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-3-behavior-wait-completed",
Namespace: "test-namespace",
},
},
Status: PodDeleteStatus{
DrainBehavior: clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted,
Reason: PodDeleteStatusTypeWaitCompleted,
},
},
}},
},
}
Expand Down Expand Up @@ -918,6 +986,19 @@ func Test_getMatchingMachineDrainRules(t *testing.T) {
Pods: nil, // Match all Pods
},
}
matchingMDRBehaviorWaitCompleted := &clusterv1.MachineDrainRule{
ObjectMeta: metav1.ObjectMeta{
Name: "mdr-behavior-wait-completed",
Namespace: "test-namespace",
},
Spec: clusterv1.MachineDrainRuleSpec{
Drain: clusterv1.MachineDrainRuleDrainConfig{
Behavior: clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted,
},
Machines: nil, // Match all machines
Pods: nil, // Match all Pods
},
}
matchingMDRBehaviorUnknown := &clusterv1.MachineDrainRule{
ObjectMeta: metav1.ObjectMeta{
Name: "mdr-behavior-unknown",
Expand Down Expand Up @@ -986,6 +1067,7 @@ func Test_getMatchingMachineDrainRules(t *testing.T) {
matchingMDRBehaviorDrainB,
matchingMDRBehaviorDrainA,
matchingMDRBehaviorSkip,
matchingMDRBehaviorWaitCompleted,
matchingMDRBehaviorUnknown,
notMatchingMDRDifferentNamespace,
notMatchingMDRNotMatchingSelector,
Expand All @@ -995,6 +1077,7 @@ func Test_getMatchingMachineDrainRules(t *testing.T) {
matchingMDRBehaviorDrainB,
matchingMDRBehaviorSkip,
matchingMDRBehaviorUnknown,
matchingMDRBehaviorWaitCompleted,
},
},
}
Expand Down Expand Up @@ -1291,6 +1374,17 @@ func TestEvictPods(t *testing.T) {
Reason: PodDeleteStatusTypeOkay,
},
},
{
Pod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-9-wait-completed",
},
},
Status: PodDeleteStatus{
DrainBehavior: clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted, // Will be skipped because DrainBehavior is set to WaitCompleted
Reason: PodDeleteStatusTypeWaitCompleted,
},
},
}},
wantEvictionResult: EvictionResult{
PodsIgnored: []*corev1.Pod{
Expand Down Expand Up @@ -1354,6 +1448,13 @@ func TestEvictPods(t *testing.T) {
},
},
},
PodsToWaitCompleted: []*corev1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-9-wait-completed",
},
},
},
},
},
}
Expand Down Expand Up @@ -1651,6 +1752,13 @@ After above Pods have been removed from the Node, the following Pods will be evi
},
},
},
PodsToWaitCompleted: []*corev1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-16-wait-completed",
},
},
},
},
wantConditionMessage: `Drain not completed yet (started at 2024-10-09T16:13:59Z):
* Pods pod-2-deletionTimestamp-set-1, pod-2-deletionTimestamp-set-2, pod-2-deletionTimestamp-set-3, ... (4 more): deletionTimestamp set, but still not removed from the Node
Expand All @@ -1660,7 +1768,8 @@ After above Pods have been removed from the Node, the following Pods will be evi
* Pod pod-8-to-trigger-eviction-some-other-error: failed to evict Pod, some other error 3
* Pod pod-9-to-trigger-eviction-some-other-error: failed to evict Pod, some other error 4
* 1 Pod with other issues
After above Pods have been removed from the Node, the following Pods will be evicted: pod-11-eviction-later, pod-12-eviction-later, pod-13-eviction-later, ... (2 more)`,
After above Pods have been removed from the Node, the following Pods will be evicted: pod-11-eviction-later, pod-12-eviction-later, pod-13-eviction-later, ... (2 more)
Waiting for the following Pods to complete without eviction: pod-16-wait-completed`,
},
{
name: "Compute long condition message correctly with more skipped errors",
Expand Down
Loading

0 comments on commit ba555de

Please sign in to comment.