Skip to content

Commit

Permalink
wait 20 sec before delete disruptioncron + finalizer (#912)
Browse files Browse the repository at this point in the history
* cleanedAt on disruptioncron

* cleanedAt + finalizer

* rely on deletiontimestamp

* lint

* test

* fix tu

* fix tu

---------

Co-authored-by: Philip Thompson <[email protected]>
  • Loading branch information
clairecng and ptnapoleon authored Oct 3, 2024
1 parent 6080afb commit fddb56b
Show file tree
Hide file tree
Showing 7 changed files with 114 additions and 11 deletions.
7 changes: 7 additions & 0 deletions api/v1beta1/disruption_cron_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
package v1beta1

import (
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

Expand Down Expand Up @@ -102,3 +104,8 @@ type DisruptionCronTrigger struct {
Kind string `json:"kind,omitempty"`
CreatedAt metav1.Time `json:"createdAt,omitempty"`
}

// IsReadyToRemoveFinalizer checks if adisruptioncron has been deleting for > finalizerDelay
func (d *DisruptionCron) IsReadyToRemoveFinalizer(finalizerDelay time.Duration) bool {
return d.DeletionTimestamp != nil && time.Now().After(d.DeletionTimestamp.Add(finalizerDelay))
}
59 changes: 59 additions & 0 deletions api/v1beta1/disruption_cron_types_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2024 Datadog, Inc.

package v1beta1

import (
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)

var _ = Describe("DisruptionCron Types", func() {
var delay = time.Duration(20 * time.Second)

Describe("IsReadyToRemoveFinalizer", func() {
Describe("true cases", func() {
When("deletedAt is set and we are ready to remove the finalizer", func() {
It("should return true", func() {
// Arrange
readyTime := time.Now().Add(-1 * time.Duration(21*time.Second))

disruptionCron := DisruptionCron{
ObjectMeta: metav1.ObjectMeta{
DeletionTimestamp: &metav1.Time{Time: readyTime},
},
}

// Act
isReady := disruptionCron.IsReadyToRemoveFinalizer(delay)

// Assert
Expect(isReady).To(BeTrue())
})
})
})

Describe("false cases", func() {
DescribeTable("all false cases", func(disruptionCron DisruptionCron) {
// Act
isReady := disruptionCron.IsReadyToRemoveFinalizer(delay)

// Assert
Expect(isReady).To(BeFalse())
},
Entry("no deletion timestamp set", DisruptionCron{}),
Entry("deletion timestamp is < than required", DisruptionCron{
ObjectMeta: metav1.ObjectMeta{
DeletionTimestamp: &metav1.Time{Time: time.Now()},
},
}))
})
})

})
2 changes: 1 addition & 1 deletion config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,7 @@ func New(logger *zap.SugaredLogger, osArgs []string) (config, error) {
return cfg, err
}

mainFS.DurationVar(&cfg.Controller.FinalizerDeletionDelay, "finalizer-deletion-delay", DefaultFinalizerDeletionDelay, "Define a delay before we attempt at removing the finalizers (on disruption only)")
mainFS.DurationVar(&cfg.Controller.FinalizerDeletionDelay, "finalizer-deletion-delay", DefaultFinalizerDeletionDelay, "Define a delay before we attempt at removing the finalizers (on disruption and disruptioncron only)")

if err := viper.BindPFlag("controller.finalizerDeletionDelay", mainFS.Lookup("finalizer-deletion-delay")); err != nil {
return cfg, err
Expand Down
2 changes: 2 additions & 0 deletions config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ var _ = Describe("Config", func() {
By("overriding controller values")
Expect(v.Controller.DeleteOnly).To(BeFalse())
Expect(v.Controller.DefaultDuration).To(Equal(1 * time.Hour))
Expect(v.Controller.FinalizerDeletionDelay).To(Equal(20 * time.Second))
Expect(v.Controller.EnableSafeguards).To(BeTrue())
Expect(v.Controller.EnableObserver).To(BeTrue())
Expect(v.Controller.MetricsBindAddr).To(Equal(":8080"))
Expand Down Expand Up @@ -120,6 +121,7 @@ var _ = Describe("Config", func() {
By("overriding controller values")
Expect(v.Controller.DeleteOnly).To(BeTrue())
Expect(v.Controller.DefaultDuration).To(Equal(3 * time.Minute))
Expect(v.Controller.FinalizerDeletionDelay).To(Equal(2 * time.Second))
Expect(v.Controller.EnableSafeguards).To(BeFalse())
Expect(v.Controller.EnableObserver).To(BeFalse())
Expect(v.Controller.MetricsBindAddr).To(Equal("127.0.0.1:8080"))
Expand Down
45 changes: 39 additions & 6 deletions controllers/disruption_cron_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,26 @@ import (

chaosv1beta1 "github.com/DataDog/chaos-controller/api/v1beta1"
"github.com/DataDog/chaos-controller/o11y/metrics"
chaostypes "github.com/DataDog/chaos-controller/types"

"github.com/robfig/cron"
"go.uber.org/zap"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
)

var DisruptionCronTags = []string{}

type DisruptionCronReconciler struct {
Client client.Client
Scheme *runtime.Scheme
BaseLog *zap.SugaredLogger
log *zap.SugaredLogger
MetricsSink metrics.Sink
Client client.Client
Scheme *runtime.Scheme
BaseLog *zap.SugaredLogger
log *zap.SugaredLogger
MetricsSink metrics.Sink
FinalizerDeletionDelay time.Duration
}

func (r *DisruptionCronReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res ctrl.Result, err error) {
Expand Down Expand Up @@ -57,11 +60,41 @@ func (r *DisruptionCronReconciler) Reconcile(ctx context.Context, req ctrl.Reque
r.log.Infow("fetched last known history", "history", instance.Status.History)
DisruptionCronTags = []string{"disruptionCronName:" + instance.Name, "disruptionCronNamespace:" + instance.Namespace, "targetName:" + instance.Spec.TargetResource.Name}

// DisruptionCron being deleted
if !instance.DeletionTimestamp.IsZero() {
// Add finalizer here if required
// the instance is being deleted, remove finalizer avec finalizerDeletionDelay
if controllerutil.ContainsFinalizer(instance, chaostypes.DisruptionCronFinalizer) {
if instance.IsReadyToRemoveFinalizer(r.FinalizerDeletionDelay) {
// we reach this code when all the cleanup pods have succeeded and we waited for finalizerDeletionDelay
// we can remove the finalizer and let the resource being garbage collected
r.log.Infow("removing disruptioncron finalizer")

controllerutil.RemoveFinalizer(instance, chaostypes.DisruptionCronFinalizer)

if err := r.Client.Update(ctx, instance); err != nil {
return ctrl.Result{}, fmt.Errorf("error removing disruptioncron finalizer: %w", err)
}

return ctrl.Result{}, nil
}

// waiting for finalizerDeletionDelay before removing finalizer
requeueAfter := r.FinalizerDeletionDelay
r.log.Infow(fmt.Sprintf("requeuing to remove finalizer after %s", requeueAfter))

return ctrl.Result{Requeue: true, RequeueAfter: requeueAfter}, nil
}

return ctrl.Result{}, nil
}

updated := controllerutil.AddFinalizer(instance, chaostypes.DisruptionCronFinalizer)
if updated {
if err := r.Client.Update(ctx, instance); err != nil {
return ctrl.Result{}, fmt.Errorf("error adding disruptioncron finalizer: %w", err)
}
}

// Update the DisruptionCron status based on the presence of the target resource
// If the target resource has been missing for longer than the TargetResourceMissingThreshold, delete the instance
targetResourceExists, instanceDeleted, err := r.updateTargetResourcePreviouslyMissing(ctx, instance)
Expand Down
3 changes: 2 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,8 @@ func main() {
BaseLog: logger,
Scheme: mgr.GetScheme(),
// new metrics sink for cron controller
MetricsSink: initMetricsSink(cfg.Controller.MetricsSink, logger, metricstypes.SinkAppCronController),
MetricsSink: initMetricsSink(cfg.Controller.MetricsSink, logger, metricstypes.SinkAppCronController),
FinalizerDeletionDelay: cfg.Controller.FinalizerDeletionDelay,
}

defer closeMetricsSink(logger, disruptionCronReconciler.MetricsSink)
Expand Down
7 changes: 4 additions & 3 deletions types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,10 @@ const (
// DisruptionNamespaceLabel is the label used to identify the disruption namespace for a chaos pod. This is used to determine pod ownership.
DisruptionNamespaceLabel = GroupName + "/disruption-namespace"

finalizerPrefix = "finalizer." + GroupName
DisruptionFinalizer = finalizerPrefix
ChaosPodFinalizer = finalizerPrefix + "/chaos-pod"
finalizerPrefix = "finalizer." + GroupName
DisruptionFinalizer = finalizerPrefix
DisruptionCronFinalizer = finalizerPrefix
ChaosPodFinalizer = finalizerPrefix + "/chaos-pod"

PulsingDisruptionMinimumDuration = 500 * time.Millisecond
// InjectorPadDuration is the length of time we extend the injector's duration on top of the disruption's duration,
Expand Down

0 comments on commit fddb56b

Please sign in to comment.