From 7edfb2a3a6e8054b96bbe311eb325d3b0cd944bc Mon Sep 17 00:00:00 2001 From: Bastian Krol Date: Thu, 4 Jul 2024 13:49:54 +0200 Subject: [PATCH] feat(controller): update instrumentation from older operator versions --- internal/controller/dash0_controller.go | 6 +- internal/controller/dash0_controller_test.go | 99 ++++++++++++++++++-- internal/util/labels.go | 41 ++++++-- test/e2e/e2e_test.go | 6 +- 4 files changed, 130 insertions(+), 22 deletions(-) diff --git a/internal/controller/dash0_controller.go b/internal/controller/dash0_controller.go index c8024f01..52f38194 100644 --- a/internal/controller/dash0_controller.go +++ b/internal/controller/dash0_controller.go @@ -658,11 +658,11 @@ func (r *Dash0Reconciler) instrumentWorkload( var requiredAction ModificationMode if util.WasInstrumentedButHasOptedOutNow(objectMeta) { requiredAction = Uninstrumentation - } else if util.HasBeenInstrumentedSuccessfully(objectMeta) { + } else if util.HasBeenInstrumentedSuccessfullyByThisVersion(objectMeta, r.Images) { // No change necessary, this workload has already been instrumented and an opt-out label (which would need to // trigger uninstrumentation) has not been added since it has been instrumented. - // This check likely needs to change, see - // https://linear.app/dash0/issue/ENG-1955/updating-the-operator-should-run-a-first-reconcile-on-all-workloads-to + logger.Info("not updating the existing instrumentation for this workload, it has already been successfully " + + "instrumented by the same operator version") return false } else if util.HasOptedOutOfInstrumenationAndIsUninstrumented(workload.getObjectMeta()) { logger.Info("not instrumenting this workload due to dash0.com/enable=false") diff --git a/internal/controller/dash0_controller_test.go b/internal/controller/dash0_controller_test.go index abe44886..2e885dd5 100644 --- a/internal/controller/dash0_controller_test.go +++ b/internal/controller/dash0_controller_test.go @@ -24,6 +24,11 @@ import ( . "github.com/dash0hq/dash0-operator/test/util" ) +const ( + olderOperatorControllerImageLabel = "some-registry_com_1234_dash0hq_operator-controller_0.9.8" + olderInitContainerImageLabel = "some-registry_com_1234_dash0hq_instrumentation_2.3.4" +) + var ( namespace = TestNamespaceName @@ -439,8 +444,8 @@ var _ = Describe("The Dash0 controller", Ordered, func() { }) }) - Describe("when a workload is already instrumented", func() { - It("should not touch an already instrumented cron job", func() { + Describe("when a workload is already instrumented by the same version", func() { + It("should not touch a successfully instrumented cron job", func() { name := UniqueName(CronJobNamePrefix) workload := CreateInstrumentedCronJob(ctx, k8sClient, TestNamespaceName, name) createdObjects = append(createdObjects, workload) @@ -449,7 +454,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() { VerifyNoEvents(ctx, clientset, TestNamespaceName) }) - It("should not touch an already instrumented daemon set", func() { + It("should not touch a successfully instrumented daemon set", func() { name := UniqueName(DaemonSetNamePrefix) workload := CreateInstrumentedDaemonSet(ctx, k8sClient, TestNamespaceName, name) createdObjects = append(createdObjects, workload) @@ -458,7 +463,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() { VerifyNoEvents(ctx, clientset, TestNamespaceName) }) - It("should not touch an already instrumented deployment", func() { + It("should not touch a successfully instrumented deployment", func() { name := UniqueName(DeploymentNamePrefix) workload := CreateInstrumentedDeployment(ctx, k8sClient, TestNamespaceName, name) createdObjects = append(createdObjects, workload) @@ -467,7 +472,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() { VerifyNoEvents(ctx, clientset, TestNamespaceName) }) - It("should not touch an already instrumented job", func() { + It("should not touch a successfully instrumented job", func() { name := UniqueName(JobNamePrefix) workload := CreateInstrumentedJob(ctx, k8sClient, TestNamespaceName, name) createdObjects = append(createdObjects, workload) @@ -476,7 +481,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() { VerifyNoEvents(ctx, clientset, TestNamespaceName) }) - It("should not touch an already instrumented replica set", func() { + It("should not touch a successfully instrumented replica set", func() { name := UniqueName(ReplicaSetNamePrefix) workload := CreateInstrumentedReplicaSet(ctx, k8sClient, TestNamespaceName, name) createdObjects = append(createdObjects, workload) @@ -485,7 +490,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() { VerifyNoEvents(ctx, clientset, TestNamespaceName) }) - It("should not touch an already instrumented stateful set", func() { + It("should not touch a successfully instrumented stateful set", func() { name := UniqueName(StatefulSetNamePrefix) workload := CreateInstrumentedStatefulSet(ctx, k8sClient, TestNamespaceName, name) createdObjects = append(createdObjects, workload) @@ -495,6 +500,86 @@ var _ = Describe("The Dash0 controller", Ordered, func() { }) }) + Describe("when a workload has been instrumented by a different operator version", func() { + It("should override the the instrumentation settings for a cron job", func() { + name := UniqueName(CronJobNamePrefix) + workload := CreateInstrumentedCronJob(ctx, k8sClient, TestNamespaceName, name) + createdObjects = append(createdObjects, workload) + workload.ObjectMeta.Labels["dash0.com/operator-image"] = olderOperatorControllerImageLabel + workload.ObjectMeta.Labels["dash0.com/init-container-image"] = olderInitContainerImageLabel + UpdateWorkload(ctx, k8sClient, workload) + triggerReconcileRequest(ctx, reconciler, "") + VerifyModifiedCronJob(GetCronJob(ctx, k8sClient, TestNamespaceName, name), BasicInstrumentedPodSpecExpectations()) + VerifySuccessfulInstrumentationEvent(ctx, clientset, namespace, name, "controller") + }) + + It("should override the the instrumentation settings for a daemon set", func() { + name := UniqueName(DaemonSetNamePrefix) + workload := CreateInstrumentedDaemonSet(ctx, k8sClient, TestNamespaceName, name) + createdObjects = append(createdObjects, workload) + workload.ObjectMeta.Labels["dash0.com/operator-image"] = olderOperatorControllerImageLabel + workload.ObjectMeta.Labels["dash0.com/init-container-image"] = olderInitContainerImageLabel + UpdateWorkload(ctx, k8sClient, workload) + triggerReconcileRequest(ctx, reconciler, "") + VerifyModifiedDaemonSet(GetDaemonSet(ctx, k8sClient, TestNamespaceName, name), BasicInstrumentedPodSpecExpectations()) + VerifySuccessfulInstrumentationEvent(ctx, clientset, namespace, name, "controller") + }) + + It("should override the the instrumentation settings for a deployment", func() { + name := UniqueName(DeploymentNamePrefix) + workload := CreateInstrumentedDeployment(ctx, k8sClient, TestNamespaceName, name) + createdObjects = append(createdObjects, workload) + workload.ObjectMeta.Labels["dash0.com/operator-image"] = olderOperatorControllerImageLabel + workload.ObjectMeta.Labels["dash0.com/init-container-image"] = olderInitContainerImageLabel + UpdateWorkload(ctx, k8sClient, workload) + triggerReconcileRequest(ctx, reconciler, "") + VerifyModifiedDeployment(GetDeployment(ctx, k8sClient, TestNamespaceName, name), BasicInstrumentedPodSpecExpectations()) + VerifySuccessfulInstrumentationEvent(ctx, clientset, namespace, name, "controller") + }) + + It("should not override the the instrumentation settings for a job", func() { + name := UniqueName(JobNamePrefix) + workload := CreateInstrumentedJob(ctx, k8sClient, TestNamespaceName, name) + createdObjects = append(createdObjects, workload) + workload.ObjectMeta.Labels["dash0.com/operator-image"] = "some-registry.com_1234_dash0hq_operator-controller_0.9.8" + workload.ObjectMeta.Labels["dash0.com/init-container-image"] = "some-registry.com_1234_dash0hq_instrumentation_2.3.4" + UpdateWorkload(ctx, k8sClient, workload) + triggerReconcileRequest(ctx, reconciler, "") + + // we do not attempt to update the instrumentation for jobs, since they are immutable + workload = GetJob(ctx, k8sClient, TestNamespaceName, name) + jobLabels := workload.ObjectMeta.Labels + Expect(jobLabels["dash0.com/instrumented"]).To(Equal("true")) + Expect(jobLabels["dash0.com/operator-image"]).To(Equal("some-registry.com_1234_dash0hq_operator-controller_0.9.8")) + Expect(jobLabels["dash0.com/init-container-image"]).To(Equal("some-registry.com_1234_dash0hq_instrumentation_2.3.4")) + VerifyNoEvents(ctx, clientset, namespace) + }) + + It("should override the the instrumentation settings for a replica set", func() { + name := UniqueName(ReplicaSetNamePrefix) + workload := CreateInstrumentedReplicaSet(ctx, k8sClient, TestNamespaceName, name) + createdObjects = append(createdObjects, workload) + workload.ObjectMeta.Labels["dash0.com/operator-image"] = olderOperatorControllerImageLabel + workload.ObjectMeta.Labels["dash0.com/init-container-image"] = olderInitContainerImageLabel + UpdateWorkload(ctx, k8sClient, workload) + triggerReconcileRequest(ctx, reconciler, "") + VerifyModifiedReplicaSet(GetReplicaSet(ctx, k8sClient, TestNamespaceName, name), BasicInstrumentedPodSpecExpectations()) + VerifySuccessfulInstrumentationEvent(ctx, clientset, namespace, name, "controller") + }) + + It("should override the the instrumentation settings for a stateful set", func() { + name := UniqueName(StatefulSetNamePrefix) + workload := CreateInstrumentedStatefulSet(ctx, k8sClient, TestNamespaceName, name) + createdObjects = append(createdObjects, workload) + workload.ObjectMeta.Labels["dash0.com/operator-image"] = olderOperatorControllerImageLabel + workload.ObjectMeta.Labels["dash0.com/init-container-image"] = olderInitContainerImageLabel + UpdateWorkload(ctx, k8sClient, workload) + triggerReconcileRequest(ctx, reconciler, "") + VerifyModifiedStatefulSet(GetStatefulSet(ctx, k8sClient, TestNamespaceName, name), BasicInstrumentedPodSpecExpectations()) + VerifySuccessfulInstrumentationEvent(ctx, clientset, namespace, name, "controller") + }) + }) + Describe("when reverting the instrumentation on cleanup", func() { It("should revert an instrumented cron job", func() { // We trigger one reconcile request before creating any workload and before deleting the Dash0 custom diff --git a/internal/util/labels.go b/internal/util/labels.go index 3ead4440..58f95fda 100644 --- a/internal/util/labels.go +++ b/internal/util/labels.go @@ -80,15 +80,33 @@ func HasBeenInstrumentedSuccessfully(meta *metav1.ObjectMeta) bool { return readInstrumentationState(meta) == instrumentedLabelValueSuccessful } +func HasBeenInstrumentedSuccessfullyByThisVersion( + meta *metav1.ObjectMeta, + images Images, +) bool { + if !HasBeenInstrumentedSuccessfully(meta) { + return false + } + operatorImageValue, operatorImageIsSet := readLabel(meta, operatorImageLabelKey) + initContainerImageValue, initContainerImageIsSet := readLabel(meta, initContainerImageLabelKey) + if !operatorImageIsSet || !initContainerImageIsSet { + return false + } + expectedOperatorImageLabel := ImageNameToLabel(images.OperatorImage) + expectedInitContainerImageLabel := ImageNameToLabel(images.InitContainerImage) + return operatorImageValue == expectedOperatorImageLabel && initContainerImageValue == expectedInitContainerImageLabel +} + func InstrumenationAttemptHasFailed(meta *metav1.ObjectMeta) bool { return readInstrumentationState(meta) == instrumentedLabelValueUnsuccessful } func readInstrumentationState(meta *metav1.ObjectMeta) instrumentedState { - if meta.Labels == nil { + instrumented, isSet := readLabel(meta, instrumentedLabelKey) + if !isSet { return instrumentedLabelValueUnknown } - switch meta.Labels[instrumentedLabelKey] { + switch instrumented { case string(instrumentedLabelValueSuccessful): return instrumentedLabelValueSuccessful case string(instrumentedLabelValueUnsuccessful): @@ -111,13 +129,8 @@ func WasInstrumentedButHasOptedOutNow(meta *metav1.ObjectMeta) bool { } func hasOptedOutOfInstrumenation(meta *metav1.ObjectMeta) bool { - if meta.Labels == nil { - return false - } - if value, ok := meta.Labels[dash0EnableLabelKey]; ok && value == "false" { - return true - } - return false + dash0EnabledValue, isSet := readLabel(meta, dash0EnableLabelKey) + return isSet && dash0EnabledValue == "false" } func CheckAndDeleteIgnoreOnceLabel(meta *metav1.ObjectMeta) bool { @@ -152,3 +165,13 @@ func ImageNameToLabel(imageName string) string { } return label[:63] } + +func readLabel(meta *metav1.ObjectMeta, key string) (string, bool) { + if meta.Labels == nil { + return "", false + } + if value, ok := meta.Labels[key]; ok { + return value, true + } + return "", false +} diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 03952050..5403240b 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -380,9 +380,9 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { By("installing the Node.js daemonset with dash0.com/enable=false") Expect(InstallNodeJsDaemonSetWithOptOutLabel(applicationUnderTestNamespace)).To(Succeed()) By("verifying that the Node.js daemonset has not been instrumented by the webhook") - Eventually(func(g Gomega) { - VerifyOnlyOptOutLabelIsPresent(g, applicationUnderTestNamespace, "daemonset") - }, 30*time.Second, verifyTelemetryPollingInterval).Should(Succeed()) + Consistently(func(g Gomega) { + verifyNoDash0Labels(g, applicationUnderTestNamespace, "daemonset", true) + }, 10*time.Second, verifyTelemetryPollingInterval).Should(Succeed()) By("Removing the opt-out label from the daemonset") Expect(RemoveOptOutLabel(