diff --git a/cmd/main.go b/cmd/main.go index 04496509..905d4771 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -214,7 +214,7 @@ func startOperatorManager( } setupLog.Info("Dash0 reconciler has been set up.") - if os.Getenv("ENABLE_WEBHOOKS") != "false" { + if os.Getenv("ENABLE_WEBHOOK") != "false" { if err = (&dash0webhook.Handler{ Client: mgr.GetClient(), Recorder: mgr.GetEventRecorderFor("dash0-webhook"), diff --git a/helm-chart/dash0-operator/templates/operator/deployment.yaml b/helm-chart/dash0-operator/templates/operator/deployment.yaml index f7cecc4b..0d4776a5 100644 --- a/helm-chart/dash0-operator/templates/operator/deployment.yaml +++ b/helm-chart/dash0-operator/templates/operator/deployment.yaml @@ -55,6 +55,10 @@ spec: - name: DASH0_INIT_CONTAINER_IMAGE_PULL_POLICY value: {{ .Values.operator.initContainerImage.pullPolicy }} {{- end }} + {{- if not .Values.operator.enableWebhook }} + - name: ENABLE_WEBHOOK + value: "false" + {{- end }} {{- if .Values.operator.developmentMode }} - name: DASH0_DEVELOPMENT_MODE value: {{ .Values.operator.developmentMode | toString | quote }} diff --git a/helm-chart/dash0-operator/tests/operator/deployment_test.yaml b/helm-chart/dash0-operator/tests/operator/deployment_test.yaml index eed9fbd0..b9536a79 100644 --- a/helm-chart/dash0-operator/tests/operator/deployment_test.yaml +++ b/helm-chart/dash0-operator/tests/operator/deployment_test.yaml @@ -54,6 +54,7 @@ tests: imagePullSecrets: - name: regcred - name: anotherSecret + enableWebhook: false managerPodResources: limits: cpu: 123m @@ -61,6 +62,7 @@ tests: requests: cpu: 5m memory: 32Mi + developmentMode: true asserts: - equal: path: metadata.labels['label1'] @@ -125,6 +127,18 @@ tests: - equal: path: spec.template.spec.containers[0].env[3].value value: Always + - equal: + path: spec.template.spec.containers[0].env[4].name + value: ENABLE_WEBHOOK + - equal: + path: spec.template.spec.containers[0].env[4].value + value: "false" + - equal: + path: 
spec.template.spec.containers[0].env[5].name + value: DASH0_DEVELOPMENT_MODE + - equal: + path: spec.template.spec.containers[0].env[5].value + value: "true" - equal: path: spec.template.spec.containers[0].resources.limits.cpu value: 123m diff --git a/helm-chart/dash0-operator/values.yaml b/helm-chart/dash0-operator/values.yaml index 99cf06fd..6f1797b6 100644 --- a/helm-chart/dash0-operator/values.yaml +++ b/helm-chart/dash0-operator/values.yaml @@ -44,6 +44,9 @@ operator: # label2: "value 2" podLabels: {} + # Set this to false (a YAML boolean, not the quoted string "false") to disable the admission webhook that instruments new workloads at deploy time. + enableWebhook: true + # resources for the controller manager pod(s) managerPodResources: limits: diff --git a/test/e2e/e2e_helpers.go b/test/e2e/e2e_helpers.go index 3a6d9d57..546867cb 100644 --- a/test/e2e/e2e_helpers.go +++ b/test/e2e/e2e_helpers.go @@ -37,6 +37,7 @@ const ( verifyTelemetryTimeout = 90 * time.Second verifyTelemetryPollingInterval = 500 * time.Millisecond dash0CustomResourceName = "dash0-sample" + additionalImageTag = "e2e-test" ) var ( @@ -305,6 +306,19 @@ func RebuildOperatorControllerImage(operatorImage ImageSpec, buildImageLocally b fmt.Sprintf("IMG_REPOSITORY=%s", operatorImage.repository), fmt.Sprintf("IMG_TAG=%s", operatorImage.tag), ))).To(Succeed()) + + additionalTag := ImageSpec{ + repository: operatorImage.repository, + tag: additionalImageTag, + } + Expect( + RunAndIgnoreOutput( + exec.Command( + "docker", + "tag", + renderFullyQualifiedImageName(operatorImage), + renderFullyQualifiedImageName(additionalTag), + ))).To(Succeed()) } func RebuildDash0InstrumentationImage(instrumentationImage ImageSpec, buildImageLocally bool) { @@ -328,6 +342,19 @@ func RebuildDash0InstrumentationImage(instrumentationImage ImageSpec, buildImage instrumentationImage.repository, instrumentationImage.tag, ))).To(Succeed()) + + additionalTag := ImageSpec{ + repository: instrumentationImage.repository, + tag: additionalImageTag, + } + Expect( + 
RunAndIgnoreOutput( + exec.Command( + "docker", + "tag", + renderFullyQualifiedImageName(instrumentationImage), + renderFullyQualifiedImageName(additionalTag), + ))).To(Succeed()) } func DeployOperatorWithCollectorAndClearExportedTelemetry( @@ -335,6 +362,7 @@ func DeployOperatorWithCollectorAndClearExportedTelemetry( operatorHelmChart string, operatorHelmChartUrl string, images Images, + enableWebhook bool, ) { By("removing old captured telemetry files") _ = os.Remove("test-resources/e2e-test-volumes/collector-received-data/traces.jsonl") @@ -344,7 +372,7 @@ func DeployOperatorWithCollectorAndClearExportedTelemetry( ensureOtelCollectorHelmRepoIsInstalled() ensureDash0OperatorHelmRepoIsInstalled(operatorHelmChart, operatorHelmChartUrl) - By("deploying the controller-manager") + By("deploying the operator controller") arguments := []string{ "install", "--namespace", @@ -355,56 +383,15 @@ func DeployOperatorWithCollectorAndClearExportedTelemetry( "--set", "operator.developmentMode=true", "--set", "operator.disableSecretCheck=true", "--set", "operator.disableOtlpEndpointCheck=true", + "--set", fmt.Sprintf("operator.enableWebhook=%t", enableWebhook), } - arguments = setIfNotEmpty(arguments, "operator.image.repository", images.operator.repository) - arguments = setIfNotEmpty(arguments, "operator.image.tag", images.operator.tag) - arguments = setIfNotEmpty(arguments, "operator.image.digest", images.operator.digest) - arguments = setIfNotEmpty(arguments, "operator.image.pullPolicy", images.operator.pullPolicy) - arguments = setIfNotEmpty(arguments, "operator.initContainerImage.repository", images.instrumentation.repository) - arguments = setIfNotEmpty(arguments, "operator.initContainerImage.tag", images.instrumentation.tag) - arguments = setIfNotEmpty(arguments, "operator.initContainerImage.digest", images.instrumentation.digest) - arguments = setIfNotEmpty(arguments, "operator.initContainerImage.pullPolicy", images.instrumentation.pullPolicy) - arguments = 
append(arguments, operatorHelmReleaseName) - arguments = append(arguments, operatorHelmChart) + arguments = addOptionalHelmParameters(arguments, operatorHelmChart, images) output, err := Run(exec.Command("helm", arguments...)) Expect(err).NotTo(HaveOccurred()) fmt.Fprintf(GinkgoWriter, "output of helm install:\n%s", output) - var controllerPodName string - By("validating that the controller-manager pod is running as expected") - verifyControllerUp := func() error { - cmd := exec.Command("kubectl", "get", - "pods", "-l", "control-plane=controller-manager", - "-o", "go-template={{ range .items }}"+ - "{{ if not .metadata.deletionTimestamp }}"+ - "{{ .metadata.name }}"+ - "{{ \"\\n\" }}{{ end }}{{ end }}", - "-n", operatorNamespace, - ) - - podOutput, err := Run(cmd, false) - Expect(err).NotTo(HaveOccurred()) - podNames := GetNonEmptyLines(podOutput) - if len(podNames) != 1 { - return fmt.Errorf("expect 1 controller pods running, but got %d -- %s", len(podNames), podOutput) - } - controllerPodName = podNames[0] - Expect(controllerPodName).To(ContainSubstring("controller-manager")) - - cmd = exec.Command("kubectl", "get", - "pods", controllerPodName, "-o", "jsonpath={.status.phase}", - "-n", operatorNamespace, - ) - status, err := Run(cmd) - Expect(err).NotTo(HaveOccurred()) - if status != "Running" { - return fmt.Errorf("controller pod in %s status", status) - } - return nil - } - - Eventually(verifyControllerUp, 120*time.Second, time.Second).Should(Succeed()) + verifyThatControllerPodIsRunning(operatorNamespace) // verify that the OTel collector is also up and running Expect(RunAndIgnoreOutput( @@ -420,6 +407,20 @@ func DeployOperatorWithCollectorAndClearExportedTelemetry( ))).To(Succeed()) } +func addOptionalHelmParameters(arguments []string, operatorHelmChart string, images Images) []string { + arguments = setIfNotEmpty(arguments, "operator.image.repository", images.operator.repository) + arguments = setIfNotEmpty(arguments, "operator.image.tag", 
images.operator.tag) + arguments = setIfNotEmpty(arguments, "operator.image.digest", images.operator.digest) + arguments = setIfNotEmpty(arguments, "operator.image.pullPolicy", images.operator.pullPolicy) + arguments = setIfNotEmpty(arguments, "operator.initContainerImage.repository", images.instrumentation.repository) + arguments = setIfNotEmpty(arguments, "operator.initContainerImage.tag", images.instrumentation.tag) + arguments = setIfNotEmpty(arguments, "operator.initContainerImage.digest", images.instrumentation.digest) + arguments = setIfNotEmpty(arguments, "operator.initContainerImage.pullPolicy", images.instrumentation.pullPolicy) + arguments = append(arguments, operatorHelmReleaseName) + arguments = append(arguments, operatorHelmChart) + return arguments +} + func setIfNotEmpty(arguments []string, key string, value string) []string { if value != "" { arguments = append(arguments, "--set") @@ -496,8 +497,45 @@ func ensureDash0OperatorHelmRepoIsInstalled(operatorHelmChart string, operatorHe } } +func verifyThatControllerPodIsRunning(operatorNamespace string) { + var controllerPodName string + By("validating that the controller-manager pod is running as expected") + verifyControllerUp := func() error { + cmd := exec.Command("kubectl", "get", + "pods", "-l", "control-plane=controller-manager", + "-o", "go-template={{ range .items }}"+ + "{{ if not .metadata.deletionTimestamp }}"+ + "{{ .metadata.name }}"+ + "{{ \"\\n\" }}{{ end }}{{ end }}", + "-n", operatorNamespace, + ) + + podOutput, err := Run(cmd, false) + Expect(err).NotTo(HaveOccurred()) + podNames := GetNonEmptyLines(podOutput) + if len(podNames) != 1 { + return fmt.Errorf("expect 1 controller pods running, but got %d -- %s", len(podNames), podOutput) + } + controllerPodName = podNames[0] + Expect(controllerPodName).To(ContainSubstring("controller-manager")) + + cmd = exec.Command("kubectl", "get", + "pods", controllerPodName, "-o", "jsonpath={.status.phase}", + "-n", operatorNamespace, + ) + status, 
err := Run(cmd) + Expect(err).NotTo(HaveOccurred()) + if status != "Running" { + return fmt.Errorf("controller pod in %s status", status) + } + return nil + } + + Eventually(verifyControllerUp, 120*time.Second, time.Second).Should(Succeed()) +} + func UndeployOperatorAndCollector(operatorNamespace string) { - By("undeploying the controller-manager") + By("undeploying the operator controller") Expect( RunAndIgnoreOutput( exec.Command( @@ -535,6 +573,52 @@ func VerifyDash0OperatorReleaseIsNotInstalled(g Gomega, operatorNamespace string ))).To(Succeed()) } +func UpgradeOperator( + operatorNamespace string, + operatorHelmChart string, + operatorHelmChartUrl string, + images Images, + enableWebhook bool, +) { + ensureDash0OperatorHelmRepoIsInstalled(operatorHelmChart, operatorHelmChartUrl) + + By("upgrading the operator controller") + arguments := []string{ + "upgrade", + "--namespace", + operatorNamespace, + "--values", + "test-resources/helm/e2e.values.yaml", + "--set", "operator.developmentMode=true", + "--set", "operator.disableSecretCheck=true", + "--set", "operator.disableOtlpEndpointCheck=true", + "--set", fmt.Sprintf("operator.enableWebhook=%t", enableWebhook), + } + arguments = addOptionalHelmParameters(arguments, operatorHelmChart, images) + + output, err := Run(exec.Command("helm", arguments...)) + Expect(err).NotTo(HaveOccurred()) + fmt.Fprintf(GinkgoWriter, "output of helm upgrade:\n%s", output) + + By("waiting shortly, to give the operator time to restart after helm upgrade") + time.Sleep(5 * time.Second) + + verifyThatControllerPodIsRunning(operatorNamespace) + + // verify that the OTel collector is also up and running + Expect(RunAndIgnoreOutput( + exec.Command("kubectl", + "rollout", + "status", + "daemonset", + fmt.Sprintf("%s-opentelemetry-collector-agent", operatorHelmReleaseName), + "--namespace", + operatorNamespace, + "--timeout", + "60s", + ))).To(Succeed()) +} + func DeployDash0CustomResource(namespace string) { Expect( 
RunAndIgnoreOutput(exec.Command( diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 5403240b..383f4f22 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -151,6 +151,7 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { operatorHelmChart, operatorHelmChartUrl, images, + true, ) DeployDash0CustomResource(applicationUnderTestNamespace) @@ -192,6 +193,7 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { operatorHelmChart, operatorHelmChartUrl, images, + true, ) DeployDash0CustomResource(applicationUnderTestNamespace) By("verifying that the Node.js job has been labelled by the controller and that an event has been emitted") @@ -222,6 +224,7 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { operatorHelmChart, operatorHelmChartUrl, images, + true, ) DeployDash0CustomResource(applicationUnderTestNamespace) By("verifying that the Node.js pod has not been labelled") @@ -230,6 +233,68 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { }, verifyTelemetryTimeout, verifyTelemetryPollingInterval).Should(Succeed()) }) }) + + Describe("when updating workloads at startup", func() { + It("should update instrumentation modifications at startup", func() { + By("installing the Node.js deployment") + Expect(InstallNodeJsDeployment(applicationUnderTestNamespace)).To(Succeed()) + + By("deploy the operator and the Dash0 custom resource") + initialImages := Images{ + operator: ImageSpec{ + repository: "operator-controller", + tag: additionalImageTag, + pullPolicy: "Never", + }, + instrumentation: ImageSpec{ + repository: "instrumentation", + tag: additionalImageTag, + pullPolicy: "Never", + }, + } + DeployOperatorWithCollectorAndClearExportedTelemetry( + operatorNamespace, + operatorHelmChart, + operatorHelmChartUrl, + // we deploy the chart with the additional image tag (additionalImageTag) initially, so that the helm upgrade + // command we run later (with the image references from images) will actually change the reference to the 
image (even + // if it is the same image content). + initialImages, + false, + ) + DeployDash0CustomResource(applicationUnderTestNamespace) + + By("verifying that the Node.js deployment has been instrumented by the controller") + VerifyThatWorkloadHasBeenInstrumented( + applicationUnderTestNamespace, + "deployment", + 1207, + false, + initialImages, + "controller", + ) + + UpgradeOperator( + operatorNamespace, + operatorHelmChart, + operatorHelmChartUrl, + // now we use different image tags + images, + false, + ) + + By("verifying that the Node.js deployment's instrumentation settings have been updated by the controller") + VerifyThatWorkloadHasBeenInstrumented( + applicationUnderTestNamespace, + "deployment", + 1207, + false, + // check that the new image tags have been applied to the workload + images, + "controller", + ) + }) + }) }) Describe("webhook", func() { @@ -239,6 +304,7 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { operatorHelmChart, operatorHelmChartUrl, images, + true, ) fmt.Fprint(GinkgoWriter, "waiting 10 seconds to give the webhook some time to get ready\n") @@ -465,6 +531,7 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { operatorHelmChart, operatorHelmChartUrl, images, + true, ) runInParallelForAllWorkloadTypes(configs, func(config removalTestNamespaceConfig) { DeployDash0CustomResource(config.namespace)