From 9ad4b4c478d816ea3de0aae9c664fab4491e2d23 Mon Sep 17 00:00:00 2001 From: IB Akshay Date: Mon, 9 Sep 2024 13:09:03 +0200 Subject: [PATCH 01/16] feat(alerts): Add test framework configuration - Added test-alerts-config.yaml to configure the test framework for integration tests. - Added test-alerts.yaml to define the integration test pod. - Added test-permissions.yaml to set up the necessary permissions for the test framework. - Updated values.yaml to enable the test framework and specify the image and pull policy. --- .../templates/tests/test-alerts-config.yaml | 49 ++++++++++ .../charts/templates/tests/test-alerts.yaml | 28 ++++++ .../templates/tests/test-permissions.yaml | 94 +++++++++++++++++++ alerts/charts/values.yaml | 8 ++ 4 files changed, 179 insertions(+) create mode 100644 alerts/charts/templates/tests/test-alerts-config.yaml create mode 100644 alerts/charts/templates/tests/test-alerts.yaml create mode 100644 alerts/charts/templates/tests/test-permissions.yaml diff --git a/alerts/charts/templates/tests/test-alerts-config.yaml b/alerts/charts/templates/tests/test-alerts-config.yaml new file mode 100644 index 00000000..9a75e063 --- /dev/null +++ b/alerts/charts/templates/tests/test-alerts-config.yaml @@ -0,0 +1,49 @@ +{{- if .Values.testFramework.enabled -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Release.Name }}-test + namespace: {{ .Release.Namespace }} + labels: + type: integration-test + annotations: + "helm.sh/hook": test + "helm.sh/hook-weight": "-5" # Installed and upgraded before the test pod + "helm.sh/hook-delete-policy": "before-hook-creation,hook-succeeded" +data: + run.sh: |- + + #!/usr/bin/env bats + + load "/usr/lib/bats/bats-detik/utils" + load "/usr/lib/bats/bats-detik/detik" + + DETIK_CLIENT_NAME="kubectl" + + # @test "Verify successful deployment and running status of the {{ .Release.Name }}-operator pod" { + # verify "there is 1 deployment named '{{ .Release.Name }}-operator'" + # verify "there is 1 service named '{{ .Release.Name }}-operator'" + # try "at most 2 times every 5s to get pods named '{{ .Release.Name }}-operator' and verify that '.status.phase' is 'running'" + # } + + # @test "Verify successful creation and bound status of {{ .Release.Name }} persistent volume claims" { + # try "at most 3 times every 5s to get persistentvolumeclaims named '{{ .Release.Name }}.*' and verify that '.status.phase' is 'Bound'" + # } + + # @test "Verify successful creation and available replicas of {{ .Release.Name }} Prometheus resource" { + # try "at most 3 times every 5s to get prometheuses named '{{ .Release.Name }}' and verify that '.status.availableReplicas' is more than '0'" + # } + + @test "Verify creation of required custom resource definitions (CRDs) for {{ .Release.Name }}" { + verify "there is 1 customresourcedefinition named 'alertmanagerconfigs'" + verify "there is 1 customresourcedefinition named 'alertmanagers'" + verify "there is 1 customresourcedefinition named 'podmonitors'" + verify "there is 1 customresourcedefinition named 'probes'" + verify "there is 1 customresourcedefinition named 'prometheuses'" + verify "there is 1 customresourcedefinition named 'prometheusrules'" + verify "there is 1 customresourcedefinition named 'prometheusagents'" + verify "there is 1 customresourcedefinition named 'servicemonitors'" + verify "there is 1 customresourcedefinition named 'scrapeconfigs'" + verify "there is 1 customresourcedefinition named 'thanosrulers'" + } +{{- end -}} diff --git a/alerts/charts/templates/tests/test-alerts.yaml b/alerts/charts/templates/tests/test-alerts.yaml new file mode 100644 index 00000000..a88aa9ab --- /dev/null +++ b/alerts/charts/templates/tests/test-alerts.yaml @@ -0,0 +1,28 @@ +{{- if .Values.testFramework.enabled -}} +apiVersion: v1 +kind: Pod +metadata: + name: {{ .Release.Name }}-test + namespace: {{ .Release.Namespace }} + labels: + type: integration-test + annotations: + "helm.sh/hook": test + "helm.sh/hook-delete-policy": "before-hook-creation,hook-succeeded" +spec: + serviceAccountName: {{ .Release.Name }}-test + containers: + - name: bats-test + image: "{{ .Values.testFramework.image.registry}}/{{ .Values.testFramework.image.repository}}:{{ .Values.testFramework.image.tag }}" + imagePullPolicy: {{ .Values.testFramework.image.pullPolicy }} + command: ["bats", "-t", "/tests/run.sh"] + volumeMounts: + - name: tests + mountPath: /tests + readOnly: true + volumes: + - name: tests + configMap: + name: {{ .Release.Name }}-test + restartPolicy: Never +{{- end -}} diff --git a/alerts/charts/templates/tests/test-permissions.yaml b/alerts/charts/templates/tests/test-permissions.yaml new file mode 100644 index 00000000..225e97ea --- /dev/null +++ b/alerts/charts/templates/tests/test-permissions.yaml @@ -0,0 +1,94 @@ +{{- if .Values.testFramework.enabled -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ .Release.Name }}-test + namespace: {{ .Release.Namespace }} + labels: + type: integration-test + {{- include "kube-prometheus-stack.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + "helm.sh/hook-weight": "-5" + "helm.sh/hook-delete-policy": "before-hook-creation,hook-succeeded" +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ .Release.Name }}-test + namespace: {{ .Release.Namespace }} + labels: + type: integration-test + {{- include "kube-prometheus-stack.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + "helm.sh/hook-weight": "-5" + "helm.sh/hook-delete-policy": "before-hook-creation,hook-succeeded" +rules: + - apiGroups: ["apps"] + resources: ["deployments", "statefulsets"] + verbs: ["get", "list"] + - apiGroups: [""] + resources: ["pods", "persistentvolumeclaims", "services"] + verbs: ["get", "list"] + - apiGroups: ["monitoring.coreos.com"] + resources: ["prometheuses", "podmonitors"] + verbs: ["get", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ .Release.Name }}-test + namespace: {{ .Release.Namespace }} + labels: + type: integration-test + {{- include "kube-prometheus-stack.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + "helm.sh/hook-weight": "-5" + "helm.sh/hook-delete-policy": "before-hook-creation,hook-succeeded" +subjects: + - kind: ServiceAccount + name: {{ .Release.Name }}-test + namespace: {{ .Release.Namespace }} +roleRef: + kind: Role + name: {{ .Release.Name }}-test + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ .Release.Name }}-test + labels: + type: integration-test + {{- include "kube-prometheus-stack.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + "helm.sh/hook-weight": "-5" + "helm.sh/hook-delete-policy": "before-hook-creation,hook-succeeded" +rules: + - apiGroups: ["apiextensions.k8s.io"] + resources: ["customresourcedefinitions"] + verbs: ["get", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ .Release.Name }}-test + labels: + type: integration-test + {{- include "kube-prometheus-stack.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + "helm.sh/hook-weight": "-5" + "helm.sh/hook-delete-policy": "before-hook-creation,hook-succeeded" +subjects: + - kind: ServiceAccount + name: {{ .Release.Name }}-test + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ .Release.Name }}-test + apiGroup: rbac.authorization.k8s.io +{{- end -}} diff --git a/alerts/charts/values.yaml b/alerts/charts/values.yaml index 52c6eaec..0ac09f1f 100644 --- a/alerts/charts/values.yaml +++ b/alerts/charts/values.yaml @@ -244,3 +244,11 @@ alerts: # matchers: # - name: alertname # matchType: "=~" + +testFramework: + enabled: true + image: + registry: ghcr.io + repository: cloudoperators/greenhouse-extensions-integration-test + tag: main + imagePullPolicy: IfNotPresent From 07bea4ae53c3b9977ca67eecab4333b06e780169 Mon Sep 17 00:00:00 2001 From: IB Akshay Date: Mon, 9 Sep 2024 14:55:29 +0200 Subject: [PATCH 02/16] refactor(alerts): Update test configuration --- .../templates/tests/test-alerts-config.yaml | 21 ++++++++----------- .../templates/tests/test-permissions.yaml | 2 +- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/alerts/charts/templates/tests/test-alerts-config.yaml b/alerts/charts/templates/tests/test-alerts-config.yaml index 9a75e063..a0d72db6 100644 --- a/alerts/charts/templates/tests/test-alerts-config.yaml +++ b/alerts/charts/templates/tests/test-alerts-config.yaml @@ -20,19 +20,16 @@ data: DETIK_CLIENT_NAME="kubectl" - # @test "Verify successful deployment and running status of the {{ .Release.Name }}-operator pod" { - # verify "there is 1 deployment named '{{ .Release.Name }}-operator'" - # verify "there is 1 service named '{{ .Release.Name }}-operator'" - # try "at most 2 times every 5s to get pods named '{{ .Release.Name }}-operator' and verify that '.status.phase' is 'running'" - # } - - # @test "Verify successful creation and bound status of {{ .Release.Name }} persistent volume claims" { - # try "at most 3 times every 5s to get persistentvolumeclaims named '{{ .Release.Name }}.*' and verify that '.status.phase' is 'Bound'" - # } + @test "Verify successful deployment, service and running status of the {{ .Release.Name }}-operator pod" { + verify "there is 1 deployment named '{{ .Release.Name }}-operator'" + verify "there is 1 service named '{{ .Release.Name }}-operator'" + try "at most 2 times every 5s to get pods named '{{ .Release.Name }}-operator.*' and verify that '.status.phase' is 'running'" + } - # @test "Verify successful creation and available replicas of {{ .Release.Name }} Prometheus resource" { - # try "at most 3 times every 5s to get prometheuses named '{{ .Release.Name }}' and verify that '.status.availableReplicas' is more than '0'" - # } + @test "Verify that Alertmanager '{{ .Release.Name }}' is Available and Reconciled" { + try "at most 2 times every 5s to get alertmanagers named '{{ .Release.Name }}' and verify that '.status.conditions[0].status' is 'True'" + try "at most 2 times every 5s to get alertmanagers named '{{ .Release.Name }}' and verify that '.status.conditions[1].status' is 'True'" + } @test "Verify creation of required custom resource definitions (CRDs) for {{ .Release.Name }}" { verify "there is 1 customresourcedefinition named 'alertmanagerconfigs'" diff --git a/alerts/charts/templates/tests/test-permissions.yaml b/alerts/charts/templates/tests/test-permissions.yaml index 225e97ea..70e4a4f1 100644 --- a/alerts/charts/templates/tests/test-permissions.yaml +++ b/alerts/charts/templates/tests/test-permissions.yaml @@ -32,7 +32,7 @@ rules: resources: ["pods", "persistentvolumeclaims", "services"] verbs: ["get", "list"] - apiGroups: ["monitoring.coreos.com"] - resources: ["prometheuses", "podmonitors"] + resources: ["prometheuses", "podmonitors", "alertmanagers"] verbs: ["get", "list"] --- apiVersion: rbac.authorization.k8s.io/v1 From dc79067c7ef498f5d23a46b2ae96f653d0939ac0 Mon Sep 17 00:00:00 2001 From: IB Akshay Date: Wed, 18 Sep 2024 21:22:37 +0200 Subject: [PATCH 03/16] test(Alerts) Add tests --- .../templates/tests/test-alerts-config.yaml | 15 + .../templates/tests/test-permissions.yaml | 5 +- alerts/ci/test-values.yaml | 258 ++++++++++++++++++ 3 files changed, 277 insertions(+), 1 deletion(-) create mode 100644 alerts/ci/test-values.yaml diff --git a/alerts/charts/templates/tests/test-alerts-config.yaml b/alerts/charts/templates/tests/test-alerts-config.yaml index a0d72db6..4cd959e7 100644 --- a/alerts/charts/templates/tests/test-alerts-config.yaml +++ b/alerts/charts/templates/tests/test-alerts-config.yaml @@ -31,6 +31,21 @@ data: try "at most 2 times every 5s to get alertmanagers named '{{ .Release.Name }}' and verify that '.status.conditions[1].status' is 'True'" } + {{- if .Values.alerts.alertmanager.ingress.enabled }} + @test "Verify that secret '{{ .Release.Namespace }}-monitoring-ca' is created" { + verify "there is 1 secret named '{{ .Release.Namespace }}-monitoring-ca'" + } + {{- end -}} + + {{- if .Values.alerts.alertmanager.ingress.enabled }} + {{ if .Capabilities.APIVersions.Has "cert-manager.io/v1" }} + @test "Verify that Issuer and Certificate are created" { + verify "there is 1 issuer named '{{ .Release.Namespace }}-monitoring-issuer'" + verify "there is 1 certificate named '{{ .Release.Namespace }}-prometheus-auth'" + } + {{- end }} + {{- end }} + @test "Verify creation of required custom resource definitions (CRDs) for {{ .Release.Name }}" { verify "there is 1 customresourcedefinition named 'alertmanagerconfigs'" verify "there is 1 customresourcedefinition named 'alertmanagers'" diff --git a/alerts/charts/templates/tests/test-permissions.yaml b/alerts/charts/templates/tests/test-permissions.yaml index 70e4a4f1..af6fc73b 100644 --- a/alerts/charts/templates/tests/test-permissions.yaml +++ b/alerts/charts/templates/tests/test-permissions.yaml @@ -29,11 +29,14 @@ rules: resources: ["deployments", "statefulsets"] verbs: ["get", "list"] - apiGroups: [""] - resources: ["pods", "persistentvolumeclaims", "services"] + resources: ["pods", "persistentvolumeclaims", "services", "secrets"] verbs: ["get", "list"] - apiGroups: ["monitoring.coreos.com"] resources: ["prometheuses", "podmonitors", "alertmanagers"] verbs: ["get", "list"] + - apiGroups: ["cert-manager.io"] + resources: ["issuers", "certificates"] + verbs: ["get", "list"] --- apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding diff --git a/alerts/ci/test-values.yaml b/alerts/ci/test-values.yaml new file mode 100644 index 00000000..30f163c0 --- /dev/null +++ b/alerts/ci/test-values.yaml @@ -0,0 +1,258 @@ +global: + ## common labels to apply to all resources + ## + commonLabels: {} + +# kube-prometheus-stack configuration scoped to alerts +alerts: + + ## Create default rules for monitoring the cluster + ## + defaultRules: + create: true + rules: + alertmanager: true + etcd: false + configReloaders: false + general: false + k8sContainerCpuUsageSecondsTotal: false + k8sContainerMemoryCache: false + k8sContainerMemoryRss: false + k8sContainerMemorySwap: false + k8sContainerResource: false + k8sPodOwner: false + kubeApiserverAvailability: false + kubeApiserverBurnrate: false + kubeApiserverHistogram: false + kubeApiserverSlos: false + kubeControllerManager: false + kubelet: false + kubeProxy: false + kubePrometheusGeneral: false + kubePrometheusNodeRecording: false + kubernetesApps: false + kubernetesResources: false + kubernetesStorage: false + kubernetesSystem: false + kubeSchedulerAlerting: false + kubeSchedulerRecording: false + kubeStateMetrics: false + network: false + node: false + nodeExporterAlerting: false + nodeExporterRecording: false + prometheus: false + prometheusOperator: false + windows: false + + ## Flag to disable all the kubernetes component scrapers + ## + kubernetesServiceMonitors: + enabled: false + + ## Setting to true produces cleaner resource names, but requires a data migration because the name of the persistent volume changes. Therefore this should only be set once on initial installation. + ## + cleanPrometheusOperatorObjectNames: true + + ## Install Prometheus Operator CRDs + ## + crds: + enabled: true + + windowsMonitoring: + ## Deploys the windows-exporter and Windows-specific dashboards and rules (job name must be 'windows-exporter') + enabled: false + + ## Using default values from https://github.com/grafana/helm-charts/blob/main/charts/grafana/values.yaml + ## + grafana: + enabled: false + + ## Configuration for thanosRuler + ## ref: https://thanos.io/tip/components/rule.md/ + ## + thanosRuler: + + ## Deploy thanosRuler + ## + enabled: false + + + ## Configuration for the Prometheus instance + ## + prometheus: + + ## Deploy Prometheus + ## + enabled: false + + ## Deploy node exporter as a daemonset to all nodes + ## + nodeExporter: + ## Deploy prometheus node-exporter + ## + enabled: false + + ## Component scraping kube state metrics + ## + kubeStateMetrics: + ## Deploy kube-state-metrics subchart + enabled: false + + ## Manages Prometheus and Alertmanager components + ## + prometheusOperator: + ## Deploy prometheus-operator + enabled: true + + ## Configuration for alertmanager + ## ref: https://prometheus.io/docs/alerting/alertmanager/ + ## + alertmanager: + + ## Deploy alertmanager + ## + enabled: true + + ## Alertmanager configuration directives + ## ref: https://prometheus.io/docs/alerting/configuration/#configuration-file + ## https://prometheus.io/webtools/alerting/routing-tree-editor/ + ## + config: + global: + resolve_timeout: 5m + inhibit_rules: + - source_matchers: + - 'severity = critical' + target_matchers: + - 'severity =~ warning|info' + equal: + - 'namespace' + - 'alertname' + - source_matchers: + - 'severity = warning' + target_matchers: + - 'severity = info' + equal: + - 'namespace' + - 'alertname' + - source_matchers: + - 'alertname = InfoInhibitor' + target_matchers: + - 'severity = info' + equal: + - 'namespace' + route: + group_by: + - organization + - alertname + - cluster + - region + group_wait: 30s + group_interval: 5m + repeat_interval: 12h + continue: false + receiver: 'null' + routes: + - matchers: + - alertname =~ "Watchdog|InfoInhibitor" + receiver: "null" + continue: false + + receivers: + - name: 'null' + + templates: + - '/etc/alertmanager/config/*.tmpl' + - '/etc/alertmanager/notification-templates/*.tmpl' + + + ingress: + enabled: true + crds: + enabled: true + hosts: + - alertmanager.{{ .Release.Namespace }}.svc.cluster.local + + ## By default, a ca-bundle is deployed to enable tls between Prometheus and Alertmanager + annotations: + disco: "true" + kubernetes.io/tls-acme: "true" + nginx.ingress.kubernetes.io/auth-tls-secret: "{{ $.Release.Namespace }}/{{ $.Release.Namespace }}-ca-bundle" + nginx.ingress.kubernetes.io/auth-tls-verify-client: "true" + nginx.ingress.kubernetes.io/auth-tls-verify-depth: "3" + nginx.ingress.kubernetes.io/cors-allow-headers: Content-Type + nginx.ingress.kubernetes.io/cors-allow-methods: DELETE + nginx.ingress.kubernetes.io/enable-cors: "true" + ingressClassName: nginx + + ## Settings affecting alertmanagerSpec + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#alertmanagerspec + + alertmanagerSpec: + + ## Defines the strategy used by AlertmanagerConfig objects to match alerts. + ## + alertmanagerConfigMatcherStrategy: + type: OnNamespace + + ## AlertmanagerConfigs to be selected to merge and configure Alertmanager with. + ## + alertmanagerConfigSelector: + matchLabels: + plugin: "{{ $.Release.Name }}" + + ## Additional volumes on the output StatefulSet definition. + volumeMounts: + - mountPath: /etc/alertmanager/notification-templates + name: alertmanager-notification-templates + readOnly: true + + ## Additional VolumeMounts on the output StatefulSet definition. + volumes: + - configMap: + name: alertmanager-notification-templates + name: alertmanager-notification-templates + + ## Alertmanager integration with Slack and Webhooks + alertmanagerConfig: + slack: + routes: + # - name: slack-info-route + # channel: slack-info-channel + # webhookURL: https://hooks.slack.com/services/some-id + # matchers: + # - name: severity + # matchType: "=" + # value: "info" + # - name: slack-warning-route + # channel: slack-warning-channel + # webhookURL: https://hooks.slack.com/services/some-id + # matchers: + # - name: severity + # matchType: "=" + # value: "warning" + # - name: slack-critical-route + # channel: slack-critical-channel + # webhookURL: https://hooks.slack.com/services/some-id + # matchers: + # - name: severity + # matchType: "=" + # value: "critical" + + + webhook: + routes: + # - name: webhook-route + # url: https://some-webhook-url + # matchers: + # - name: alertname + # matchType: "=~" + +testFramework: + enabled: true + image: + registry: ghcr.io + repository: cloudoperators/greenhouse-extensions-integration-test + tag: main + imagePullPolicy: IfNotPresent From e365faddd2560e535b789b35e1beec22c787f40c Mon Sep 17 00:00:00 2001 From: IB Akshay Date: Thu, 19 Sep 2024 12:25:53 +0200 Subject: [PATCH 04/16] chore(alerts): cleanup test values file --- alerts/ci/test-values.yaml | 38 +------------------------------------- 1 file changed, 1 insertion(+), 37 deletions(-) diff --git a/alerts/ci/test-values.yaml b/alerts/ci/test-values.yaml index 30f163c0..49ea5858 100644 --- a/alerts/ci/test-values.yaml +++ b/alerts/ci/test-values.yaml @@ -169,10 +169,8 @@ alerts: ingress: enabled: true - crds: - enabled: true hosts: - - alertmanager.{{ .Release.Namespace }}.svc.cluster.local + - dummy-host ## By default, a ca-bundle is deployed to enable tls between Prometheus and Alertmanager annotations: @@ -214,40 +212,6 @@ alerts: name: alertmanager-notification-templates name: alertmanager-notification-templates - ## Alertmanager integration with Slack and Webhooks - alertmanagerConfig: - slack: - routes: - # - name: slack-info-route - # channel: slack-info-channel - # webhookURL: https://hooks.slack.com/services/some-id - # matchers: - # - name: severity - # matchType: "=" - # value: "info" - # - name: slack-warning-route - # channel: slack-warning-channel - # webhookURL: https://hooks.slack.com/services/some-id - # matchers: - # - name: severity - # matchType: "=" - # value: "warning" - # - name: slack-critical-route - # channel: slack-critical-channel - # webhookURL: https://hooks.slack.com/services/some-id - # matchers: - # - name: severity - # matchType: "=" - # value: "critical" - - - webhook: - routes: - # - name: webhook-route - # url: https://some-webhook-url - # matchers: - # - name: alertname - # matchType: "=~" testFramework: enabled: true From e34978d99d49cd9d7cabc95d31f2ef4d634d1f34 Mon Sep 17 00:00:00 2001 From: License Bot Date: Thu, 19 Sep 2024 10:28:22 +0000 Subject: [PATCH 05/16] Automatic application of license header --- alerts/ci/test-values.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/alerts/ci/test-values.yaml b/alerts/ci/test-values.yaml index 49ea5858..1f0e047d 100644 --- a/alerts/ci/test-values.yaml +++ b/alerts/ci/test-values.yaml @@ -1,3 +1,6 @@ +# SPDX-FileCopyrightText: 2024 SAP SE or an SAP affiliate company and Greenhouse contributors +# SPDX-License-Identifier: Apache-2.0 + global: ## common labels to apply to all resources ## From 260426e9c99e53a70cfa953ee05c32f18a998aa5 Mon Sep 17 00:00:00 2001 From: IB Akshay Date: Thu, 19 Sep 2024 12:30:38 +0200 Subject: [PATCH 06/16] chore(alerts): improve test description --- alerts/charts/templates/tests/test-alerts-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/alerts/charts/templates/tests/test-alerts-config.yaml b/alerts/charts/templates/tests/test-alerts-config.yaml index 4cd959e7..f04fcf25 100644 --- a/alerts/charts/templates/tests/test-alerts-config.yaml +++ b/alerts/charts/templates/tests/test-alerts-config.yaml @@ -32,14 +32,14 @@ data: } {{- if .Values.alerts.alertmanager.ingress.enabled }} - @test "Verify that secret '{{ .Release.Namespace }}-monitoring-ca' is created" { + @test "Verify that secret '{{ .Release.Namespace }}-monitoring-ca' is created when ingress is enabled" { verify "there is 1 secret named '{{ .Release.Namespace }}-monitoring-ca'" } {{- end -}} {{- if .Values.alerts.alertmanager.ingress.enabled }} {{ if .Capabilities.APIVersions.Has "cert-manager.io/v1" }} - @test "Verify that Issuer and Certificate are created" { + @test "Verify that Issuer and Certificate are created when ingress is enabled and cert-manager crd is available" { verify "there is 1 issuer named '{{ .Release.Namespace }}-monitoring-issuer'" verify "there is 1 certificate named '{{ .Release.Namespace }}-prometheus-auth'" } From cfb1c6184150e7cf8a5a4c4cb461c4268beea328 Mon Sep 17 00:00:00 2001 From: IB Akshay Date: Thu, 19 Sep 2024 12:37:04 +0200 Subject: [PATCH 07/16] chore(alerts): tweak the test --- alerts/charts/templates/tests/test-alerts-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/alerts/charts/templates/tests/test-alerts-config.yaml b/alerts/charts/templates/tests/test-alerts-config.yaml index f04fcf25..7cfd506c 100644 --- a/alerts/charts/templates/tests/test-alerts-config.yaml +++ b/alerts/charts/templates/tests/test-alerts-config.yaml @@ -26,9 +26,9 @@ data: try "at most 2 times every 5s to get pods named '{{ .Release.Name }}-operator.*' and verify that '.status.phase' is 'running'" } - @test "Verify that Alertmanager '{{ .Release.Name }}' is Available and Reconciled" { - try "at most 2 times every 5s to get alertmanagers named '{{ .Release.Name }}' and verify that '.status.conditions[0].status' is 'True'" - try "at most 2 times every 5s to get alertmanagers named '{{ .Release.Name }}' and verify that '.status.conditions[1].status' is 'True'" + @test "Verify that Alertmanager is Available and Reconciled" { + try "at most 5 times every 5s to get alertmanagers named '{{ .Release.Name }}' and verify that '.status.conditions[0].status' is 'True'" + try "at most 5 times every 5s to get alertmanagers named '{{ .Release.Name }}' and verify that '.status.conditions[1].status' is 'True'" } {{- if .Values.alerts.alertmanager.ingress.enabled }} From d40123111b87f50d35583c8cf2e9e2dd70c4eba8 Mon Sep 17 00:00:00 2001 From: IB Akshay Date: Thu, 19 Sep 2024 12:41:07 +0200 Subject: [PATCH 08/16] chore(plugin) chart bump version --- alerts/charts/Chart.yaml | 2 +- alerts/plugindefinition.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/alerts/charts/Chart.yaml b/alerts/charts/Chart.yaml index 87ecce1c..9b9b5e53 100644 --- a/alerts/charts/Chart.yaml +++ b/alerts/charts/Chart.yaml @@ -8,7 +8,7 @@ maintainers: name: alerts sources: - https://github.com/cloudoperators/greenhouse-extensions -version: 0.12.4 +version: 0.12.5 keywords: - prometheus-alertmanager dependencies: diff --git a/alerts/plugindefinition.yaml b/alerts/plugindefinition.yaml index 77fe44cf..16ca38da 100644 --- a/alerts/plugindefinition.yaml +++ b/alerts/plugindefinition.yaml @@ -6,7 +6,7 @@ kind: PluginDefinition metadata: name: alerts spec: - version: 2.0.3 + version: 2.0.4 weight: 0 displayName: Alerts description: The Alerts Plugin consists of both Prometheus Alertmanager and Supernova, the holistic alert management UI @@ -15,7 +15,7 @@ spec: helmChart: name: alerts repository: oci://ghcr.io/cloudoperators/greenhouse-extensions/charts - version: 0.12.4 + version: 0.12.5 uiApplication: name: supernova version: "latest" From b2961b0bc45b28e77e90dc28cb84cd019ad33f29 Mon Sep 17 00:00:00 2001 From: IB Akshay Date: Thu, 19 Sep 2024 12:58:49 +0200 Subject: [PATCH 09/16] chore(alerts): make the test names consistent --- alerts/charts/templates/tests/test-alerts-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/alerts/charts/templates/tests/test-alerts-config.yaml b/alerts/charts/templates/tests/test-alerts-config.yaml index 7cfd506c..e57c2049 100644 --- a/alerts/charts/templates/tests/test-alerts-config.yaml +++ b/alerts/charts/templates/tests/test-alerts-config.yaml @@ -26,7 +26,7 @@ data: try "at most 2 times every 5s to get pods named '{{ .Release.Name }}-operator.*' and verify that '.status.phase' is 'running'" } - @test "Verify that Alertmanager is Available and Reconciled" { + @test "Verify that alertmanager is available and reconciled" { try "at most 5 times every 5s to get alertmanagers named '{{ .Release.Name }}' and verify that '.status.conditions[0].status' is 'True'" try "at most 5 times every 5s to get alertmanagers named '{{ .Release.Name }}' and verify that '.status.conditions[1].status' is 'True'" } @@ -39,7 +39,7 @@ data: {{- if .Values.alerts.alertmanager.ingress.enabled }} {{ if .Capabilities.APIVersions.Has "cert-manager.io/v1" }} - @test "Verify that Issuer and Certificate are created when ingress is enabled and cert-manager crd is available" { + @test "Verify that issuer and certificate are created when ingress is enabled and cert-manager crd is available" { verify "there is 1 issuer named '{{ .Release.Namespace }}-monitoring-issuer'" verify "there is 1 certificate named '{{ .Release.Namespace }}-prometheus-auth'" } From 31d730546550e8a1e1c5ba53df8e952b8567f9c8 Mon Sep 17 00:00:00 2001 From: IB Akshay Date: Thu, 19 Sep 2024 13:33:25 +0200 Subject: [PATCH 10/16] fix(alerts): modify test --- alerts/charts/templates/tests/test-alerts-config.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/alerts/charts/templates/tests/test-alerts-config.yaml b/alerts/charts/templates/tests/test-alerts-config.yaml index e57c2049..71df21ab 100644 --- a/alerts/charts/templates/tests/test-alerts-config.yaml +++ b/alerts/charts/templates/tests/test-alerts-config.yaml @@ -46,6 +46,7 @@ data: {{- end }} {{- end }} + {{- if .Values.alerts.crds.enabled }} @test "Verify creation of required custom resource definitions (CRDs) for {{ .Release.Name }}" { verify "there is 1 customresourcedefinition named 'alertmanagerconfigs'" verify "there is 1 customresourcedefinition named 'alertmanagers'" @@ -58,4 +59,5 @@ data: verify "there is 1 customresourcedefinition named 'scrapeconfigs'" verify "there is 1 customresourcedefinition named 'thanosrulers'" } + {{- end }} {{- end -}} From 823c4673a47d8cdde214b3e5ef4ba7bc6150a3c3 Mon Sep 17 00:00:00 2001 From: IB Akshay Date: Thu, 19 Sep 2024 13:37:02 +0200 Subject: [PATCH 11/16] chore(alerts) debug test --- alerts/ci/test-values.yaml | 212 ------------------------------------- 1 file changed, 212 deletions(-) diff --git a/alerts/ci/test-values.yaml b/alerts/ci/test-values.yaml index 1f0e047d..0cfd0145 100644 --- a/alerts/ci/test-values.yaml +++ b/alerts/ci/test-values.yaml @@ -1,225 +1,13 @@ # SPDX-FileCopyrightText: 2024 SAP SE or an SAP affiliate company and Greenhouse contributors -# SPDX-License-Identifier: Apache-2.0 -global: - ## common labels to apply to all resources - ## - commonLabels: {} -# kube-prometheus-stack configuration scoped to alerts alerts: - - ## Create default rules for monitoring the cluster - ## - defaultRules: - create: true - rules: - alertmanager: true - etcd: false - configReloaders: false - general: false - k8sContainerCpuUsageSecondsTotal: false - k8sContainerMemoryCache: false - k8sContainerMemoryRss: false - k8sContainerMemorySwap: false - k8sContainerResource: false - k8sPodOwner: false - kubeApiserverAvailability: false - kubeApiserverBurnrate: false - kubeApiserverHistogram: false - kubeApiserverSlos: false - kubeControllerManager: false - kubelet: false - kubeProxy: false - kubePrometheusGeneral: false - kubePrometheusNodeRecording: false - kubernetesApps: false - kubernetesResources: false - kubernetesStorage: false - kubernetesSystem: false - kubeSchedulerAlerting: false - kubeSchedulerRecording: false - kubeStateMetrics: false - network: false - node: false - nodeExporterAlerting: false - nodeExporterRecording: false - prometheus: false - prometheusOperator: false - windows: false - - ## Flag to disable all the kubernetes component scrapers - ## - kubernetesServiceMonitors: - enabled: false - - ## Setting to true produces cleaner resource names, but requires a data migration because the name of the persistent volume changes. Therefore this should only be set once on initial installation. - ## - cleanPrometheusOperatorObjectNames: true - - ## Install Prometheus Operator CRDs - ## - crds: - enabled: true - - windowsMonitoring: - ## Deploys the windows-exporter and Windows-specific dashboards and rules (job name must be 'windows-exporter') - enabled: false - - ## Using default values from https://github.com/grafana/helm-charts/blob/main/charts/grafana/values.yaml - ## - grafana: - enabled: false - - ## Configuration for thanosRuler - ## ref: https://thanos.io/tip/components/rule.md/ - ## - thanosRuler: - - ## Deploy thanosRuler - ## - enabled: false - - - ## Configuration for the Prometheus instance - ## - prometheus: - - ## Deploy Prometheus - ## - enabled: false - - ## Deploy node exporter as a daemonset to all nodes - ## - nodeExporter: - ## Deploy prometheus node-exporter - ## - enabled: false - - ## Component scraping kube state metrics - ## - kubeStateMetrics: - ## Deploy kube-state-metrics subchart - enabled: false - - ## Manages Prometheus and Alertmanager components - ## - prometheusOperator: - ## Deploy prometheus-operator - enabled: true - - ## Configuration for alertmanager - ## ref: https://prometheus.io/docs/alerting/alertmanager/ - ## alertmanager: - ## Deploy alertmanager - ## - enabled: true - - ## Alertmanager configuration directives - ## ref: https://prometheus.io/docs/alerting/configuration/#configuration-file - ## https://prometheus.io/webtools/alerting/routing-tree-editor/ - ## - config: - global: - resolve_timeout: 5m - inhibit_rules: - - source_matchers: - - 'severity = critical' - target_matchers: - - 'severity =~ warning|info' - equal: - - 'namespace' - - 'alertname' - - source_matchers: - - 'severity = warning' - target_matchers: - - 'severity = info' - equal: - - 'namespace' - - 'alertname' - - source_matchers: - - 'alertname = InfoInhibitor' - target_matchers: - - 'severity = info' - equal: - - 'namespace' - route: - group_by: - - organization - - alertname - - cluster - - region - group_wait: 30s - group_interval: 5m - repeat_interval: 12h - continue: false - receiver: 'null' - routes: - - matchers: - - alertname =~ "Watchdog|InfoInhibitor" - receiver: "null" - continue: false - - receivers: - - name: 'null' - - templates: - - '/etc/alertmanager/config/*.tmpl' - - '/etc/alertmanager/notification-templates/*.tmpl' - - ingress: enabled: true hosts: - dummy-host - ## By default, a ca-bundle is deployed to enable tls between Prometheus and Alertmanager - annotations: - disco: "true" - kubernetes.io/tls-acme: "true" - nginx.ingress.kubernetes.io/auth-tls-secret: "{{ $.Release.Namespace }}/{{ $.Release.Namespace }}-ca-bundle" - nginx.ingress.kubernetes.io/auth-tls-verify-client: "true" - nginx.ingress.kubernetes.io/auth-tls-verify-depth: "3" - nginx.ingress.kubernetes.io/cors-allow-headers: Content-Type - nginx.ingress.kubernetes.io/cors-allow-methods: DELETE - nginx.ingress.kubernetes.io/enable-cors: "true" - ingressClassName: nginx - - ## Settings affecting alertmanagerSpec - ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#alertmanagerspec - - alertmanagerSpec: - - ## Defines the strategy used by AlertmanagerConfig objects to match alerts. - ## - alertmanagerConfigMatcherStrategy: - type: OnNamespace - - ## AlertmanagerConfigs to be selected to merge and configure Alertmanager with. - ## - alertmanagerConfigSelector: - matchLabels: - plugin: "{{ $.Release.Name }}" - - ## Additional volumes on the output StatefulSet definition. - volumeMounts: - - mountPath: /etc/alertmanager/notification-templates - name: alertmanager-notification-templates - readOnly: true - - ## Additional VolumeMounts on the output StatefulSet definition. - volumes: - - configMap: - name: alertmanager-notification-templates - name: alertmanager-notification-templates -testFramework: - enabled: true - image: - registry: ghcr.io - repository: cloudoperators/greenhouse-extensions-integration-test - tag: main - imagePullPolicy: IfNotPresent From a73b443299b5a44a9b052105be02e7cd8a682848 Mon Sep 17 00:00:00 2001 From: License Bot Date: Thu, 19 Sep 2024 11:37:50 +0000 Subject: [PATCH 12/16] Automatic application of license header --- alerts/ci/test-values.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/alerts/ci/test-values.yaml b/alerts/ci/test-values.yaml index 0cfd0145..ac6f7a24 100644 --- a/alerts/ci/test-values.yaml +++ b/alerts/ci/test-values.yaml @@ -1,4 +1,7 @@ # SPDX-FileCopyrightText: 2024 SAP SE or an SAP affiliate company and Greenhouse contributors +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileCopyrightText: 2024 SAP SE or an SAP affiliate company and Greenhouse contributors alerts: From 3902e834a2caf25db9eb7a07e24165ec6ca49335 Mon Sep 17 00:00:00 2001 From: IB Akshay Date: Thu, 19 Sep 2024 13:42:27 +0200 Subject: [PATCH 13/16] chore(alerts): remove unnecessary code in test-alerts-config.yaml --- alerts/charts/templates/tests/test-alerts-config.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/alerts/charts/templates/tests/test-alerts-config.yaml b/alerts/charts/templates/tests/test-alerts-config.yaml index 71df21ab..455b1680 100644 --- a/alerts/charts/templates/tests/test-alerts-config.yaml +++ b/alerts/charts/templates/tests/test-alerts-config.yaml @@ -25,7 +25,6 @@ data: verify "there is 1 service named '{{ .Release.Name }}-operator'" try "at most 2 times every 5s to get pods named '{{ .Release.Name }}-operator.*' and verify that '.status.phase' is 'running'" } - @test "Verify that alertmanager is available and reconciled" { try "at most 5 times every 5s to get alertmanagers named '{{ .Release.Name }}' and verify that '.status.conditions[0].status' is 'True'" try "at most 5 times every 5s to get alertmanagers named '{{ .Release.Name }}' and verify that '.status.conditions[1].status' is 'True'" @@ -38,7 +37,7 @@ data: {{- end -}} {{- if .Values.alerts.alertmanager.ingress.enabled }} - {{ if .Capabilities.APIVersions.Has "cert-manager.io/v1" }} + {{- if .Capabilities.APIVersions.Has "cert-manager.io/v1" }} @test "Verify that issuer and certificate are created when ingress is enabled and cert-manager crd is available" { verify "there is 1 issuer named '{{ .Release.Namespace }}-monitoring-issuer'" verify "there is 1 certificate named '{{ .Release.Namespace }}-prometheus-auth'" From d3e45ad5e27dd09f1a0ccd273fe42836e6f3171e Mon Sep 17 00:00:00 2001 From: Akshay Iyyadurai Balasundaram Date: Thu, 19 Sep 2024 13:52:56 +0200 Subject: [PATCH 14/16] Update alerts/ci/test-values.yaml Co-authored-by: Richard Tief <56597015+richardtief@users.noreply.github.com> --- alerts/ci/test-values.yaml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/alerts/ci/test-values.yaml b/alerts/ci/test-values.yaml index ac6f7a24..93171684 100644 --- a/alerts/ci/test-values.yaml +++ b/alerts/ci/test-values.yaml @@ -1,16 +1,8 @@ # SPDX-FileCopyrightText: 2024 SAP SE or an SAP affiliate company and Greenhouse contributors # SPDX-License-Identifier: Apache-2.0 - -# SPDX-FileCopyrightText: 2024 SAP SE or an SAP affiliate company and Greenhouse contributors - - alerts: alertmanager: - ingress: enabled: true hosts: - dummy-host - - - From 0a18c8fd925678f69358e31473dc2cea7920e45c Mon Sep 17 00:00:00 2001 From: Akshay Iyyadurai Balasundaram Date: Thu, 19 Sep 2024 14:10:57 +0200 Subject: [PATCH 15/16] Update alerts/charts/Chart.yaml Co-authored-by: Richard Tief <56597015+richardtief@users.noreply.github.com> --- alerts/charts/Chart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alerts/charts/Chart.yaml b/alerts/charts/Chart.yaml index 9b9b5e53..6280cb63 100644 --- a/alerts/charts/Chart.yaml +++ b/alerts/charts/Chart.yaml @@ -8,7 +8,7 @@ maintainers: name: alerts sources: - https://github.com/cloudoperators/greenhouse-extensions -version: 0.12.5 +version: 0.13.0 keywords: - prometheus-alertmanager dependencies: From af9d4814d1228c83af3f6f81b69649b23ed79255 Mon Sep 17 00:00:00 2001 From: Akshay Iyyadurai Balasundaram Date: Thu, 19 Sep 2024 14:11:02 +0200 Subject: [PATCH 16/16] Update alerts/plugindefinition.yaml Co-authored-by: Richard Tief <56597015+richardtief@users.noreply.github.com> --- alerts/plugindefinition.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alerts/plugindefinition.yaml b/alerts/plugindefinition.yaml index 16ca38da..1d58038d 100644 --- a/alerts/plugindefinition.yaml +++ b/alerts/plugindefinition.yaml @@ -6,7 +6,7 @@ kind: PluginDefinition metadata: name: alerts spec: - version: 2.0.4 + version: 2.1.0 weight: 0 displayName: Alerts description: The Alerts Plugin consists of both Prometheus Alertmanager and Supernova, the holistic alert management UI