Skip to content

🌱 Add prometheus to e2e workflow #1928

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,24 @@ image-registry: ## Build the testdata catalog used for e2e tests and push it to
# test-e2e: target-specific settings and prerequisite chain for the kind-based e2e run.
# The three variables are target-specific, so (per GNU Make semantics) they are also in
# effect while the prerequisites of test-e2e are being built.
# NOTE(review): two prerequisite lines follow; they appear to be the removed and the
# added side of this diff hunk (the second adds prometheus and e2e-metrics) - confirm.
test-e2e: KIND_CLUSTER_NAME := operator-controller-e2e
test-e2e: KUSTOMIZE_BUILD_DIR := config/overlays/e2e
test-e2e: GO_BUILD_EXTRA_FLAGS := -cover
test-e2e: run image-registry e2e e2e-coverage kind-clean #HELP Run e2e test suite on local kind cluster
test-e2e: run image-registry prometheus e2e e2e-metrics e2e-coverage kind-clean #HELP Run e2e test suite on local kind cluster

# prometheus: hands the namespace, the prometheus-operator version and the kustomize
# binary (in that order) to hack/test/setup-monitoring.sh, which performs the install.
.PHONY: prometheus
prometheus: PROMETHEUS_VERSION := v0.83.0
prometheus: PROMETHEUS_NAMESPACE := olmv1-system
prometheus: #HELP Deploy Prometheus into specified namespace
	./hack/test/setup-monitoring.sh ${PROMETHEUS_NAMESPACE} ${PROMETHEUS_VERSION} ${KUSTOMIZE}

# The metrics.out file contains raw json data of the metrics collected during a test run.
# In an upcoming PR, this query will be replaced with one that checks for alerts from
# prometheus. Prometheus will gather metrics we currently query for over the test run,
# and provide alerts from the metrics based on the rules that we set.
#
# The query is POSTed as a form field to the prometheus NodePort on localhost:30900.
#   --fail                 makes the target fail on an HTTP error instead of saving the
#                          error body to metrics.out as if it were metric data
#   --silent --show-error  drops the progress meter but still reports failures
#   --data-urlencode       percent-encodes the PromQL selector ({, ", |, =, ~) so the
#                          form body is well-formed
# Output goes to $(ARTIFACT_PATH)/metrics.out, or ./metrics.out when ARTIFACT_PATH is unset.
.PHONY: e2e-metrics
e2e-metrics: #HELP Request metrics from prometheus; place in ARTIFACT_PATH if set
	curl --fail --silent --show-error -X POST \
		-H "Content-Type: application/x-www-form-urlencoded" \
		--data-urlencode 'query={pod=~"operator-controller-controller-manager-.*|catalogd-controller-manager-.*"}' \
		http://localhost:30900/api/v1/query > "$(if $(ARTIFACT_PATH),$(ARTIFACT_PATH),.)/metrics.out"

.PHONY: extension-developer-e2e
extension-developer-e2e: KUSTOMIZE_BUILD_DIR := config/overlays/cert-manager
Expand Down
222 changes: 222 additions & 0 deletions hack/test/setup-monitoring.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
#!/bin/bash

# Installs the prometheus-operator into a namespace for e2e testing.
#
# Arguments (all required, positional):
#   $1  namespace to install into
#   $2  prometheus-operator release tag (e.g. v0.83.0)
#   $3  path to the kustomize binary
#
# Exits non-zero on a wrong argument count or on the first failing command.

set -euo pipefail

help="setup-monitoring.sh is used to set up prometheus monitoring for e2e testing.
Usage:
setup-monitoring.sh [PROMETHEUS_NAMESPACE] [PROMETHEUS_VERSION] [KUSTOMIZE]
"

# Usage errors are diagnostics, so they go to stderr.
if [[ "$#" -ne 3 ]]; then
    echo "Illegal number of arguments passed" >&2
    echo "${help}" >&2
    exit 1
fi

NAMESPACE=$1
PROMETHEUS_VERSION=$2
KUSTOMIZE=$3

# Scratch directory for the downloaded manifests. It is deliberately not called
# TMPDIR, so the conventional environment variable of that name is left alone.
WORK_DIR=$(mktemp -d)
trap 'echo "Cleaning up ${WORK_DIR}"; rm -rf "${WORK_DIR}"' EXIT

# --fail turns an HTTP error (e.g. a 404 for an unknown version tag) into a
# non-zero exit instead of silently saving the error page as a manifest.
BASE_URL="https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/refs/tags/${PROMETHEUS_VERSION}"
for manifest in kustomization.yaml bundle.yaml; do
    curl --fail --silent --show-error --location "${BASE_URL}/${manifest}" --output "${WORK_DIR}/${manifest}"
done

# Run as two separate statements: in "A && B" a failure of A does not trigger
# `set -e`, so a failed kustomize edit would otherwise go unnoticed.
(cd "${WORK_DIR}" && "${KUSTOMIZE}" edit set namespace "${NAMESPACE}")
kubectl create -k "${WORK_DIR}"

# Waiting on pods by label fails immediately if no pod has been created yet, so
# first wait for the operator Deployment to finish rolling out.
kubectl rollout status deployment/prometheus-operator -n "${NAMESPACE}" --timeout=120s
kubectl wait --for=condition=Ready pods -n "${NAMESPACE}" -l app.kubernetes.io/name=prometheus-operator

# RBAC for the Prometheus server: a ServiceAccount named "prometheus" in
# ${NAMESPACE}, a ClusterRole of the same name, and a ClusterRoleBinding tying
# the two together. The role grants get/list/watch on nodes, nodes/metrics,
# services, endpoints, pods, endpointslices and ingresses, plus get on
# configmaps and on the /metrics non-resource URL.
kubectl apply -f - << EOF
apiVersion: v1
kind: ServiceAccount
metadata:
name: prometheus
namespace: ${NAMESPACE}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: prometheus
rules:
- apiGroups: [""]
resources:
- nodes
- nodes/metrics
- services
- endpoints
- pods
verbs: ["get", "list", "watch"]
- apiGroups: [""]
resources:
- configmaps
verbs: ["get"]
- apiGroups:
- discovery.k8s.io
resources:
- endpointslices
verbs: ["get", "list", "watch"]
- apiGroups:
- networking.k8s.io
resources:
- ingresses
verbs: ["get", "list", "watch"]
- nonResourceURLs: ["/metrics"]
verbs: ["get"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: prometheus
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: prometheus
subjects:
- kind: ServiceAccount
name: prometheus
namespace: ${NAMESPACE}
EOF

# Prometheus custom resource named "prometheus" in ${NAMESPACE}. It uses the
# "prometheus" ServiceAccount, debug logging, a 1m scrape interval with a 30s
# scrape timeout, and runs as non-root user 65534 with the RuntimeDefault
# seccomp profile.
# NOTE(review): the empty serviceMonitorSelector presumably selects every
# ServiceMonitor - confirm against the prometheus-operator API reference.
kubectl apply -f - << EOF
apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
name: prometheus
namespace: ${NAMESPACE}
spec:
logLevel: debug
serviceAccountName: prometheus
scrapeTimeout: 30s
scrapeInterval: 1m
securityContext:
runAsNonRoot: true
runAsUser: 65534
seccompProfile:
type: RuntimeDefault
serviceMonitorSelector: {}
EOF

# NetworkPolicy named "prometheus" for pods labelled
# app.kubernetes.io/name=prometheus in ${NAMESPACE}. It lists both the Egress
# and Ingress policy types and gives each a single empty rule.
kubectl apply -f - << EOF
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: prometheus
namespace: ${NAMESPACE}
spec:
podSelector:
matchLabels:
app.kubernetes.io/name: prometheus
policyTypes:
- Egress
- Ingress
egress:
- {} # Allows all egress traffic for metrics requests
ingress:
- {} # Allows us to query prometheus
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand that we need this NetworkPolicy so that we can get data out of Prometheus, integrate it with Grafana, etc. So, I am OK with it 👍

EOF

# The pod is created asynchronously by the operator: block first until the pod
# object exists (up to 60s), then until it reports Ready (up to 120s).
kubectl wait pod/prometheus-prometheus-0 --namespace "${NAMESPACE}" --for=create --timeout=60s
kubectl wait pod/prometheus-prometheus-0 --namespace "${NAMESPACE}" --for=condition=Ready --timeout=120s

# Authentication token for the scrape requests: a Secret of type
# kubernetes.io/service-account-token named "prometheus-metrics-token",
# annotated with the "prometheus" ServiceAccount. The ServiceMonitors in this
# script reference its "token" key as their authorization credentials.
kubectl apply -f - <<EOF
apiVersion: v1
kind: Secret
type: kubernetes.io/service-account-token
metadata:
name: prometheus-metrics-token
namespace: ${NAMESPACE}
annotations:
kubernetes.io/service-account.name: prometheus
EOF

# ServiceMonitors for operator-controller and catalogd
# This first one covers operator-controller. It scrapes /metrics over HTTPS on
# the port named "https" of objects labelled
# control-plane=operator-controller-controller-manager. Requests are authorized
# with the prometheus-metrics-token secret. TLS verification stays on
# (insecureSkipVerify: false) against serverName
# operator-controller-service.${NAMESPACE}.svc, with the CA, client cert and
# key all read from the olmv1-cert secret.
kubectl apply -f - <<EOF
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: operator-controller-controller-manager-metrics-monitor
namespace: ${NAMESPACE}
spec:
endpoints:
- path: /metrics
port: https
scheme: https
authorization:
credentials:
name: prometheus-metrics-token
key: token
tlsConfig:
serverName: operator-controller-service.${NAMESPACE}.svc
insecureSkipVerify: false
ca:
secret:
name: olmv1-cert
key: ca.crt
cert:
secret:
name: olmv1-cert
key: tls.crt
keySecret:
name: olmv1-cert
key: tls.key
selector:
matchLabels:
control-plane: operator-controller-controller-manager
EOF

# Resolve the name of the catalogd serving-cert secret; only its
# "catalogd-service-cert" prefix is known here.
# - The listing is a separate statement so that a kubectl failure aborts the
#   script via `set -e`.
# - `grep -m1` keeps only the first match, so CATD_SECRET is always a single
#   line and cannot corrupt the YAML generated from it.
# - `|| true` keeps a non-match from killing the script silently; the explicit
#   check below reports it instead.
SECRET_NAMES=$(kubectl get secret -n "${NAMESPACE}" -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}')
CATD_SECRET=$(grep -m1 '^catalogd-service-cert' <<< "${SECRET_NAMES}" || true)
if [[ -z "${CATD_SECRET}" ]]; then
    echo "No secret with prefix 'catalogd-service-cert' found in namespace ${NAMESPACE}" >&2
    exit 1
fi

# ServiceMonitor for catalogd. It scrapes /metrics over HTTPS on the port named
# "metrics" of objects labelled app.kubernetes.io/name=catalogd. Requests are
# authorized with the prometheus-metrics-token secret. TLS verification stays
# on (insecureSkipVerify: false) against serverName
# catalogd-service.${NAMESPACE}.svc, with the CA, client cert and key all read
# from the secret named by ${CATD_SECRET}.
kubectl apply -f - <<EOF
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: catalogd-controller-manager-metrics-monitor
namespace: ${NAMESPACE}
spec:
endpoints:
- path: /metrics
port: metrics
scheme: https
authorization:
credentials:
name: prometheus-metrics-token
key: token
tlsConfig:
serverName: catalogd-service.${NAMESPACE}.svc
insecureSkipVerify: false
ca:
secret:
name: ${CATD_SECRET}
key: ca.crt
cert:
secret:
name: ${CATD_SECRET}
key: tls.crt
keySecret:
name: ${CATD_SECRET}
key: tls.key
selector:
matchLabels:
app.kubernetes.io/name: catalogd
EOF

# NodePort service to allow querying prometheus from outside the cluster
# NOTE: This NodePort must also be configured in kind-config.yaml
# The Service "prometheus-service" selects pods labelled prometheus=prometheus
# and maps service port 9090 (targetPort "web") to nodePort 30900.
kubectl apply -f - <<EOF
apiVersion: v1
kind: Service
metadata:
name: prometheus-service
namespace: ${NAMESPACE}
spec:
type: NodePort
ports:
- name: web
nodePort: 30900
port: 9090
protocol: TCP
targetPort: web
selector:
prometheus: prometheus
EOF
5 changes: 5 additions & 0 deletions kind-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ nodes:
hostPort: 30000
listenAddress: "127.0.0.1"
protocol: tcp
# prometheus metrics service's NodePort
- containerPort: 30900
hostPort: 30900
listenAddress: "127.0.0.1"
protocol: tcp
kubeadmConfigPatches:
- |
kind: ClusterConfiguration
Expand Down